diff --git a/.travis.yml b/.travis.yml
index 162bebba091d84b295f929527de9804e65df5a65..5a7f45a748ac7e81f3f90c245bcf2cd84c4e9027 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,17 +8,10 @@ sudo: required
 dist: trusty
 os:
   - linux
-  - osx
 env:
   - JOB=DOCS
   - JOB=BUILD_AND_TEST
   - JOB=PRE_COMMIT
-matrix:
-  exclude:
-    - os: osx
-      env: JOB=DOCS # Only generate documentation in linux.
-    - os: osx
-      env: JOB=PRE_COMMIT # Only check pre-commit hook in linux
 addons:
   apt:
@@ -52,9 +45,10 @@ before_install:
       fi
     fi
   fi
-  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
  - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
-  - pip install numpy wheel protobuf sphinx recommonmark sphinx_rtd_theme virtualenv pre-commit requests==2.9.2 LinkChecker
+  # Paddle currently uses protobuf 3.1; protobuf 3.2 breaks compatibility, so we pin the Python
+  # protobuf version.
+  - pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
 script:
   - paddle/scripts/travis/main.sh
 notifications:
diff --git a/cmake/FindSphinx.cmake b/cmake/FindSphinx.cmake
index d319442ef10b38b9edf5844e5540a92c7094c7ce..1c29cb22a31f1e41a6b5575837c6374175cfdea5 100644
--- a/cmake/FindSphinx.cmake
+++ b/cmake/FindSphinx.cmake
@@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
     ${source}
     ${destination}
     COMMENT "Generating sphinx documentation: ${builder}"
-    COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html
+    COMMAND cd ${destination} && ln -s ./index_*.html index.html
   )
 set_property(
diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake
index 26306f9849100d4463dde267acae5392cc81d7ac..235c95f017f2b6ef24195a0210ccafff36b6ed61 100644
--- a/cmake/cblas.cmake
+++ b/cmake/cblas.cmake
@@ -16,7 +16,8 @@ set(CBLAS_FOUND OFF)
 ## Find MKL First.
-set(MKL_ROOT $ENV{MKLROOT} CACHE PATH "Folder contains MKL")
+set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs")
+set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL")
 find_path(MKL_INCLUDE_DIR mkl.h PATHS ${MKL_ROOT}/include)
diff --git a/cmake/coverallsGcovJsons.cmake b/cmake/coverallsGcovJsons.cmake
index ae3530c3a0eeb79ddbcbf4f2e99be75aa7968a2f..ad9a10cb8616159b9e3aff445e698cb2edb92820 100644
--- a/cmake/coverallsGcovJsons.cmake
+++ b/cmake/coverallsGcovJsons.cmake
@@ -110,14 +110,13 @@ endmacro()
 # Get the coverage data.
 file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda")
-message("GCDA files:")
+message("Process GCDA files:")
+message("===============================")
 # Get a list of all the object directories needed by gcov
 # (The directories the .gcda files and .o files are found in)
 # and run gcov on those.
 foreach(GCDA ${GCDA_FILES})
-    message("Process: ${GCDA}")
-    message("------------------------------------------------------------------------------")
     get_filename_component(GCDA_DIR ${GCDA} PATH)
 #
@@ -135,7 +134,7 @@ foreach(GCDA ${GCDA_FILES})
 # If -p is not specified then the file is named only "the_file.c.gcov"
 #
 execute_process(
-    COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA}
+    COMMAND "${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null"
     WORKING_DIRECTORY ${GCDA_DIR}
 )
endforeach()
@@ -383,7 +382,6 @@ foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING})
 set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
 # Generate the final JSON for this file.
- message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...") string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON) set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ") endforeach() diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake index 71e20c85276b014c2e33735c3199c3772526c6c7..ab105611c812a4f4b642ac5b1213fdfe93fab97d 100644 --- a/cmake/external/glog.cmake +++ b/cmake/external/glog.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -29,12 +29,14 @@ INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR}) ExternalProject_Add( glog ${EXTERNAL_PROJECT_LOG_ARGS} + DEPENDS gflags GIT_REPOSITORY "https://github.com/google/glog.git" PREFIX ${GLOG_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON - CMAKE_ARGS -DWITH_GFLAGS=OFF + CMAKE_ARGS -DWITH_GFLAGS=ON + CMAKE_ARGS -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags CMAKE_ARGS -DBUILD_TESTING=OFF ) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 613614c0e3d42fac4147f78edbc1bd6d62847419..26da7e8e384bafdcbcd1a358c39cc6eb167b067e 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -14,50 +14,50 @@ INCLUDE(ExternalProject) -SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf) -SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf) -SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE) +FIND_PACKAGE(Protobuf) -INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) +IF(NOT PROTOBUF_FOUND) + SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf) + SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf) + SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE) + + IF(WIN32) + SET(PROTOBUF_LITE_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE) + SET(PROTOBUF_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE) + SET(PROTOBUF_PROTOC_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE) + SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE) + ELSE(WIN32) + SET(PROTOBUF_LITE_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE) + SET(PROTOBUF_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE) + SET(PROTOBUF_PROTOC_LIBRARY + "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE) + SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE) + ENDIF(WIN32) -IF(WIN32) - SET(PROTOBUF_LITE_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE) - SET(PROTOBUF_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." 
FORCE) - SET(PROTOBUF_PROTOC_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE) - SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE) -ELSE(WIN32) - IF(${HOST_SYSTEM} STREQUAL "centos") - SET(LIB "lib64") - ELSE() - SET(LIB "lib") - ENDIF() - SET(PROTOBUF_LITE_LIBRARY - "${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE) - SET(PROTOBUF_LIBRARY - "${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE) - SET(PROTOBUF_PROTOC_LIBRARY - "${PROTOBUF_INSTALL_DIR}/${LIB}/libprotoc.a" CACHE FILEPATH "protoc library." FORCE) - SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE) -ENDIF(WIN32) + ExternalProject_Add( + protobuf + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${PROTOBUF_SOURCES_DIR} + UPDATE_COMMAND "" + DEPENDS zlib + GIT_REPOSITORY "https://github.com/google/protobuf.git" + GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546" + CONFIGURE_COMMAND + ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake + -Dprotobuf_BUILD_TESTS=OFF + -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=lib + ) -ExternalProject_Add( - protobuf - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${PROTOBUF_SOURCES_DIR} - UPDATE_COMMAND "" - DEPENDS zlib - GIT_REPOSITORY "https://github.com/google/protobuf.git" - GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546" - CONFIGURE_COMMAND - ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake - -Dprotobuf_BUILD_TESTS=OFF - -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT} - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=Release - -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} -) + LIST(APPEND external_project_dependencies protobuf) +ENDIF(NOT PROTOBUF_FOUND) -LIST(APPEND external_project_dependencies protobuf) +INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake index 209e679f2cb2178423bf20dec73a0bccef199fcb..0accf1a8dd83560324716f0f4936be56dd7a9f1b 100644 --- a/cmake/external/python.cmake +++ b/cmake/external/python.cmake @@ -26,10 +26,10 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) find_python_module(wheel REQUIRED) find_python_module(google.protobuf REQUIRED) FIND_PACKAGE(NumPy REQUIRED) - IF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") + IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, " - "please use pip to upgrade protobuf.") - ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") + "please use pip to upgrade protobuf. 
pip install -U protobuf") + ENDIF() ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.") ##################################### PYTHON ######################################## @@ -221,7 +221,3 @@ ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR}) - -MESSAGE("[Paddle] Python Executable: ${PYTHON_EXECUTABLE}") -MESSAGE("[Paddle] Python Include: ${PYTHON_INCLUDE_DIRS}") -MESSAGE("[Paddle] Python Libraries: ${PYTHON_LIBRARIES}") diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index f5e4b3e1eb39acbe8dbcd0023956ca7e52c1ecd8..172c318b35d611d0432b78f2a18eb58a7d272b90 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -54,6 +54,7 @@ ExternalProject_Add( CMAKE_ARGS -DWITH_GPU=${WITH_GPU} CMAKE_ARGS -DWITH_OMP=${USE_OMP} CMAKE_ARGS -DWITH_TORCH=OFF + CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=TRUE CMAKE_ARGS -DBUILD_SHARED=ON ) diff --git a/cmake/system.cmake b/cmake/system.cmake index ab124a89dcdc1cc5b865f6a15c1693b2f72ea39a..3e472da7e0bd9c433f92f3e8b52970cd2cc6dcba 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -12,6 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Detects the OS and sets appropriate variables. +# CMAKE_SYSTEM_NAME only give us a coarse-grained name, +# but the name like centos is necessary in some scenes +# to distinguish system for customization. +# +# for instance, protobuf libs path is /lib64 +# on CentOS, but /lib on other systems. + IF(WIN32) SET(HOST_SYSTEM "win32") ELSE(WIN32) @@ -30,6 +38,10 @@ ELSE(WIN32) SET(HOST_SYSTEM "debian") ELSEIF(LINUX_ISSUE MATCHES "Ubuntu") SET(HOST_SYSTEM "ubuntu") + ELSEIF(LINUX_ISSUE MATCHES "Red Hat") + SET(HOST_SYSTEM "redhat") + ELSEIF(LINUX_ISSUE MATCHES "Fedora") + SET(HOST_SYSTEM "fedora") ENDIF() ENDIF(EXISTS "/etc/issue") @@ -40,6 +52,10 @@ ELSE(WIN32) ENDIF() ENDIF(EXISTS "/etc/redhat-release") + IF(NOT HOST_SYSTEM) + SET(HOST_SYSTEM ${CMAKE_SYSTEM_NAME}) + ENDIF() + ENDIF(APPLE) ENDIF(WIN32) diff --git a/demo/image_classification/api_v2_resnet.py b/demo/image_classification/api_v2_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..19d20540780becf504973a23b50445d4b65dc2ef --- /dev/null +++ b/demo/image_classification/api_v2_resnet.py @@ -0,0 +1,74 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle.v2 as paddle + +__all__ = ['resnet_cifar10'] + + +def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + active_type=paddle.activation.Relu(), + ch_in=None): + tmp = paddle.layer.img_conv( + input=input, + filter_size=filter_size, + num_channels=ch_in, + num_filters=ch_out, + stride=stride, + padding=padding, + act=paddle.activation.Linear(), + bias_attr=False) + return paddle.layer.batch_norm(input=tmp, act=active_type) + + +def shortcut(ipt, n_in, n_out, stride): + if n_in != n_out: + return conv_bn_layer(ipt, n_out, 1, stride, 0, + paddle.activation.Linear()) + else: + return ipt + + +def basicblock(ipt, ch_out, stride): + ch_in = ch_out * 2 + tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1) + tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear()) + short = shortcut(ipt, ch_in, ch_out, stride) + return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) + + +def layer_warp(block_func, ipt, features, count, stride): + tmp = block_func(ipt, features, stride) + for i in range(1, count): + tmp = block_func(tmp, features, 1) + return tmp + + +def resnet_cifar10(ipt, depth=32): + # depth should be one of 20, 32, 44, 56, 110, 1202 + assert (depth - 2) % 6 == 0 + n = (depth - 2) / 6 + nStages = {16, 64, 128} + conv1 = conv_bn_layer( + ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, n, 1) + res2 = layer_warp(basicblock, res1, 32, n, 2) + res3 = layer_warp(basicblock, res2, 64, n, 2) + pool = paddle.layer.img_pool( + input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) + return pool diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py new file mode 100644 index 0000000000000000000000000000000000000000..e0fc0e04bbd21f691caa1ce3fb95c8a7065d1b3f --- /dev/null +++ b/demo/image_classification/api_v2_train.py @@ -0,0 +1,91 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License + +import sys +import paddle.v2 as paddle +from api_v2_vgg import vgg_bn_drop +from api_v2_resnet import resnet_cifar10 + + +def main(): + datadim = 3 * 32 * 32 + classdim = 10 + + # PaddlePaddle init + paddle.init(use_gpu=True, trainer_count=1) + + image = paddle.layer.data( + name="image", type=paddle.data_type.dense_vector(datadim)) + + # Add neural network config + # option 1. resnet + net = resnet_cifar10(image, depth=32) + # option 2. 
vgg + # net = vgg_bn_drop(image) + + out = paddle.layer.fc(input=net, + size=classdim, + act=paddle.activation.Softmax()) + + lbl = paddle.layer.data( + name="label", type=paddle.data_type.integer_value(classdim)) + cost = paddle.layer.classification_cost(input=out, label=lbl) + + # Create parameters + parameters = paddle.parameters.create(cost) + + # Create optimizer + momentum_optimizer = paddle.optimizer.Momentum( + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128), + learning_rate=0.1 / 128.0, + learning_rate_decay_a=0.1, + learning_rate_decay_b=50000 * 100, + learning_rate_schedule='discexp', + batch_size=128) + + # End batch and end pass event handler + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "\nPass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + else: + sys.stdout.write('.') + sys.stdout.flush() + if isinstance(event, paddle.event.EndPass): + result = trainer.test( + reader=paddle.batch( + paddle.dataset.cifar.test10(), batch_size=128), + reader_dict={'image': 0, + 'label': 1}) + print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) + + # Create trainer + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=momentum_optimizer) + trainer.train( + reader=paddle.batch( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(), buf_size=50000), + batch_size=128), + num_passes=5, + event_handler=event_handler, + reader_dict={'image': 0, + 'label': 1}) + + +if __name__ == '__main__': + main() diff --git a/demo/image_classification/api_v2_vgg.py b/demo/image_classification/api_v2_vgg.py new file mode 100644 index 0000000000000000000000000000000000000000..1e0e6b93adde30425f17aa9cd07542275f4fec37 --- /dev/null +++ b/demo/image_classification/api_v2_vgg.py @@ -0,0 +1,47 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
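+#
+# A VGG-style network (stacked conv blocks with batch normalization and
+# dropout) for CIFAR-10; vgg_bn_drop() returns the last fc layer, and the
+# caller adds the softmax classifier on top (see option 2 in api_v2_train.py).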
+ +import paddle.v2 as paddle + +__all__ = ['vgg_bn_drop'] + + +def vgg_bn_drop(input): + def conv_block(ipt, num_filter, groups, dropouts, num_channels=None): + return paddle.networks.img_conv_group( + input=ipt, + num_channels=num_channels, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act=paddle.activation.Relu(), + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type=paddle.pooling.Max()) + + conv1 = conv_block(input, 64, 2, [0.3, 0], 3) + conv2 = conv_block(conv1, 128, 2, [0.4, 0]) + conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) + conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) + conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) + + drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5) + fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear()) + bn = paddle.layer.batch_norm( + input=fc1, + act=paddle.activation.Relu(), + layer_attr=paddle.attr.Extra(drop_rate=0.5)) + fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear()) + return fc2 diff --git a/demo/image_classification/prediction.py b/demo/image_classification/prediction.py index 9a86aafcb2fa4d4354d1dd9443c1b73ddcda980b..49c0ff600c40e0222093ff0a8a2f7e8e38ccba29 100755 --- a/demo/image_classification/prediction.py +++ b/demo/image_classification/prediction.py @@ -126,7 +126,7 @@ class ImageClassifier(): # For oversampling, average predictions across crops. # If not, the shape of output[name]: (1, class_number), # the mean is also applicable. - return output[output_layer].mean(0) + return output[output_layer]['value'].mean(0) def predict(self, image=None, output_layer=None): assert isinstance(image, basestring) diff --git a/demo/introduction/api_train_v2.py b/demo/introduction/api_train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..75dd65f9fc8cd8e7fab5bf30a6337574a645e89f --- /dev/null +++ b/demo/introduction/api_train_v2.py @@ -0,0 +1,58 @@ +import paddle.v2 as paddle +import paddle.v2.dataset.uci_housing as uci_housing + + +def main(): + # init + paddle.init(use_gpu=False, trainer_count=1) + + # network config + x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) + y_predict = paddle.layer.fc(input=x, + param_attr=paddle.attr.Param(name='w'), + size=1, + act=paddle.activation.Linear(), + bias_attr=paddle.attr.Param(name='b')) + y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) + cost = paddle.layer.regression_cost(input=y_predict, label=y) + + # create parameters + parameters = paddle.parameters.create(cost) + + # create optimizer + optimizer = paddle.optimizer.Momentum(momentum=0) + + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer) + + # event_handler to print training and testing info + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + + if isinstance(event, paddle.event.EndPass): + result = trainer.test( + reader=paddle.reader.batched( + uci_housing.test(), batch_size=2), + reader_dict={'x': 0, + 'y': 1}) + if event.pass_id % 10 == 0: + print "Test %d, %s" % (event.pass_id, result.metrics) + + # training + trainer.train( + reader=paddle.reader.batched( + paddle.reader.shuffle( + uci_housing.train(), buf_size=500), + batch_size=2), + reader_dict={'x': 0, + 'y': 1}, + event_handler=event_handler, + num_passes=30) + + +if __name__ == 
'__main__':
+    main()
diff --git a/demo/mnist/.gitignore b/demo/mnist/.gitignore
index 8bd9837523ccf98e6e72d5b82934b7b104816217..7e61d5e3a0cabd46d4185454d46610ac2ee2e63f 100644
--- a/demo/mnist/.gitignore
+++ b/demo/mnist/.gitignore
@@ -5,3 +5,6 @@ plot.png
 train.log
 *pyc
 .ipynb_checkpoints
+params.pkl
+params.tar
+params.tar.gz
diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py
index f301da382ff8a5bc16d9c18b956f78566ed4894f..ea1caa7dd9653a2cc2860ace736fe3d25a3767e0 100644
--- a/demo/mnist/api_train.py
+++ b/demo/mnist/api_train.py
@@ -6,33 +6,15 @@ passed to C++ side of Paddle.
 The user api could be simpler and carefully designed.
 """
-import py_paddle.swig_paddle as api
-from py_paddle import DataProviderConverter
-import paddle.trainer.PyDataProvider2 as dp
-import numpy as np
 import random
-from mnist_util import read_from_mnist
-from paddle.trainer_config_helpers import *
-
-
-def optimizer_config():
-    settings(
-        learning_rate=1e-4,
-        learning_method=AdamOptimizer(),
-        batch_size=1000,
-        model_average=ModelAverage(average_window=0.5),
-        regularization=L2Regularization(rate=0.5))
+import numpy as np
+import paddle.v2 as paddle_v2
+import py_paddle.swig_paddle as api
+from paddle.trainer_config_helpers import *
+from py_paddle import DataProviderConverter
-def network_config():
-    imgs = data_layer(name='pixel', size=784)
-    hidden1 = fc_layer(input=imgs, size=200)
-    hidden2 = fc_layer(input=hidden1, size=200)
-    inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
-    cost = classification_cost(
-        input=inference, label=data_layer(
-            name='label', size=10))
-    outputs(cost)
+from mnist_util import read_from_mnist
 def init_parameter(network):
@@ -75,19 +57,35 @@ def input_order_converter(generator):
 def main():
     api.initPaddle("-use_gpu=false", "-trainer_count=4")  # use 4 cpu cores
-    # get enable_types for each optimizer.
-    # enable_types = [value, gradient, momentum, etc]
-    # For each optimizer(SGD, Adam), GradientMachine should enable different
-    # buffers.
-    opt_config_proto = parse_optimizer_config(optimizer_config)
-    opt_config = api.OptimizationConfig.createFromProto(opt_config_proto)
-    _temp_optimizer_ = api.ParameterOptimizer.create(opt_config)
-    enable_types = _temp_optimizer_.getParameterTypes()
+    optimizer = paddle_v2.optimizer.Adam(
+        learning_rate=1e-4,
+        batch_size=1000,
+        model_average=ModelAverage(average_window=0.5),
+        regularization=L2Regularization(rate=0.5))
+
+    # Create a local updater; "local" means it does not run in a cluster.
+    # For cluster training we could switch to createRemoteUpdater
+    # in the future.
+    updater = optimizer.create_local_updater()
+    assert isinstance(updater, api.ParameterUpdater)
+
+    # define network
+    images = paddle_v2.layer.data(
+        name='pixel', type=paddle_v2.data_type.dense_vector(784))
+    label = paddle_v2.layer.data(
+        name='label', type=paddle_v2.data_type.integer_value(10))
+    hidden1 = paddle_v2.layer.fc(input=images, size=200)
+    hidden2 = paddle_v2.layer.fc(input=hidden1, size=200)
+    inference = paddle_v2.layer.fc(input=hidden2,
+                                   size=10,
+                                   act=paddle_v2.activation.Softmax())
+    cost = paddle_v2.layer.classification_cost(input=inference, label=label)
     # Create Simple Gradient Machine.
-    model_config = parse_network_config(network_config)
-    m = api.GradientMachine.createFromConfigProto(
-        model_config, api.CREATE_MODE_NORMAL, enable_types)
+    model_config = paddle_v2.layer.parse_network(cost)
+    m = api.GradientMachine.createFromConfigProto(model_config,
+                                                  api.CREATE_MODE_NORMAL,
+                                                  optimizer.enable_types())
     # This type check is not useful. Only enable type hint in IDE.
     # Such as PyCharm
@@ -96,19 +94,12 @@ def main():
     # Initialize Parameter by numpy.
     init_parameter(network=m)
-    # Create Local Updater. Local means not run in cluster.
-    # For a cluster training, here we can change to createRemoteUpdater
-    # in future.
-    updater = api.ParameterUpdater.createLocalUpdater(opt_config)
-    assert isinstance(updater, api.ParameterUpdater)
-
     # Initialize ParameterUpdater.
     updater.init(m)
     # DataProvider Converter is a utility convert Python Object to Paddle C++
     # Input. The input format is as same as Paddle's DataProvider.
-    converter = DataProviderConverter(
-        input_types=[dp.dense_vector(784), dp.integer_value(10)])
+    converter = DataProviderConverter(input_types=[images.type, label.type])
     train_file = './data/raw_data/train'
     test_file = './data/raw_data/t10k'
diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..072b2a08da6db1f6ae7b84ee66dbc88aef487deb
--- /dev/null
+++ b/demo/mnist/api_train_v2.py
@@ -0,0 +1,141 @@
+import paddle.v2 as paddle
+import gzip
+
+
+def softmax_regression(img):
+    predict = paddle.layer.fc(input=img,
+                              size=10,
+                              act=paddle.activation.Softmax())
+    return predict
+
+
+def multilayer_perceptron(img):
+    # The first fully-connected layer
+    hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
+    # The second fully-connected layer and the corresponding activation function
+    hidden2 = paddle.layer.fc(input=hidden1,
+                              size=64,
+                              act=paddle.activation.Relu())
+    # The third fully-connected layer; note that the hidden size should be 10,
+    # which is the number of unique digits
+    predict = paddle.layer.fc(input=hidden2,
+                              size=10,
+                              act=paddle.activation.Softmax())
+    return predict
+
+
+def convolutional_neural_network(img):
+    # first conv layer
+    conv_pool_1 = paddle.networks.simple_img_conv_pool(
+        input=img,
+        filter_size=5,
+        num_filters=20,
+        num_channel=1,
+        pool_size=2,
+        pool_stride=2,
+        act=paddle.activation.Tanh())
+    # second conv layer
+    conv_pool_2 = paddle.networks.simple_img_conv_pool(
+        input=conv_pool_1,
+        filter_size=5,
+        num_filters=50,
+        num_channel=20,
+        pool_size=2,
+        pool_stride=2,
+        act=paddle.activation.Tanh())
+    # The first fully-connected layer
+    fc1 = paddle.layer.fc(input=conv_pool_2,
+                          size=128,
+                          act=paddle.activation.Tanh())
+    # The softmax layer; note that the hidden size should be 10,
+    # which is the number of unique digits
+    predict = paddle.layer.fc(input=fc1,
+                              size=10,
+                              act=paddle.activation.Softmax())
+    return predict
+
+
+def main():
+    paddle.init(use_gpu=False, trainer_count=1)
+
+    # define network topology
+    images = paddle.layer.data(
+        name='pixel', type=paddle.data_type.dense_vector(784))
+    label = paddle.layer.data(
+        name='label', type=paddle.data_type.integer_value(10))
+
+    # Here we can build the prediction network in different ways. Please
+    # choose one by uncommenting the corresponding line.
+ predict = softmax_regression(images) + #predict = multilayer_perceptron(images) + #predict = convolutional_neural_network(images) + + cost = paddle.layer.classification_cost(input=predict, label=label) + + try: + with gzip.open('params.tar.gz', 'r') as f: + parameters = paddle.parameters.Parameters.from_tar(f) + except IOError: + parameters = paddle.parameters.create(cost) + + optimizer = paddle.optimizer.Momentum( + learning_rate=0.1 / 128.0, + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128)) + + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer) + + lists = [] + + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 1000 == 0: + result = trainer.test(reader=paddle.reader.batched( + paddle.dataset.mnist.test(), batch_size=256)) + + print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics, + result.metrics) + + with gzip.open('params.tar.gz', 'w') as f: + parameters.to_tar(f) + + elif isinstance(event, paddle.event.EndPass): + result = trainer.test(reader=paddle.reader.batched( + paddle.dataset.mnist.test(), batch_size=128)) + print "Test with Pass %d, Cost %f, %s\n" % ( + event.pass_id, result.cost, result.metrics) + lists.append((event.pass_id, result.cost, + result.metrics['classification_error_evaluator'])) + + trainer.train( + reader=paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=128), + event_handler=event_handler, + num_passes=100) + + # find the best pass + best = sorted(lists, key=lambda list: float(list[1]))[0] + print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1]) + print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100) + + # output is a softmax layer. It returns probabilities. + # Shape should be (100, 10) + probs = paddle.infer( + output=predict, + parameters=parameters, + reader=paddle.batch( + paddle.reader.firstn( + paddle.reader.map_readers(lambda item: (item[0], ), + paddle.dataset.mnist.test()), + n=100), + batch_size=32)) + print probs.shape + + +if __name__ == '__main__': + main() diff --git a/demo/model_zoo/resnet/classify.py b/demo/model_zoo/resnet/classify.py index 4631816c43ef48839df1863a0a86c3ab00924d3f..6074cc1d3a85e13e3e8d336d81e22104f9d8e7cf 100755 --- a/demo/model_zoo/resnet/classify.py +++ b/demo/model_zoo/resnet/classify.py @@ -156,7 +156,7 @@ class ImageClassifier(): # For oversampling, average predictions across crops. # If not, the shape of output[name]: (1, class_number), # the mean is also applicable. 
- res[name] = output[name].mean(0) + res[name] = output[name]['value'].mean(0) return res diff --git a/demo/semantic_role_labeling/api_train_v2.py b/demo/semantic_role_labeling/api_train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..15db922b97abc5ae79f095edfd632604eec8ab94 --- /dev/null +++ b/demo/semantic_role_labeling/api_train_v2.py @@ -0,0 +1,190 @@ +import sys +import math +import numpy as np +import paddle.v2 as paddle +import paddle.v2.dataset.conll05 as conll05 + + +def db_lstm(): + word_dict, verb_dict, label_dict = conll05.get_dict() + word_dict_len = len(word_dict) + label_dict_len = len(label_dict) + pred_len = len(verb_dict) + + mark_dict_len = 2 + word_dim = 32 + mark_dim = 5 + hidden_dim = 512 + depth = 8 + + #8 features + def d_type(size): + return paddle.data_type.integer_value_sequence(size) + + word = paddle.layer.data(name='word_data', type=d_type(word_dict_len)) + predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len)) + + ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len)) + ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len)) + ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len)) + ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len)) + ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len)) + mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len)) + + target = paddle.layer.data(name='target', type=d_type(label_dict_len)) + + default_std = 1 / math.sqrt(hidden_dim) / 3.0 + + emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.) + std_0 = paddle.attr.Param(initial_std=0.) + std_default = paddle.attr.Param(initial_std=default_std) + + predicate_embedding = paddle.layer.embedding( + size=word_dim, + input=predicate, + param_attr=paddle.attr.Param( + name='vemb', initial_std=default_std)) + mark_embedding = paddle.layer.embedding( + size=mark_dim, input=mark, param_attr=std_0) + + word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] + emb_layers = [ + paddle.layer.embedding( + size=word_dim, input=x, param_attr=emb_para) for x in word_input + ] + emb_layers.append(predicate_embedding) + emb_layers.append(mark_embedding) + + hidden_0 = paddle.layer.mixed( + size=hidden_dim, + bias_attr=std_default, + input=[ + paddle.layer.full_matrix_projection( + input=emb, param_attr=std_default) for emb in emb_layers + ]) + + mix_hidden_lr = 1e-3 + lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0) + hidden_para_attr = paddle.attr.Param( + initial_std=default_std, learning_rate=mix_hidden_lr) + + lstm_0 = paddle.layer.lstmemory( + input=hidden_0, + act=paddle.activation.Relu(), + gate_act=paddle.activation.Sigmoid(), + state_act=paddle.activation.Sigmoid(), + bias_attr=std_0, + param_attr=lstm_para_attr) + + #stack L-LSTM and R-LSTM with direct edges + input_tmp = [hidden_0, lstm_0] + + for i in range(1, depth): + mix_hidden = paddle.layer.mixed( + size=hidden_dim, + bias_attr=std_default, + input=[ + paddle.layer.full_matrix_projection( + input=input_tmp[0], param_attr=hidden_para_attr), + paddle.layer.full_matrix_projection( + input=input_tmp[1], param_attr=lstm_para_attr) + ]) + + lstm = paddle.layer.lstmemory( + input=mix_hidden, + act=paddle.activation.Relu(), + gate_act=paddle.activation.Sigmoid(), + state_act=paddle.activation.Sigmoid(), + reverse=((i % 2) == 1), + bias_attr=std_0, + param_attr=lstm_para_attr) + + input_tmp = [mix_hidden, lstm] + + feature_out = 
paddle.layer.mixed( + size=label_dict_len, + bias_attr=std_default, + input=[ + paddle.layer.full_matrix_projection( + input=input_tmp[0], param_attr=hidden_para_attr), + paddle.layer.full_matrix_projection( + input=input_tmp[1], param_attr=lstm_para_attr) + ], ) + + crf_cost = paddle.layer.crf(size=label_dict_len, + input=feature_out, + label=target, + param_attr=paddle.attr.Param( + name='crfw', + initial_std=default_std, + learning_rate=mix_hidden_lr)) + + crf_dec = paddle.layer.crf_decoding( + name='crf_dec_l', + size=label_dict_len, + input=feature_out, + label=target, + param_attr=paddle.attr.Param(name='crfw')) + + return crf_cost, crf_dec + + +def load_parameter(file_name, h, w): + with open(file_name, 'rb') as f: + f.read(16) # skip header. + return np.fromfile(f, dtype=np.float32).reshape(h, w) + + +def main(): + paddle.init(use_gpu=False, trainer_count=1) + + # define network topology + crf_cost, crf_dec = db_lstm() + + # create parameters + parameters = paddle.parameters.create([crf_cost, crf_dec]) + + # create optimizer + optimizer = paddle.optimizer.Momentum( + momentum=0, + learning_rate=2e-2, + regularization=paddle.optimizer.L2Regularization(rate=8e-4), + model_average=paddle.optimizer.ModelAverage( + average_window=0.5, max_average_window=10000), ) + + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + + trainer = paddle.trainer.SGD(cost=crf_cost, + parameters=parameters, + update_equation=optimizer) + parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32)) + + trn_reader = paddle.reader.batched( + paddle.reader.shuffle( + conll05.test(), buf_size=8192), batch_size=10) + + reader_dict = { + 'word_data': 0, + 'ctx_n2_data': 1, + 'ctx_n1_data': 2, + 'ctx_0_data': 3, + 'ctx_p1_data': 4, + 'ctx_p2_data': 5, + 'verb_data': 6, + 'mark_data': 7, + 'target': 8 + } + + trainer.train( + reader=trn_reader, + event_handler=event_handler, + num_passes=10000, + reader_dict=reader_dict) + + +if __name__ == '__main__': + main() diff --git a/demo/sentiment/dataprovider.py b/demo/sentiment/dataprovider.py index 00f72cecacb454a0dd1184fa2098be4543007de7..4b7f5d0e504aef3884a04cbed8c16503a4079772 100755 --- a/demo/sentiment/dataprovider.py +++ b/demo/sentiment/dataprovider.py @@ -32,4 +32,6 @@ def process(settings, file_name): word_slot = [ settings.word_dict[w] for w in words if w in settings.word_dict ] + if not word_slot: + continue yield word_slot, label diff --git a/demo/sentiment/predict.py b/demo/sentiment/predict.py index 8ec490f64691924013200a3d0038d39aa834b038..64c78e0d6b9297e7a321a4f070517593b0bfe332 100755 --- a/demo/sentiment/predict.py +++ b/demo/sentiment/predict.py @@ -138,7 +138,11 @@ def main(): batch = [] for line in sys.stdin: - batch.append([predict.get_index(line)]) + words = predict.get_index(line) + if words: + batch.append([words]) + else: + print('All the words in [%s] are not in the dictionary.' % line) if len(batch) == batch_size: predict.batch_predict(batch) batch = [] diff --git a/demo/sentiment/train_v2.py b/demo/sentiment/train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..3a266e74ea93068cad2757d0076a4ae664ad4cf8 --- /dev/null +++ b/demo/sentiment/train_v2.py @@ -0,0 +1,166 @@ +# Copyright (c) 2016 PaddlePaddle Authors. 
All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import paddle.trainer_config_helpers.attrs as attrs
+from paddle.trainer_config_helpers.poolings import MaxPooling
+import paddle.v2 as paddle
+
+
+def convolution_net(input_dim,
+                    class_dim=2,
+                    emb_dim=128,
+                    hid_dim=128,
+                    is_predict=False):
+    data = paddle.layer.data("word",
+                             paddle.data_type.integer_value_sequence(input_dim))
+    emb = paddle.layer.embedding(input=data, size=emb_dim)
+    conv_3 = paddle.networks.sequence_conv_pool(
+        input=emb, context_len=3, hidden_size=hid_dim)
+    conv_4 = paddle.networks.sequence_conv_pool(
+        input=emb, context_len=4, hidden_size=hid_dim)
+    output = paddle.layer.fc(input=[conv_3, conv_4],
+                             size=class_dim,
+                             act=paddle.activation.Softmax())
+    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
+    cost = paddle.layer.classification_cost(input=output, label=lbl)
+    return cost
+
+
+def stacked_lstm_net(input_dim,
+                     class_dim=2,
+                     emb_dim=128,
+                     hid_dim=512,
+                     stacked_num=3,
+                     is_predict=False):
+    """
+    A wrapper for the sentiment classification task.
+    This network uses a bi-directional recurrent network
+    consisting of three LSTM layers. The configuration follows the
+    paper at the following URL, but uses fewer layers.
+        http://www.aclweb.org/anthology/P15-1109
+
+    input_dim: dimension of the word dictionary.
+    class_dim: number of categories.
+    emb_dim: dimension of the word embedding.
+    hid_dim: dimension of the hidden layer.
+    stacked_num: number of stacked lstm-hidden layers.
+    is_predict: whether the network is used for prediction;
+        some layers are not needed when predicting.
+    """
+    assert stacked_num % 2 == 1
+
+    layer_attr = attrs.ExtraLayerAttribute(drop_rate=0.5)
+    fc_para_attr = attrs.ParameterAttribute(learning_rate=1e-3)
+    lstm_para_attr = attrs.ParameterAttribute(initial_std=0., learning_rate=1.)
+    para_attr = [fc_para_attr, lstm_para_attr]
+    bias_attr = attrs.ParameterAttribute(initial_std=0., l2_rate=0.)
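+    # Shared activation objects: 'relu' drives each lstmemory layer and
+    # 'linear' keeps the fc layers as plain projections in front of them.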
+    relu = paddle.activation.Relu()
+    linear = paddle.activation.Linear()
+
+    data = paddle.layer.data("word",
+                             paddle.data_type.integer_value_sequence(input_dim))
+    emb = paddle.layer.embedding(input=data, size=emb_dim)
+
+    fc1 = paddle.layer.fc(input=emb,
+                          size=hid_dim,
+                          act=linear,
+                          bias_attr=bias_attr)
+    lstm1 = paddle.layer.lstmemory(
+        input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
+
+    inputs = [fc1, lstm1]
+    for i in range(2, stacked_num + 1):
+        fc = paddle.layer.fc(input=inputs,
+                             size=hid_dim,
+                             act=linear,
+                             param_attr=para_attr,
+                             bias_attr=bias_attr)
+        lstm = paddle.layer.lstmemory(
+            input=fc,
+            reverse=(i % 2) == 0,
+            act=relu,
+            bias_attr=bias_attr,
+            layer_attr=layer_attr)
+        inputs = [fc, lstm]
+
+    fc_last = paddle.layer.pooling(input=inputs[0], pooling_type=MaxPooling())
+    lstm_last = paddle.layer.pooling(input=inputs[1], pooling_type=MaxPooling())
+    output = paddle.layer.fc(input=[fc_last, lstm_last],
+                             size=class_dim,
+                             act=paddle.activation.Softmax(),
+                             bias_attr=bias_attr,
+                             param_attr=para_attr)
+
+    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
+    cost = paddle.layer.classification_cost(input=output, label=lbl)
+    return cost
+
+
+if __name__ == '__main__':
+    # init
+    paddle.init(use_gpu=True, trainer_count=4)
+
+    # network config
+    print 'load dictionary...'
+    word_dict = paddle.dataset.imdb.word_dict()
+    dict_dim = len(word_dict)
+    class_dim = 2
+
+    # Please choose the way to build the network
+    # by uncommenting the corresponding line.
+    cost = convolution_net(dict_dim, class_dim=class_dim)
+    # cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
+
+    # create parameters
+    parameters = paddle.parameters.create(cost)
+
+    # create optimizer
+    adam_optimizer = paddle.optimizer.Adam(
+        learning_rate=2e-3,
+        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
+        model_average=paddle.optimizer.ModelAverage(average_window=0.5))
+
+    # End batch and end pass event handler
+    def event_handler(event):
+        if isinstance(event, paddle.event.EndIteration):
+            if event.batch_id % 100 == 0:
+                print "\nPass %d, Batch %d, Cost %f, %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics)
+            else:
+                sys.stdout.write('.')
+                sys.stdout.flush()
+        if isinstance(event, paddle.event.EndPass):
+            result = trainer.test(
+                reader=paddle.reader.batched(
+                    lambda: paddle.dataset.imdb.test(word_dict),
+                    batch_size=128),
+                reader_dict={'word': 0,
+                             'label': 1})
+            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
+
+    # create trainer
+    trainer = paddle.trainer.SGD(cost=cost,
+                                 parameters=parameters,
+                                 update_equation=adam_optimizer)
+
+    trainer.train(
+        reader=paddle.reader.batched(
+            paddle.reader.shuffle(
+                lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
+            batch_size=100),
+        event_handler=event_handler,
+        reader_dict={'word': 0,
+                     'label': 1},
+        num_passes=10)
diff --git a/demo/seqToseq/api_train_v2.py b/demo/seqToseq/api_train_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5f59ec379738eb5bed3e7559739cae38582ed06
--- /dev/null
+++ b/demo/seqToseq/api_train_v2.py
@@ -0,0 +1,110 @@
+import os
+
+import paddle.v2 as paddle
+
+from seqToseq_net_v2 import seqToseq_net_v2
+
+# Data Definition.
+# TODO: This code should be merged into the dataset package.
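+# The dictionaries are assumed to be the src.dict/trg.dict files produced by
+# the demo's preprocessing step, with one token per line, so the line counts
+# below double as the vocabulary sizes.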
+data_dir = "./data/pre-wmt14" +src_lang_dict = os.path.join(data_dir, 'src.dict') +trg_lang_dict = os.path.join(data_dir, 'trg.dict') + +source_dict_dim = len(open(src_lang_dict, "r").readlines()) +target_dict_dim = len(open(trg_lang_dict, "r").readlines()) + + +def read_to_dict(dict_path): + with open(dict_path, "r") as fin: + out_dict = { + line.strip(): line_count + for line_count, line in enumerate(fin) + } + return out_dict + + +src_dict = read_to_dict(src_lang_dict) +trg_dict = read_to_dict(trg_lang_dict) + +train_list = os.path.join(data_dir, 'train.list') +test_list = os.path.join(data_dir, 'test.list') + +UNK_IDX = 2 +START = "" +END = "" + + +def _get_ids(s, dictionary): + words = s.strip().split() + return [dictionary[START]] + \ + [dictionary.get(w, UNK_IDX) for w in words] + \ + [dictionary[END]] + + +def train_reader(file_name): + def reader(): + with open(file_name, 'r') as f: + for line_count, line in enumerate(f): + line_split = line.strip().split('\t') + if len(line_split) != 2: + continue + src_seq = line_split[0] # one source sequence + src_ids = _get_ids(src_seq, src_dict) + + trg_seq = line_split[1] # one target sequence + trg_words = trg_seq.split() + trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words] + + # remove sequence whose length > 80 in training mode + if len(src_ids) > 80 or len(trg_ids) > 80: + continue + trg_ids_next = trg_ids + [trg_dict[END]] + trg_ids = [trg_dict[START]] + trg_ids + + yield src_ids, trg_ids, trg_ids_next + + return reader + + +def main(): + paddle.init(use_gpu=False, trainer_count=1) + + # define network topology + cost = seqToseq_net_v2(source_dict_dim, target_dict_dim) + parameters = paddle.parameters.create(cost) + + # define optimize method and trainer + optimizer = paddle.optimizer.Adam(learning_rate=1e-4) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=optimizer) + + # define data reader + reader_dict = { + 'source_language_word': 0, + 'target_language_word': 1, + 'target_language_next_word': 2 + } + + wmt14_reader = paddle.reader.batched( + paddle.reader.shuffle( + train_reader("data/pre-wmt14/train/train"), buf_size=8192), + batch_size=5) + + # define event_handler callback + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 10 == 0: + print "Pass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + + # start to train + trainer.train( + reader=wmt14_reader, + event_handler=event_handler, + num_passes=10000, + reader_dict=reader_dict) + + +if __name__ == '__main__': + main() diff --git a/demo/seqToseq/seqToseq_net_v2.py b/demo/seqToseq/seqToseq_net_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..058a6789d7094c71492ed9772ed5594c4c0c8f84 --- /dev/null +++ b/demo/seqToseq/seqToseq_net_v2.py @@ -0,0 +1,92 @@ +import paddle.v2 as paddle + + +def seqToseq_net_v2(source_dict_dim, target_dict_dim): + ### Network Architecture + word_vector_dim = 512 # dimension of word vector + decoder_size = 512 # dimension of hidden unit in GRU Decoder network + encoder_size = 512 # dimension of hidden unit in GRU Encoder network + + #### Encoder + src_word_id = paddle.layer.data( + name='source_language_word', + type=paddle.data_type.integer_value_sequence(source_dict_dim)) + src_embedding = paddle.layer.embedding( + input=src_word_id, + size=word_vector_dim, + param_attr=paddle.attr.ParamAttr(name='_source_language_embedding')) + src_forward = paddle.networks.simple_gru( + input=src_embedding, 
size=encoder_size)
+    src_backward = paddle.networks.simple_gru(
+        input=src_embedding, size=encoder_size, reverse=True)
+    encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
+
+    #### Decoder
+    with paddle.layer.mixed(size=decoder_size) as encoded_proj:
+        encoded_proj += paddle.layer.full_matrix_projection(
+            input=encoded_vector)
+
+    backward_first = paddle.layer.first_seq(input=src_backward)
+
+    with paddle.layer.mixed(
+            size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
+        decoder_boot += paddle.layer.full_matrix_projection(
+            input=backward_first)
+
+    def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
+
+        decoder_mem = paddle.layer.memory(
+            name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
+
+        context = paddle.networks.simple_attention(
+            encoded_sequence=enc_vec,
+            encoded_proj=enc_proj,
+            decoder_state=decoder_mem)
+
+        with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
+            decoder_inputs += paddle.layer.full_matrix_projection(input=context)
+            decoder_inputs += paddle.layer.full_matrix_projection(
+                input=current_word)
+
+        gru_step = paddle.layer.gru_step(
+            name='gru_decoder',
+            input=decoder_inputs,
+            output_mem=decoder_mem,
+            size=decoder_size)
+
+        with paddle.layer.mixed(
+                size=target_dict_dim,
+                bias_attr=True,
+                act=paddle.activation.Softmax()) as out:
+            out += paddle.layer.full_matrix_projection(input=gru_step)
+        return out
+
+    decoder_group_name = "decoder_group"
+    group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
+    group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
+    group_inputs = [group_input1, group_input2]
+
+    trg_embedding = paddle.layer.embedding(
+        input=paddle.layer.data(
+            name='target_language_word',
+            type=paddle.data_type.integer_value_sequence(target_dict_dim)),
+        size=word_vector_dim,
+        param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
+    group_inputs.append(trg_embedding)
+
+    # For a decoder equipped with an attention mechanism, the target embedding
+    # (the ground truth) is the data input during training, while the encoded
+    # source sequence is accessed as an unbounded memory.
+    # Here, the StaticInput defines a read-only memory
+    # for the recurrent_group.
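+    # recurrent_group applies gru_decoder_with_attention once per target
+    # word; the two StaticInputV2 entries are visible to every step but are
+    # not iterated over.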
+ decoder = paddle.layer.recurrent_group( + name=decoder_group_name, + step=gru_decoder_with_attention, + input=group_inputs) + + lbl = paddle.layer.data( + name='target_language_next_word', + type=paddle.data_type.integer_value_sequence(target_dict_dim)) + cost = paddle.layer.classification_cost(input=decoder, label=lbl) + + return cost diff --git a/demo/word2vec/train_v2.py b/demo/word2vec/train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..7d952b446f9db432062fc3305a6b65b0ad66dd47 --- /dev/null +++ b/demo/word2vec/train_v2.py @@ -0,0 +1,80 @@ +import math + +import paddle.v2 as paddle + +dictsize = 1953 +embsize = 32 +hiddensize = 256 +N = 5 + + +def wordemb(inlayer): + wordemb = paddle.layer.table_projection( + input=inlayer, + size=embsize, + param_attr=paddle.attr.Param( + name="_proj", + initial_std=0.001, + learning_rate=1, + l2_rate=0, )) + return wordemb + + +def main(): + paddle.init(use_gpu=False, trainer_count=1) + word_dict = paddle.dataset.imikolov.build_dict() + dict_size = len(word_dict) + firstword = paddle.layer.data( + name="firstw", type=paddle.data_type.integer_value(dict_size)) + secondword = paddle.layer.data( + name="secondw", type=paddle.data_type.integer_value(dict_size)) + thirdword = paddle.layer.data( + name="thirdw", type=paddle.data_type.integer_value(dict_size)) + fourthword = paddle.layer.data( + name="fourthw", type=paddle.data_type.integer_value(dict_size)) + nextword = paddle.layer.data( + name="fifthw", type=paddle.data_type.integer_value(dict_size)) + + Efirst = wordemb(firstword) + Esecond = wordemb(secondword) + Ethird = wordemb(thirdword) + Efourth = wordemb(fourthword) + + contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth]) + hidden1 = paddle.layer.fc(input=contextemb, + size=hiddensize, + act=paddle.activation.Sigmoid(), + layer_attr=paddle.attr.Extra(drop_rate=0.5), + bias_attr=paddle.attr.Param(learning_rate=2), + param_attr=paddle.attr.Param( + initial_std=1. / math.sqrt(embsize * 8), + learning_rate=1)) + predictword = paddle.layer.fc(input=hidden1, + size=dict_size, + bias_attr=paddle.attr.Param(learning_rate=2), + act=paddle.activation.Softmax()) + + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + result = trainer.test( + paddle.batch( + paddle.dataset.imikolov.test(word_dict, N), 32)) + print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics, + result.metrics) + + cost = paddle.layer.classification_cost(input=predictword, label=nextword) + parameters = paddle.parameters.create(cost) + adam_optimizer = paddle.optimizer.Adam( + learning_rate=3e-3, + regularization=paddle.optimizer.L2Regularization(8e-4)) + trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer) + trainer.train( + paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32), + num_passes=30, + event_handler=event_handler) + + +if __name__ == '__main__': + main() diff --git a/doc/api/index_cn.rst b/doc/api/index_cn.rst index 3718cd73a2003b8ef6c406a9bd51dc68e76402dc..fca981221e490686e468ae8d385d844d49767883 100644 --- a/doc/api/index_cn.rst +++ b/doc/api/index_cn.rst @@ -1,37 +1,26 @@ -API中文手册 -============ +API +=== -DataProvider API ----------------- +模型配置 API +------------ .. toctree:: :maxdepth: 1 - data_provider/dataprovider_cn.rst - data_provider/pydataprovider2_cn.rst + v2/model_configs.rst -.. _api_trainer_config: - -Model Config API ----------------- +数据 API +-------- .. 
toctree:: :maxdepth: 1 - trainer_config_helpers/optimizers.rst - trainer_config_helpers/data_sources.rst - trainer_config_helpers/layers.rst - trainer_config_helpers/activations.rst - trainer_config_helpers/poolings.rst - trainer_config_helpers/networks.rst - trainer_config_helpers/evaluators.rst - trainer_config_helpers/attrs.rst - + v2/data.rst -Applications API ----------------- +训练 API +-------- -.. toctree:: - :maxdepth: 1 +.. toctree:: + :maxdepth: 1 - predict/swig_py_paddle_cn.rst + v2/run_logic.rst \ No newline at end of file diff --git a/doc/api/index_en.rst b/doc/api/index_en.rst index 10c297a71d6988c002de868e804ed9ee2345fbd7..f0ad0fb2aee7345db1dd5f175a342598366f5e3c 100644 --- a/doc/api/index_en.rst +++ b/doc/api/index_en.rst @@ -1,37 +1,26 @@ API === -DataProvider API +Model Config API ---------------- .. toctree:: :maxdepth: 1 - data_provider/dataprovider_en.rst - data_provider/pydataprovider2_en.rst - -.. _api_trainer_config: + v2/model_configs.rst -Model Config API ----------------- +Data API +-------- .. toctree:: :maxdepth: 1 - trainer_config_helpers/optimizers.rst - trainer_config_helpers/data_sources.rst - trainer_config_helpers/layers.rst - trainer_config_helpers/activations.rst - trainer_config_helpers/poolings.rst - trainer_config_helpers/networks.rst - trainer_config_helpers/evaluators.rst - trainer_config_helpers/attrs.rst + v2/data.rst +Train API +--------- -Applications API ----------------- - -.. toctree:: - :maxdepth: 1 +.. toctree:: + :maxdepth: 1 - predict/swig_py_paddle_en.rst + v2/run_logic.rst \ No newline at end of file diff --git a/doc/api/data_provider/dataprovider_cn.rst b/doc/api/v1/data_provider/dataprovider_cn.rst similarity index 100% rename from doc/api/data_provider/dataprovider_cn.rst rename to doc/api/v1/data_provider/dataprovider_cn.rst diff --git a/doc/api/data_provider/dataprovider_en.rst b/doc/api/v1/data_provider/dataprovider_en.rst similarity index 100% rename from doc/api/data_provider/dataprovider_en.rst rename to doc/api/v1/data_provider/dataprovider_en.rst diff --git a/doc/api/data_provider/pydataprovider2_cn.rst b/doc/api/v1/data_provider/pydataprovider2_cn.rst similarity index 100% rename from doc/api/data_provider/pydataprovider2_cn.rst rename to doc/api/v1/data_provider/pydataprovider2_cn.rst diff --git a/doc/api/data_provider/pydataprovider2_en.rst b/doc/api/v1/data_provider/pydataprovider2_en.rst similarity index 100% rename from doc/api/data_provider/pydataprovider2_en.rst rename to doc/api/v1/data_provider/pydataprovider2_en.rst diff --git a/doc/api/data_provider/src/mnist_config.py b/doc/api/v1/data_provider/src/mnist_config.py similarity index 100% rename from doc/api/data_provider/src/mnist_config.py rename to doc/api/v1/data_provider/src/mnist_config.py diff --git a/doc/api/data_provider/src/mnist_provider.dict.py b/doc/api/v1/data_provider/src/mnist_provider.dict.py similarity index 100% rename from doc/api/data_provider/src/mnist_provider.dict.py rename to doc/api/v1/data_provider/src/mnist_provider.dict.py diff --git a/doc/api/data_provider/src/mnist_train.txt b/doc/api/v1/data_provider/src/mnist_train.txt similarity index 100% rename from doc/api/data_provider/src/mnist_train.txt rename to doc/api/v1/data_provider/src/mnist_train.txt diff --git a/doc/api/data_provider/src/sentimental_config.py b/doc/api/v1/data_provider/src/sentimental_config.py similarity index 100% rename from doc/api/data_provider/src/sentimental_config.py rename to doc/api/v1/data_provider/src/sentimental_config.py diff --git 
a/doc/api/data_provider/src/sentimental_provider.py b/doc/api/v1/data_provider/src/sentimental_provider.py similarity index 100% rename from doc/api/data_provider/src/sentimental_provider.py rename to doc/api/v1/data_provider/src/sentimental_provider.py diff --git a/doc/api/data_provider/src/sentimental_train.txt b/doc/api/v1/data_provider/src/sentimental_train.txt similarity index 100% rename from doc/api/data_provider/src/sentimental_train.txt rename to doc/api/v1/data_provider/src/sentimental_train.txt diff --git a/doc/api/data_provider/src/train.list b/doc/api/v1/data_provider/src/train.list similarity index 100% rename from doc/api/data_provider/src/train.list rename to doc/api/v1/data_provider/src/train.list diff --git a/doc/api/v1/index_cn.rst b/doc/api/v1/index_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..3718cd73a2003b8ef6c406a9bd51dc68e76402dc --- /dev/null +++ b/doc/api/v1/index_cn.rst @@ -0,0 +1,37 @@ +API中文手册 +============ + +DataProvider API +---------------- + +.. toctree:: + :maxdepth: 1 + + data_provider/dataprovider_cn.rst + data_provider/pydataprovider2_cn.rst + +.. _api_trainer_config: + +Model Config API +---------------- + +.. toctree:: + :maxdepth: 1 + + trainer_config_helpers/optimizers.rst + trainer_config_helpers/data_sources.rst + trainer_config_helpers/layers.rst + trainer_config_helpers/activations.rst + trainer_config_helpers/poolings.rst + trainer_config_helpers/networks.rst + trainer_config_helpers/evaluators.rst + trainer_config_helpers/attrs.rst + + +Applications API +---------------- + +.. toctree:: + :maxdepth: 1 + + predict/swig_py_paddle_cn.rst diff --git a/doc/api/v1/index_en.rst b/doc/api/v1/index_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..10c297a71d6988c002de868e804ed9ee2345fbd7 --- /dev/null +++ b/doc/api/v1/index_en.rst @@ -0,0 +1,37 @@ +API +=== + +DataProvider API +---------------- + +.. toctree:: + :maxdepth: 1 + + data_provider/dataprovider_en.rst + data_provider/pydataprovider2_en.rst + +.. _api_trainer_config: + +Model Config API +---------------- + +.. toctree:: + :maxdepth: 1 + + trainer_config_helpers/optimizers.rst + trainer_config_helpers/data_sources.rst + trainer_config_helpers/layers.rst + trainer_config_helpers/activations.rst + trainer_config_helpers/poolings.rst + trainer_config_helpers/networks.rst + trainer_config_helpers/evaluators.rst + trainer_config_helpers/attrs.rst + + +Applications API +---------------- + +.. 
toctree:: + :maxdepth: 1 + + predict/swig_py_paddle_en.rst diff --git a/doc/api/predict/src/predict_sample.py b/doc/api/v1/predict/src/predict_sample.py similarity index 100% rename from doc/api/predict/src/predict_sample.py rename to doc/api/v1/predict/src/predict_sample.py diff --git a/doc/api/predict/swig_py_paddle_cn.rst b/doc/api/v1/predict/swig_py_paddle_cn.rst similarity index 100% rename from doc/api/predict/swig_py_paddle_cn.rst rename to doc/api/v1/predict/swig_py_paddle_cn.rst diff --git a/doc/api/predict/swig_py_paddle_en.rst b/doc/api/v1/predict/swig_py_paddle_en.rst similarity index 100% rename from doc/api/predict/swig_py_paddle_en.rst rename to doc/api/v1/predict/swig_py_paddle_en.rst diff --git a/doc/api/trainer_config_helpers/activations.rst b/doc/api/v1/trainer_config_helpers/activations.rst similarity index 100% rename from doc/api/trainer_config_helpers/activations.rst rename to doc/api/v1/trainer_config_helpers/activations.rst diff --git a/doc/api/trainer_config_helpers/attrs.rst b/doc/api/v1/trainer_config_helpers/attrs.rst similarity index 100% rename from doc/api/trainer_config_helpers/attrs.rst rename to doc/api/v1/trainer_config_helpers/attrs.rst diff --git a/doc/api/trainer_config_helpers/data_sources.rst b/doc/api/v1/trainer_config_helpers/data_sources.rst similarity index 100% rename from doc/api/trainer_config_helpers/data_sources.rst rename to doc/api/v1/trainer_config_helpers/data_sources.rst diff --git a/doc/api/trainer_config_helpers/evaluators.rst b/doc/api/v1/trainer_config_helpers/evaluators.rst similarity index 100% rename from doc/api/trainer_config_helpers/evaluators.rst rename to doc/api/v1/trainer_config_helpers/evaluators.rst diff --git a/doc/api/trainer_config_helpers/layers.rst b/doc/api/v1/trainer_config_helpers/layers.rst similarity index 93% rename from doc/api/trainer_config_helpers/layers.rst rename to doc/api/v1/trainer_config_helpers/layers.rst index 4e429650e545179eca2f947e4af660222ad7cda8..bbea823de4d870f8a4384b6a85ebb7e8182797fe 100644 --- a/doc/api/trainer_config_helpers/layers.rst +++ b/doc/api/v1/trainer_config_helpers/layers.rst @@ -139,24 +139,12 @@ lstmemory :members: lstmemory :noindex: -lstm_step_layer ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: lstm_step_layer - :noindex: - grumemory --------- .. automodule:: paddle.trainer_config_helpers.layers :members: grumemory :noindex: -gru_step_layer ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: gru_step_layer - :noindex: - Recurrent Layer Group ===================== @@ -172,6 +160,18 @@ recurrent_group :members: recurrent_group :noindex: +lstm_step_layer +--------------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: lstm_step_layer + :noindex: + +gru_step_layer +--------------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: gru_step_layer + :noindex: + beam_search ------------ .. automodule:: paddle.trainer_config_helpers.layers @@ -279,6 +279,12 @@ concat_layer :members: concat_layer :noindex: +seq_concat_layer +---------------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: seq_concat_layer + :noindex: + Reshaping Layers ================ @@ -302,6 +308,18 @@ repeat_layer :members: repeat_layer :noindex: +rotate_layer +------------ +.. automodule:: paddle.trainer_config_helpers.layers + :members: rotate_layer + :noindex: + +seq_reshape_layer +----------------- +.. 
automodule:: paddle.trainer_config_helpers.layers + :members: seq_reshape_layer + :noindex: + Math Layers =========== @@ -382,6 +400,15 @@ sampling_id_layer :members: sampling_id_layer :noindex: +Slicing and Joining Layers +========================== + +pad_layer +----------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: pad_layer + :noindex: + .. _api_trainer_config_helpers_layers_cost_layers: Cost Layers @@ -441,6 +468,12 @@ ctc_layer :members: ctc_layer :noindex: +warp_ctc_layer +-------------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: warp_ctc_layer + :noindex: + nce_layer ----------- .. automodule:: paddle.trainer_config_helpers.layers diff --git a/doc/api/trainer_config_helpers/networks.rst b/doc/api/v1/trainer_config_helpers/networks.rst similarity index 100% rename from doc/api/trainer_config_helpers/networks.rst rename to doc/api/v1/trainer_config_helpers/networks.rst diff --git a/doc/api/trainer_config_helpers/optimizers.rst b/doc/api/v1/trainer_config_helpers/optimizers.rst similarity index 100% rename from doc/api/trainer_config_helpers/optimizers.rst rename to doc/api/v1/trainer_config_helpers/optimizers.rst diff --git a/doc/api/trainer_config_helpers/poolings.rst b/doc/api/v1/trainer_config_helpers/poolings.rst similarity index 100% rename from doc/api/trainer_config_helpers/poolings.rst rename to doc/api/v1/trainer_config_helpers/poolings.rst diff --git a/doc/api/v2/data.rst b/doc/api/v2/data.rst new file mode 100644 index 0000000000000000000000000000000000000000..1c0a202a8c04322de3e9533c11fb5c74abac6c62 --- /dev/null +++ b/doc/api/v2/data.rst @@ -0,0 +1,93 @@ +================ +Data Related API +================ + + +######### +DataTypes +######### + +.. automodule:: paddle.v2.data_type + :members: + +########## +DataFeeder +########## + +.. automodule:: paddle.v2.data_feeder + :members: + +###### +Reader +###### + +.. automodule:: paddle.v2.reader + :members: + +.. automodule:: paddle.v2.reader.creator + :members: + +######### +minibatch +######### + +.. automodule:: paddle.v2.minibatch + :members: + +####### +Dataset +####### + +.. automodule:: paddle.v2.dataset + :members: + + +mnist ++++++ + +.. automodule:: paddle.v2.dataset.mnist + :members: + + +cifar ++++++ + +.. automodule:: paddle.v2.dataset.cifar + :members: + +conll05 ++++++++ + +.. automodule:: paddle.v2.dataset.conll05 + :members: + +imdb +++++ + +.. automodule:: paddle.v2.dataset.imdb + :members: + +imikolov +++++++++ + +.. automodule:: paddle.v2.dataset.imikolov + :members: + +movielens ++++++++++ + +.. automodule:: paddle.v2.dataset.movielens + :members: + +sentiment ++++++++++ + +.. automodule:: paddle.v2.dataset.sentiment + :members: + +uci_housing ++++++++++++ + +.. automodule:: paddle.v2.dataset.uci_housing + :members: + diff --git a/doc/api/v2/model_configs.rst b/doc/api/v2/model_configs.rst new file mode 100644 index 0000000000000000000000000000000000000000..e9cd3d5bf7b0e9e59c231bcabdb163a740909de1 --- /dev/null +++ b/doc/api/v2/model_configs.rst @@ -0,0 +1,46 @@ +######################### +Configuration Related API +######################### + +====== +Layers +====== + +.. automodule:: paddle.v2.layer + :members: + + +========== +Attributes +========== + +.. automodule:: paddle.v2.attr + :members: + +=========== +Activations +=========== + +.. automodule:: paddle.v2.activation + :members: + +======== +Poolings +======== + +.. automodule:: paddle.v2.pooling + :members: + +======== +Networks +======== + +.. 
automodule:: paddle.v2.networks + :members: + +========== +Optimizers +========== + +.. automodule:: paddle.v2.optimizer + :members: diff --git a/doc/api/v2/run_logic.rst b/doc/api/v2/run_logic.rst new file mode 100644 index 0000000000000000000000000000000000000000..904d45966dfc16a474016ff48fd5a951988b0ab0 --- /dev/null +++ b/doc/api/v2/run_logic.rst @@ -0,0 +1,26 @@ +########### +Trainer API +########### + +========== +Parameters +========== + +.. automodule:: paddle.v2.parameters + :members: + + +======= +Trainer +======= + +.. automodule:: paddle.v2.trainer + :members: + + +===== +Event +===== + +.. automodule:: paddle.v2.event + :members: diff --git a/doc/design/api.md b/doc/design/api.md new file mode 100644 index 0000000000000000000000000000000000000000..8185d2af0ea264a2e7b4e28b9ed05279e4a22014 --- /dev/null +++ b/doc/design/api.md @@ -0,0 +1,262 @@ +# PaddlePaddle Design Doc + +## Ingredients + +Our design principle is to start from the essence: how can we +allow users to express and solve their problems with neural networks? +Some essential concepts that our API has to provide include: + +1. A *topology* is an expression of *layers*. + +1. A layer could be any kind of computation, including *cost*. + +1. Some layers have parameters, some don't. Most costs don't have + parameters. + +1. In some topologies, layers share parameters. For + example, + [the network for training a ranking model](https://github.com/PaddlePaddle/Paddle/issues/1311#issuecomment-279121850). + +1. At programming time, users specify topologies and possible sharing + of parameters. PaddlePaddle can figure out and create parameters + required (and possibly shared) by one or more topologies. + + +## Starting from Examples + +As a summary +of +[our discussion](https://github.com/PaddlePaddle/Paddle/issues/1315), +let us present two examples here: + + +### Example 1. Sharing Parameters between Layers + +We use +the +[3-branch ranking](https://github.com/PaddlePaddle/Paddle/issues/1311#issuecomment-279121850) model +in this example. For your convenience, I copy-and-paste the model's +topology as follows: + +``` +A -> f -\ +Q -> f --> cost +B -> f -/ +``` + +The following program trains the topology including the cost, and then +uses the sub-network in the trained topology in inference: + +```python +def f(x): + e = paddle.layer.embedding(x, parameter_name="embedding") + o = paddle.layer.softmax(e, parameter_name="semantic") + return o + +# Create 3 topologies (subnets); they share parameters because all +# corresponding layers have the same parameter names. +fA = f(paddle.layer.data(input_name="A")) +fB = f(paddle.layer.data(input_name="B")) +fQ = f(paddle.layer.data(input_name="Q")) + +topology = paddle.layer.less_than( + paddle.layer.cross_entropy(fA, fQ), + paddle.layer.cross_entropy(fB, fQ)) + +# Derive parameters required in topology and create them in model. +parameters = paddle.parameters.create(topology) + +# Estimate parameters used in topology from data. +paddle.train(topology, parameters, reader=read_ranking_model_data) + +# Inference using fA (or fB or fQ, as they share their parameters). +[testA, testB, testQ] = read_ranking_model_data() +print "The semantic-vector of testA: ", paddle.infer(fA, parameters, testA) +``` + + +### Example 2. Sharing Parameters between "Models" + +We use [GAN](https://github.com/PaddlePaddle/book/tree/develop/gan) in +this example. 
In the following example program, `d0` and `d1` +correspond to the two networks in the following figure: + + + +```python +def G(x): + # over-simplified example, as G has only one layer: + return paddle.layer.fc(x, parameter_name="G") + +def D(x): + # again, over-simplified: + return paddle.layer.fc(x, parameter_name="D") + +# Construct the first topology, which contains both D and G. +# By learning this topology, we update parameters of G. +d0 = paddle.layer.should_be_false(D(G(paddle.layer.data()))) + +# Construct a second topology d1, which contains only D. By +# training this topology, we update parameters of D. Note +# that d1 shares parameters with d0. +d1 = paddle.layer.should_be_true(D(paddle.layer.data())) + +# Create parameters from a list of multiple topologies (models) for +# the chance to share parameters between these topologies. +parameters = paddle.parameters.create([d0, d1]) + +# Iterative training of GAN. +for ...: + train(d0, parameters, reader=read_from_rng, immutable_parameters={"D"}) + train(d1, parameters, reader=read_from_realistic_images) + +# Use d1 for inference: +print "D thinks a batch of images are realistic ", infer(d1, parameters, read_mnist_images) +``` + + +### Summarization + + +The above two programs reveal some important design concerns: + +1. Users describe a topology as an expression of layers. Every layer + has a *parameter name*. If the users don't specify it explicitly, it's automatically generated as a unique name. By + specifying the parameter name, users can specify the sharing of + parameters between layers and even between topologies. + +1. `paddle.parameters.create` figures out parameters required by one + or more topologies from parameter names of layers. It creates these + parameters and returns a `ParameterSet` object, which is in essence + a map from *parameter names* to *parameters*. + +1. At training and inference time, `paddle.train` and `paddle.infer` + require both a topology and the parameter set that holds the parameters of that topology. There are some reasons: + + 1. This prevents users from forgetting to call + `paddle.parameters.create`. + 1. `paddle.train` needs to know which parameter set to update. + 1. Users could load another (pre-trained) parameter set and use it + with a topology in `paddle.infer`. + +1. By specifying the `immutable_parameters` parameter of + `paddle.train`, we can forbid the update of these parameters. + + +## Reader + +Not all programming frameworks allow users to define I/O functions. +An example is Google MapReduce, which can only read from text, +SSTable, and RecordIO files. Hadoop MapReduce allows users to define +readers and writers by deriving from base classes `Reader` and +`Writer`. The former is less flexible but also less error-prone. We +decided to provide users the flexibility to define their own readers. + + +There are some open questions here: + +1. **Should a reader return a Python dictionary?** + +1. **How to map multiple outputs from a reader to multiple data layers?** + +1. **How to easily compose some existing readers to read more data and + feed a topology with more data layers?** + + +## Training + +The recommended way to train a model is to call `paddle.train`, +which simply calls `paddle.trainer.Default`, a global variable of +type `paddle.trainer.SGD`. Equivalently, we can do + +```python +opt = paddle.trainer.SGD(..., paddle.updater.Adam(...)) +opt.train(topology, parameters, reader=read, ...) 
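+# Note: the explicit form above is equivalent to the one-line call
+# paddle.train(topology, parameters, reader=read, ...) shown earlier.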
+``` + +### Updater + +Please be aware that a trainer can accept an updater as its data +member, where an updater is a class derived from +`paddle.trainer.Updater`. This is to make it easier to customize +trainers, as discussed +[here](https://github.com/PaddlePaddle/Paddle/issues/1319). + +### Event Handler + +`paddle.train` and `paddle.trainer.XXX.train` take an optional +parameter `event_handler`, which should be either `None` or a function +that handles some events: + +1. BeginTraining +1. EndTraining +1. BeginIteration +1. EndIteration +1. BeginPass +1. EndPass + +where EndPass is sent if and only if the reader yields +`end_pass=True`. + +An example is as follows: + +```python +def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + print paddle.test(...) + +paddle.train(topology, parameters, reader, event_handler) +``` + +If we are writing a PaddlePaddle program in and for IPython/Jupyter, +we can use matplotlib in the event handler to plot a curve of +cost/error versus iterations, as shown +[here](https://blog.dominodatalab.com/interactive-dashboards-in-jupyter/). + +### Distributed Training + +If a user wants to do distributed training on a cluster, s/he should +call `paddle.dist_train` and provide access tokens to the cluster as +a parameter. + +For example, if the user has a TLS certificate that allows him to +access a Kubernetes cluster, s/he should be able to call + +```python +paddle.dist_train(model, + trainer=paddle.trainer.SGD(..., + paddle.updater.Adam(...)), + reader=read, + k8s_user="yi", + k8s_token="kube_cluster_tls.pem", + k8s_job="hello", + num_parameter_servers=15) +``` + +The pseudo code of `paddle.dist_train` is as follows: + +```python +def dist_train(topology, parameters, trainer, reader, ...): + if os.getenv("KUBERNETES_SERVICE_HOST") == None: + image_name = k8s_user + '/' + k8s_job + docker_build(image_name) + docker_push() + kube_ctrl_start_job(image_name, k8s_user, k8s_token) + else: + rank = kube_list_containers_in_job_and_return_current_containers_rank() + if rank == 0: + master() + elif rank < 15: + parameter_server() + else: + trainer.train(model, reader=read) +``` + +Please be aware that if a process is running on the Kubernetes +cluster, it will have some environment variables pre-defined. + +If `dist_train` doesn't see these environment variables, it knows +that it's running on the user's personal computer, and it should work as a +*launcher*. Otherwise, it knows that it's running on the cluster and +needs to figure out its role as either the master, a trainer, or a +parameter server. diff --git a/doc/design/reader/README.md b/doc/design/reader/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f21f7af520df5171798326818ecb97c3bcd14a12 --- /dev/null +++ b/doc/design/reader/README.md @@ -0,0 +1,202 @@ +# Python Data Reader Design Doc + +At training and testing time, PaddlePaddle programs need to read data. To ease the users' work of writing data reading code, we define that + +- A *reader* is a function that reads data (from file, network, random number generator, etc) and yields data items. +- A *reader creator* is a function that returns a reader function. +- A *reader decorator* is a function, which accepts one or more readers, and returns a reader. +- A *batch reader* is a function that reads data (from *reader*, file, network, random number generator, etc) and yields a batch of data items. + +and we provide a function that converts a reader into a batch reader, as well as frequently used reader creators and reader decorators. The sketch right below illustrates how these four concepts fit together. 
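+For illustration only, here is a minimal, self-contained sketch of the four concepts. The names `double_decorator` and `to_batch_reader` are made up for this sketch; the actual conversion function provided by the API is `paddle.batch`, introduced later in this document:
+
+```python
+import random
+
+# A *reader creator*: returns a *reader*, i.e., a function that yields data items.
+def random_number_reader_creator(n):
+    def reader():
+        for _ in range(n):
+            yield random.random()  # one single entry at a time, not a mini batch
+    return reader
+
+# A *reader decorator* (illustrative): accepts a reader, returns a wrapped reader.
+def double_decorator(reader):
+    def new_reader():
+        for item in reader():
+            yield item * 2
+    return new_reader
+
+# Converting a reader into a *batch reader* (illustrative stand-in for paddle.batch).
+def to_batch_reader(reader, batch_size):
+    def batch_reader():
+        batch = []
+        for item in reader():
+            batch.append((item,))  # each item inside a batch must be a tuple
+            if len(batch) == batch_size:
+                yield batch
+                batch = []
+        if batch:
+            yield batch
+    return batch_reader
+
+reader = double_decorator(random_number_reader_creator(10))
+for mini_batch in to_batch_reader(reader, 4)():
+    print(mini_batch)
+```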
+ +## Data Reader Interface + +Indeed, a *data reader* doesn't have to be a function that reads and yields data items. It can be any function with no parameter that creates an iterable (anything that can be used in `for x in iterable`): + +``` +iterable = data_reader() +``` + +Each element produced from the iterable should be a **single** entry of data, **not** a mini batch. That entry of data could be a single item, or a tuple of items. Each item should be of a [supported type](http://www.paddlepaddle.org/doc/ui/data_provider/pydataprovider2.html?highlight=dense_vector#input-types) (e.g., a numpy 1d array of float32, an int, or a list of ints). + +An example implementation of a single-item data reader creator: + +```python +def reader_creator_random_image(width, height): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height) + return reader +``` + +An example implementation of a multiple-item data reader creator: +```python +def reader_creator_random_image_and_label(width, height, label): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height), label + return reader +``` + +## Batch Reader Interface + +A *batch reader* can be any function with no parameter that creates an iterable (anything that can be used in `for x in iterable`). The output of the iterable should be a batch (list) of data items. Each item inside the list must be a tuple. + +Here are valid outputs: +```python +# a mini batch of three data items. Each data item consists of three columns of data, each of which is 1. +[(1, 1, 1), +(2, 2, 2), +(3, 3, 3)] + +# a mini batch of three data items, each data item is a list (single column). +[([1,1,1],), +([2,2,2],), +([3,3,3],)] +``` + +Please note that each item inside the list must be a tuple; below is an invalid output: +```python + # wrong, [1,1,1] needs to be inside a tuple: ([1,1,1],). + # Otherwise it's ambiguous whether [1,1,1] means a single column of data [1, 1, 1], + # or three columns of data, each of which is 1. +[[1,1,1], +[2,2,2], +[3,3,3]] +``` + +It's easy to convert from a reader to a batch reader: +```python +mnist_train = paddle.dataset.mnist.train() +mnist_train_batch_reader = paddle.batch(mnist_train, 128) +``` + +It's also easy to create a custom batch reader: +```python +def custom_batch_reader(): + while True: + batch = [] + for i in xrange(128): + batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended. + yield batch + +mnist_random_image_batch_reader = custom_batch_reader +``` + +## Usage + +The batch reader, a mapping from the item(s) read to data layer(s), the batch size, and the total number of passes will be passed into `paddle.train`: + +```python +# two data layers are created: +image_layer = paddle.layer.data("image", ...) +label_layer = paddle.layer.data("label", ...) + +# ... +batch_reader = paddle.batch(paddle.dataset.mnist.train(), 128) +paddle.train(batch_reader, {"image":0, "label":1}, 128, 10, ...) +``` + +## Data Reader Decorator + +A *data reader decorator* takes one or more data readers and returns a new data reader. It is similar to a [python decorator](https://wiki.python.org/moin/PythonDecorators), but it does not use the `@` syntax. + +Since we have a strict interface for data readers (no parameters, return a single data item), data readers can be used flexibly via data reader decorators. Following are a few examples: + +### Prefetch Data + +Since reading data may take time and training can not proceed without data, it is generally a good idea to prefetch data. One way such a prefetching decorator could be built is sketched right below. 
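+This sketch is an assumption about one possible implementation (a background thread filling a bounded queue), not PaddlePaddle's actual code for the `paddle.reader.buffered` decorator used next:
+
+```python
+import threading
+try:
+    import queue            # Python 3
+except ImportError:
+    import Queue as queue   # Python 2
+
+def buffered(reader, size):
+    # Sentinel object signalling that the wrapped reader is exhausted.
+    end = object()
+
+    def buffered_reader():
+        q = queue.Queue(maxsize=size)  # holds at most `size` prefetched entries
+
+        def fill():
+            for item in reader():
+                q.put(item)            # blocks while the buffer is full
+            q.put(end)
+
+        t = threading.Thread(target=fill)
+        t.daemon = True
+        t.start()
+        item = q.get()
+        while item is not end:
+            yield item
+            item = q.get()
+
+    return buffered_reader
+```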
+ +Use `paddle.reader.buffered` to prefetch data: + +```python +buffered_reader = paddle.reader.buffered(paddle.dataset.mnist.train(), 100) +``` + +`buffered_reader` will try to buffer (prefetch) `100` data entries. + +### Compose Multiple Data Readers + +For example, we may want to use a source of real images (reusing the mnist dataset) and a source of random images as input for [Generative Adversarial Networks](https://arxiv.org/abs/1406.2661). + +We can do: + +```python +def reader_creator_random_image(width, height): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height) + return reader + +def reader_creator_bool(t): + def reader(): + while True: + yield t + return reader + +true_reader = reader_creator_bool(True) +false_reader = reader_creator_bool(False) + +reader = paddle.reader.compose(paddle.dataset.mnist.train(), reader_creator_random_image(20, 20), true_reader, false_reader) +# Skipped 1 because paddle.dataset.mnist.train() produces two items per data entry. +# And we don't care about the second item at this time. +paddle.train(paddle.batch(reader, 128), {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...) +``` + +### Shuffle + +Given a shuffle buffer size `n`, `paddle.reader.shuffle` will return a data reader that buffers `n` data entries and shuffles them before a data entry is read. + +Example: +```python +reader = paddle.reader.shuffle(paddle.dataset.mnist.train(), 512) +``` + +## Q & A + +### Why does a reader return only a single entry, and not a mini batch? + +Always returning a single entry makes reusing existing data readers much easier (e.g., if an existing reader returned 3 entries instead of a single entry, the training code would be more complex because it would need to handle cases like a batch size of 2). + +We provide the function `paddle.batch` to turn a (single-entry) reader into a batch reader. + +### Why do we need a batch reader? Isn't having train take a reader and batch_size as arguments sufficient? + +In most cases, having train take a reader and batch_size as arguments would be sufficient. However, sometimes users want to customize the order of data entries inside a mini batch, or even change the batch size dynamically. + +### Why use a dictionary but not a list to provide mapping? + +We decided to use a dictionary (`{"image":0, "label":1}`) instead of a list (`["image", "label"]`) because users can easily reuse an item (e.g., using `{"image_a":0, "image_b":0, "label":1}`) or skip an item (e.g., using `{"image_a":0, "label":2}`). + +### How to create a custom data reader creator? + +```python +def image_reader_creator(image_path, label_path, n): + def reader(): + f = open(image_path) + l = open(label_path) + images = numpy.fromfile( + f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32') + images = images / 255.0 * 2.0 - 1.0 + labels = numpy.fromfile(l, 'ubyte', count=n).astype("int") + for i in xrange(n): + yield images[i, :], labels[i] # a single entry of data is created each time + f.close() + l.close() + return reader + +# image_reader_creator creates a reader +reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024) +paddle.train(paddle.batch(reader, 128), {"image":0, "label":1}, ...) +``` + +### How is `paddle.train` implemented? + +An example implementation of paddle.train could be: + +```python +def train(batch_reader, mapping, batch_size, total_pass): + for pass_idx in range(total_pass): + for mini_batch in batch_reader(): # this loop will never end in online learning. 
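+            # `mapping` (e.g., {"image": 0, "label": 1}) says which tuple position
+            # inside each data item feeds which data layer; see the Usage section above.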
+ do_forward_backward(mini_batch, mapping) +``` diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst index 35234e0eb3ece3cb20d62841c1d75e60b485b9ea..6b132d2a4d31ab85347bd41d0243ffee858ac909 100644 --- a/doc/getstarted/build_and_install/docker_install_cn.rst +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -12,7 +12,7 @@ PaddlePaddle项目提供官方 `Docker `_ 镜像。Dock PaddlePaddle提供的Docker镜像版本 -------------------------------- -我们提供了12个 `Docker image `_ ,他们的image name都是 :code:`paddle-dev/paddle` ,tag分别为 +我们提供了12个 `Docker image `_ ,他们的image name都是 :code:`paddledev/paddle` ,tag分别为 +-----------------+------------------+------------------------+-----------------------+ | | normal | devel | demo | @@ -45,7 +45,7 @@ PaddlePaddle提供的Docker镜像版本 if cat /proc/cpuinfo | grep -q avx ; then echo "Support AVX"; else echo "Not support AVX"; fi -如果输出 :code:`Support AVX`,则可以选择上表中的AVX版本PaddlePaddle。否则需要选择非AVX的PaddlePaddle。选择普通CPU版本的devel版本的image,则可以使用 :code:`paddle-dev/paddle:cpu-devel-latest` 来引用这个image。 +如果输出 :code:`Support AVX`,则可以选择上表中的AVX版本PaddlePaddle。否则需要选择非AVX的PaddlePaddle。选择普通CPU版本的devel版本的image,则可以使用 :code:`paddledev/paddle:cpu-devel-latest` 来引用这个image。 PaddlePaddle提供的镜像并不包含任何命令运行,想要运行PaddlePaddle,您需要进入镜像运行PaddlePaddle 程序或者自定义一个含有启动脚本的image。具体请参考注意事项中的 :code:`使用ssh访问PaddlePaddle镜像` diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst index 51a1a11674d98781d04137ff14bf8debe3277318..5a1056e859a0c977c9cd365ff1e4ffe58596f41f 100644 --- a/doc/getstarted/build_and_install/docker_install_en.rst +++ b/doc/getstarted/build_and_install/docker_install_en.rst @@ -16,70 +16,71 @@ Developers can work on PaddlePaddle using Docker. This allows developers to work on different platforms -- Linux, Mac OS X, and Windows -- in a consistent way. -The general development workflow with Docker and CMake is as follows: - -1. Get the source code of Paddle: +1. Build the Development Environment as a Docker Image .. code-block:: bash - git clone https://github.com/PaddlePaddle/Paddle.git + git clone --recursive https://github.com/PaddlePaddle/Paddle + cd Paddle + docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile . -2. Build a development Docker image :code:`paddle:dev` from the source - code. This image contains all the development tools and - dependencies of PaddlePaddle. + Note that by default :code:`docker build` wouldn't import source + tree into the image and build it. If we want to do that, we need + to set a build arg: .. code-block:: bash - cd paddle - docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile . + docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile --build-arg BUILD_AND_INSTALL=ON . + + +2. Run the Development Environment - Sometimes docker build might suffer from a slow network connection to the official Ubuntu apt-source servers. In such case, we can specify an apt-source mirror server that is geologically nearer to us. In the following example, we specified an apt-source server that responds fast in China.You can specify the UBUNTU MIRROR with :code:`--build-arg UBUNTU_MIRROR` like the example below. + Once we got the image :code:`paddle:dev`, we can use it to develop + Paddle by mounting the local source code tree into a container that + runs the image: .. code-block:: bash - docker build \ - --build-arg UBUNTU_MIRROR="http://mirrors.163.com" \ - -t paddle:dev \ - -f paddle/scripts/docker/Dockerfile . 
+ docker run -d -p 2202:22 -v $PWD:/paddle paddle:dev + + This runs a container of the development environment Docker image + with the local source tree mounted to :code:`/paddle` of the + container. + + Note that the default entry-point of :code:`paddle:dev` is + :code:`sshd`, and the above :code:`docker run` command actually starts + an SSHD server listening on port 2202. This allows us to log into + this container with: + + .. code-block:: bash + ssh root@localhost -p 2202 -3. Run the image as a container and mounting local source code - directory into the container. This allows us to change the code on - the host and build it within the container. + Usually, I run the above commands on my Mac. I can also run them on a + GPU server :code:`xxx.yyy.zzz.www` and ssh from my Mac to it: .. code-block:: bash - docker run \ - -d \ - --name paddle \ - -p 2022:22 \ - -v $PWD:/paddle \ - paddle:dev + my-mac$ ssh root@xxx.yyy.zzz.www -p 2202 - where :code:`-d` makes the container running in background, - :code:`--name paddle` allows us to run a nginx container to serve - documents in this container, - :code:`-p 2022:22` allows us to SSH - into this container, - :code:`-v $PWD:/paddle` shares the source code - on the host with the container. +3. Build and Install Using the Development Environment -4. SSH into the container: + Once I am in the container, I can use + :code:`paddle/scripts/docker/build.sh` to build, install, and test + Paddle: .. code-block:: bash - ssh root@localhost -p 2022 + /paddle/paddle/scripts/docker/build.sh -5. We can edit the source code in the container or on this host. Then - we can build using cmake + This builds everything about Paddle in :code:`/paddle/build`, and + we can then run unit tests there: .. code-block:: bash - cd /paddle # where paddle source code has been mounted into the container - mkdir -p build - cd build - cmake -DWITH_TESTING=ON .. 
- make -j `nproc` - CTEST_OUTPUT_ON_FAILURE=1 ctest + cd /paddle/build + ctest CPU-only and GPU Images diff --git a/doc/howto/index_cn.rst b/doc/howto/index_cn.rst index bd3d0ec292057037414792b1ac176d12605b90d5..5b84eea491f874459ed2071e4c942657cdc9b18b 100644 --- a/doc/howto/index_cn.rst +++ b/doc/howto/index_cn.rst @@ -10,6 +10,7 @@ usage/cmd_parameter/index_cn.rst usage/concepts/use_concepts_cn.rst usage/cluster/cluster_train_cn.md + usage/k8s/k8s_basis_cn.md usage/k8s/k8s_cn.md usage/k8s/k8s_distributed_cn.md diff --git a/doc/howto/usage/cluster/cluster_train_cn.md b/doc/howto/usage/cluster/cluster_train_cn.md index acdcfa1c0047ced85c0a9c53d691edc0b4489336..274452fbf0c595ad7b4dbeffe85ad9038f12b458 100644 --- a/doc/howto/usage/cluster/cluster_train_cn.md +++ b/doc/howto/usage/cluster/cluster_train_cn.md @@ -6,7 +6,7 @@ 在本文中,我们将阐释如何在集群上运行分布式 Paddle 训练作业。我们将以[推荐系统](https://github.com/baidu/Paddle/tree/develop/demo/recommendation)为例创建分布式的单进程训练。 -在本文中使用的[脚本](https://github.com/baidu/Paddle/tree/develop/paddle/scripts/cluster_train)通过 SSH 运行分布式作业。 它们还可以供那些运行更复杂的集群管理系统(如 MPI 和 [Kubernetes](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/howto/usage/cluster/k8s) )的用户参考。 +在本文中使用的[脚本](https://github.com/baidu/Paddle/tree/develop/paddle/scripts/cluster_train)通过 SSH 运行分布式作业。 它们还可以供那些运行更复杂的集群管理系统(如 MPI 和 [Kubernetes](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/howto/usage/k8s) )的用户参考。 ## 前提条件 diff --git a/doc/howto/usage/cluster/cluster_train_en.md b/doc/howto/usage/cluster/cluster_train_en.md index 30963dcd927250651f3ed0b39949f541cc28ed4a..c60876721cbf5565d6e48c8061811aacada748cd 100644 --- a/doc/howto/usage/cluster/cluster_train_en.md +++ b/doc/howto/usage/cluster/cluster_train_en.md @@ -2,7 +2,7 @@ In this article, we explain how to run distributed Paddle training jobs on clusters. We will create the distributed version of the single-process training example, [recommendation](https://github.com/baidu/Paddle/tree/develop/demo/recommendation). -[Scripts](https://github.com/baidu/Paddle/tree/develop/paddle/scripts/cluster_train) used in this article launch distributed jobs via SSH. They also work as a reference for users running more sophisticated cluster management systems like MPI and [Kubernetes](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/howto/usage/cluster/k8s). +[Scripts](https://github.com/baidu/Paddle/tree/develop/paddle/scripts/cluster_train) used in this article launch distributed jobs via SSH. They also work as a reference for users running more sophisticated cluster management systems like MPI and [Kubernetes](https://github.com/PaddlePaddle/Paddle/tree/develop/doc/howto/usage/k8s). ## Prerequisite diff --git a/doc/howto/usage/cmd_parameter/arguments_cn.md b/doc/howto/usage/cmd_parameter/arguments_cn.md index 833e21dd19ef3c01f5ef990bd12c3fc3b41ba483..2e2a2fcc54a09f4f41e4ebbc317e1409591ddd9c 100644 --- a/doc/howto/usage/cmd_parameter/arguments_cn.md +++ b/doc/howto/usage/cmd_parameter/arguments_cn.md @@ -127,11 +127,6 @@ √√ - -allow_inefficient_sparse_update -√√ - - start_pass √√ diff --git a/doc/howto/usage/cmd_parameter/arguments_en.md b/doc/howto/usage/cmd_parameter/arguments_en.md index 013edbc9047817d7f6b82c4d5188412bd2ce41d6..e5546f0ddc78a9f8bdc306a19c2fe9a415463e5a 100644 --- a/doc/howto/usage/cmd_parameter/arguments_en.md +++ b/doc/howto/usage/cmd_parameter/arguments_en.md @@ -127,11 +127,6 @@ It looks like there are a lot of arguments. 
However, most of them are for develo √√ - -allow_inefficient_sparse_update -√√ - - start_pass √√ diff --git a/doc/howto/usage/cmd_parameter/detail_introduction_cn.md b/doc/howto/usage/cmd_parameter/detail_introduction_cn.md index dbf7c6f00b8ba5c62d86fb2143221a27330b9506..3b573a324d541b024600a254d5266e517db229c5 100644 --- a/doc/howto/usage/cmd_parameter/detail_introduction_cn.md +++ b/doc/howto/usage/cmd_parameter/detail_introduction_cn.md @@ -306,10 +306,6 @@ - 指示是否显示参数服务器上的稀疏参数分布的日志细节. - 类型: bool (默认: 0). -* `--allow_inefficient_sparse_update` - - 指示是否允许低效率的稀疏更新. - - 类型: bool (默认: 0). - * `--check_sparse_distribution_batches` - 每运行多少个批次执行一次稀疏参数分布的检查. - 类型: int32 (默认: 100). diff --git a/doc/howto/usage/cmd_parameter/detail_introduction_en.md b/doc/howto/usage/cmd_parameter/detail_introduction_en.md index aa69a3bd5423c4f3223242bdafda251271925f2d..33b7ec0d51a96ee126197e7aa819fdae0d3dc353 100644 --- a/doc/howto/usage/cmd_parameter/detail_introduction_en.md +++ b/doc/howto/usage/cmd_parameter/detail_introduction_en.md @@ -310,10 +310,6 @@ - show log details for sparse parameter distribution in pserver. - type: bool (default: 0). -* `--allow_inefficient_sparse_update` - - Whether to allow inefficient sparse update. - - type: bool (default: 0). - * `--check_sparse_distribution_batches` - Running sparse parameter distribution check every so many batches. - type: int32 (default: 100). diff --git a/doc/howto/usage/k8s/k8s_aws_en.md b/doc/howto/usage/k8s/k8s_aws_en.md index a6422b9be00e210a6a305260585520acd72fb2f1..ce72b0803818d5bf0c18753c421848cf2fc1b668 100644 --- a/doc/howto/usage/k8s/k8s_aws_en.md +++ b/doc/howto/usage/k8s/k8s_aws_en.md @@ -1,6 +1,56 @@ -# Kubernetes on AWS -## Create AWS Account and IAM Account +# Distributed PaddlePaddle Training on AWS with Kubernetes + +We will show you, step by step, how to run distributed PaddlePaddle training on an AWS cluster with Kubernetes. Let's start with the core concepts. + +## Distributed PaddlePaddle Training Core Concepts + +### Distributed Training Job + +A distributed training job is represented by a [Kubernetes job](https://kubernetes.io/docs/user-guide/jobs/#what-is-a-job). + +Each Kubernetes job is described by a job config file, which specifies information like the number of [pods](https://kubernetes.io/docs/user-guide/pods/#what-is-a-pod) in the job and environment variables. + +In a distributed training job, we would: + +1. prepare the partitioned training data and configuration file on a distributed file system (in this tutorial we use Amazon Elastic File System), and +1. create and submit the Kubernetes job config to the Kubernetes cluster to start the training job. + +### Parameter Servers and Trainers + +There are two roles in a PaddlePaddle cluster: *parameter server (pserver)* and *trainer*. Each parameter server process maintains a shard of the global model. Each trainer has its local copy of the model, and uses its local data to update the model. During the training process, trainers send model updates to the parameter servers, and the parameter servers are responsible for aggregating these updates, so that trainers can synchronize their local copies with the global model. + 
![Model is partitioned into two shards. Managed by two parameter servers respectively.](src/pserver_and_trainer.png)
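+Purely as an illustration of the figure above (this is not PaddlePaddle code; `pserver_apply` and `trainer_step` are made-up names), one synchronization round could be sketched like this:
+
+```python
+import numpy as np
+
+# Global model partitioned into two shards, one per parameter server.
+shards = {0: np.zeros(3), 1: np.zeros(3)}
+
+def pserver_apply(shard_id, update):
+    # A pserver aggregates updates into its shard of the global model.
+    shards[shard_id] += update
+
+def trainer_step(local_copy):
+    # A trainer computes an update from its local data (faked here).
+    return {shard_id: np.ones(3) * 0.1 for shard_id in local_copy}
+
+# Trainer: compute updates locally and send them to the pservers...
+local_copy = {i: shards[i].copy() for i in shards}
+for shard_id, update in trainer_step(local_copy).items():
+    pserver_apply(shard_id, update)
+
+# ...then synchronize the local copy with the aggregated global model.
+local_copy = {i: shards[i].copy() for i in shards}
+```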
+ +In order to communicate with the pservers, a trainer needs to know the IP address of each pserver. In Kubernetes it's better to use a service discovery mechanism (e.g., DNS hostname) rather than static IP addresses, since any pserver's pod may be killed and a new pod could be scheduled onto another node with a different IP address. However, for now we are using static IPs. This will be improved. + +The parameter server and trainer are packaged into the same Docker image. They will run once the pod is scheduled by the Kubernetes job. + +### Trainer ID + +Each trainer process requires a trainer ID, a zero-based index value, passed in as a command-line parameter. The trainer process thus reads the data partition indexed by this ID. + +### Training + +The entry-point of a container is a shell script. It can see some environment variables pre-defined by Kubernetes. This includes one that gives the job's identity, which can be used in a remote call to the Kubernetes apiserver that lists all pods in the job. + +We rank the pods by sorting them by their IPs. The rank of each pod could be the "pod ID". Because we run one trainer and one parameter server in each pod, we can use this "pod ID" as the trainer ID. A detailed workflow of the entry-point script is as follows (a sketch of step 1 appears right after this list): + +1. Query the apiserver to get pod information, and assign the `trainer_id` by sorting the IPs. +1. Copy the training data from the EFS persistent volume into the container. +1. Parse the `paddle pserver` and `paddle trainer` startup parameters from environment variables, and then start up the processes. +1. The trainer with `trainer_id` 0 will automatically write results onto the EFS volume. + 
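+As a rough illustration of step 1 (Python here just for compactness; the actual entry point is a shell script, and the pod IPs would come from the Kubernetes apiserver rather than being hard-coded):
+
+```python
+def assign_trainer_id(pod_ips, my_ip):
+    # Rank pods by sorting their IPs; a pod's rank serves as its trainer ID.
+    id_map = {ip: rank for rank, ip in enumerate(sorted(pod_ips))}
+    return id_map[my_ip]
+
+# Normally pod_ips is queried from the Kubernetes apiserver, and my_ip is the
+# pod's own IP address; they are hard-coded here so the sketch runs as-is.
+pod_ips = ["10.0.0.3", "10.0.0.1", "10.0.0.2"]
+print(assign_trainer_id(pod_ips, my_ip="10.0.0.2"))  # prints 1
+```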
+ +## PaddlePaddle on AWS with Kubernetes + +### Choose AWS Service Region +This tutorial requires several AWS services to work in the same region. Before we create anything in AWS, please check the following link +https://aws.amazon.com/about-aws/global-infrastructure/regional-product-services/ +Choose a region which has the following services available: EC2, EFS, CloudFormation, KMS, VPC, S3. +In this tutorial, we use "Oregon (us-west-2)" as the example. + +### Create AWS Account and IAM Account Under each AWS account, we can create multiple [IAM](http://docs.aws.amazon.com/IAM/latest/UserGuide/introduction.html) users. This allows us to grant some privileges to each IAM user and to create/operate AWS clusters as an IAM user. @@ -25,17 +75,13 @@ Please be aware that this tutorial needs the following privileges for the user i - AWSKeyManagementServicePowerUser -## PaddlePaddle on AWS - -Here we will show you step by step on how to run PaddlePaddle training on AWS cluster. - - ### Download kube-aws and kubectl #### kube-aws [kube-aws](https://github.com/coreos/kube-aws) is a CLI tool to automate cluster deployment to AWS. - +##### Verify kube-aws integrity +Note: if you are using a non-official release of kube-aws (e.g., an RC release), you can skip this step. Import the CoreOS Application Signing Public Key: ``` @@ -60,7 +106,7 @@ PLATFORM=darwin-amd64 gpg2 --verify kube-aws-${PLATFORM}.tar.gz.sig kube-aws-${PLATFORM}.tar.gz ``` - +##### Install kube-aws Extract the binary: ``` @@ -103,7 +149,6 @@ And then configure your AWS account information: ``` aws configure - ``` @@ -113,7 +158,7 @@ Fill in the required fields: ``` AWS Access Key ID: YOUR_ACCESS_KEY_ID AWS Secrete Access Key: YOUR_SECRETE_ACCESS_KEY -Default region name: us-west-1 +Default region name: us-west-2 Default output format: json ``` @@ -131,25 +176,28 @@ aws ec2 describe-instances The keypair that will authenticate SSH access to your EC2 instances. The public half of this key pair will be configured on each CoreOS node. -Follow [EC2 Keypair docs](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html) to create a EC2 key pair +Follow the [EC2 Keypair User Guide](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-key-pairs.html) to create an EC2 key pair After creating a key pair, you will use the key pair name to configure the cluster. -Key pairs are only available to EC2 instances in the same region. We are using us-west-1 in our tutorial, so make sure to creat key pairs in that region (N. California). +Key pairs are only available to EC2 instances in the same region. We are using us-west-2 in our tutorial, so make sure to create key pairs in that region (Oregon). + +Your browser will download a `key-name.pem` file, which is the key to access the EC2 instances. We will use it later. + #### KMS key Amazon KMS keys are used to encrypt and decrypt cluster TLS assets. If you already have a KMS Key that you would like to use, you can skip creating a new key and provide the Arn string for your existing key. -You can create a KMS key in the AWS console, or with the aws command line tool: +You can create a KMS key with the aws command line tool: ``` -aws kms --region=us-west-1 create-key --description="kube-aws assets" +aws kms --region=us-west-2 create-key --description="kube-aws assets" { "KeyMetadata": { "CreationDate": 1458235139.724, "KeyState": "Enabled", - "Arn": "arn:aws:kms:us-west-1:aaaaaaaaaaaaa:key/xxxxxxxxxxxxxxxxxxx", + "Arn": "arn:aws:kms:us-west-2:aaaaaaaaaaaaa:key/xxxxxxxxxxxxxxxxxxx", "AWSAccountId": "xxxxxxxxxxxxx", "Enabled": true, "KeyUsage": "ENCRYPT_DECRYPT", @@ -161,14 +209,14 @@ aws kms --region=us-west-1 create-key --description="kube-aws assets" We will need to use the value of `Arn` later. -And then you need to add several inline policies in your user permission. +And then let's add several inline policies to your IAM user permissions. -Go to IAM user page, click on `Add inline policy` button, and then select `Custom Policy` +Go to the [IAM Console](https://console.aws.amazon.com/iam/home?region=us-west-2#/home). Click on the `Users` button, click the user that we just created, then click on the `Add inline policy` button, and select `Custom Policy`. -paste into following inline policies: +Paste in the following inline policy: ``` -{ + (Caution: the node_0, node_1, node_2 directories represent PaddlePaddle nodes and trainer_id values, not Kubernetes nodes){ "Version": "2012-10-17", "Statement": [ { @@ -195,39 +243,40 @@ paste into following inline policies: "cloudformation:DescribeStackEvents" ], "Resource": [ - "arn:aws:cloudformation:us-west-1:AWS_ACCOUNT_ID:stack/MY_CLUSTER_NAME/*" + "arn:aws:cloudformation:us-west-2:AWS_ACCOUNT_ID:stack/MY_CLUSTER_NAME/*" ] } ] } ``` - +`Version`: its value has to be exactly "2012-10-17". 
`AWS_ACCOUNT_ID`: You can get it from the following command line: ``` aws sts get-caller-identity --output text --query Account ``` -`MY_CLUSTER_NAME`: Pick a MY_CLUSTER_NAME that you like, you will use it later as well. +`MY_CLUSTER_NAME`: Pick a MY_CLUSTER_NAME that you like; you will use it later as well. +Please note, the stack name must match the regular expression [a-zA-Z][-a-zA-Z0-9*]*, which means it must start with a letter and must not contain "_" (underscores), or kube-aws will throw an error in later steps. #### External DNS name When the cluster is created, the controller will expose the TLS-secured API on a DNS name. -The A record of that DNS name needs to be point to the cluster ip address. +The DNS name should have a CNAME that points to the cluster DNS name, or an A record that points to the cluster IP address. -We will need to use DNS name later in tutorial. If you don't already own one, you can choose any DNS name (e.g., `paddle`) and modify `/etc/hosts` to associate cluster ip with that DNS name. +We will need to use the DNS name later in the tutorial. If you don't already own one, you can choose any DNS name (e.g., `paddle`) and modify `/etc/hosts` to associate the cluster IP with that DNS name on your local machine, and add a name service (Route 53) in AWS to associate the IP with `paddle` for the cluster. We will find the cluster IP in later steps. #### S3 bucket You need to create an S3 bucket before starting up the Kubernetes cluster. -There are some bugs in aws cli in creating S3 bucket, so let's use the [Web console](https://console.aws.amazon.com/s3/home?region=us-west-1). +There are some bugs in the aws CLI around creating S3 buckets, so let's use the [S3 Console](https://console.aws.amazon.com/s3/home?region=us-west-2). -Click on `Create Bucket`, fill in a unique BUCKET_NAME, and make sure region is us-west-1 (Northern California). +Click on `Create Bucket`, fill in a unique BUCKET_NAME, and make sure the region is us-west-2 (Oregon). -#### Initialize an asset directory +#### Initialize Assets Create a directory on your local machine to hold the generated assets: @@ -242,10 +291,10 @@ Initialize the cluster CloudFormation stack with the KMS Arn, key pair name, and kube-aws init \ --cluster-name=MY_CLUSTER_NAME \ --external-dns-name=MY_EXTERNAL_DNS_NAME \ ---region=us-west-1 \ ---availability-zone=us-west-1a \ +--region=us-west-2 \ +--availability-zone=us-west-2a \ --key-name=KEY_PAIR_NAME \ ---kms-key-arn="arn:aws:kms:us-west-1:xxxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx" +--kms-key-arn="arn:aws:kms:us-west-2:xxxxxxxxxx:key/xxxxxxxxxxxxxxxxxxx" ``` `MY_CLUSTER_NAME`: the one you picked in [KMS key](#kms-key) @@ -256,14 +305,15 @@ kube-aws init \ `--kms-key-arn`: the "Arn" in [KMS key](#kms-key) -Here `us-west-1a` is used for parameter `--availability-zone`, but supported availability zone varies among AWS accounts. +Here `us-west-2a` is used for the parameter `--availability-zone`, but the supported availability zones vary among AWS accounts. -Please check if `us-west-1a` is supported by `aws ec2 --region us-west-1 describe-availability-zones`, if not switch to other supported availability zone. (e.g., `us-west-1a`, or `us-west-1b`) +Please check whether `us-west-2a` is supported by `aws ec2 --region us-west-2 describe-availability-zones`; if not, switch to another supported availability zone (e.g., `us-west-2b`). -Note: please don't use `us-west-1c`. Subnets can currently only be created in the following availability zones: us-west-1b, us-west-1a. There will now be a cluster.yaml file in the asset directory. This is the main configuration file for your cluster. 
+By default `kube-aws` will only create one worker node. Let's edit `cluster.yaml` and change `workerCount` from 1 to 3. + #### Render contents of the asset directory @@ -278,41 +328,14 @@ The next command generates the default set of cluster assets in your asset direc ``` kube-aws render stack ``` - -Here's what the directory structure looks like: - -``` -$ tree -. -├── cluster.yaml -├── credentials -│ ├── admin-key.pem -│ ├── admin.pem -│ ├── apiserver-key.pem -│ ├── apiserver.pem -│ ├── ca-key.pem -│ ├── ca.pem -│ ├── worker-key.pem -│ └── worker.pem -│ ├── etcd-key.pem -│ └── etcd.pem -│ ├── etcd-client-key.pem -│ └── etcd-client.pem -├── kubeconfig -├── stack-template.json -└── userdata - ├── cloud-config-controller - └── cloud-config-worker -``` - -These assets (templates and credentials) are used to create, update and interact with your Kubernetes cluster. +Assets (templates and credentials) that are used to create, update, and interact with your Kubernetes cluster will be created under your current folder. ### Kubernetes Cluster Start Up #### Create the instances defined in the CloudFormation template -Now let's create your cluster (choose any PREFIX for the command below): +Now let's create your cluster (choose any `PREFIX` for the command below): ``` kube-aws up --s3-uri s3://BUCKET_NAME/PREFIX ``` @@ -328,239 +351,182 @@ You can invoke `kube-aws status` to get the cluster API endpoint after cluster c ``` $ kube-aws status Cluster Name: paddle-cluster -Controller DNS Name: paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-1.elb.amazonaws.com +Controller DNS Name: paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com ``` +If you own a DNS name, set the A record to any of the cluster IPs (found in the next step). __Or__ you can set up a CNAME pointing to the `Controller DNS Name` (`paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com`) + +##### Find IP address + Use the `dig` command on the load balancer hostname to get the IP addresses. ``` -$ dig paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-1.elb.amazonaws.com +$ dig paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com ;; QUESTION SECTION: -;paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-1.elb.amazonaws.com. IN A +;paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com. IN A ;; ANSWER SECTION: -paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-1.elb.amazonaws.com. 59 IN A 54.241.164.52 -paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-1.elb.amazonaws.com. 59 IN A 54.67.102.112 +paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com. 59 IN A 54.241.164.52 +paddle-cl-ElbAPISe-EEOI3EZPR86C-531251350.us-west-2.elb.amazonaws.com. 59 IN A 54.67.102.112 ``` In the above output, both IPs, `54.241.164.52` and `54.67.102.112`, will work. -If you own a DNS name, set the A record to any of the above ip. Otherwise you can edit `/etc/hosts` to associate ip with the DNS name. +*If you own a DNS name*, set the A record to any of the above IPs. Then you can skip to the step "Access the cluster". + +*If you do not own a DNS name*: +##### Update local DNS association +Edit `/etc/hosts` to associate the above IP with the DNS name. 
+##### Add Route53 private name service in VPC + - Open the [Route53 Console](https://console.aws.amazon.com/route53/home) + - Create a hosted zone with the following config + - Domain name: "paddle" + - Type: "Private hosted zone for amazon VPC" + - VPC ID: `` + + ![route53 zone setting](src/route53_create_zone.png) + - Add an A record + - Click on the zone "paddle" just created + - Click the button "Create record set" + - Name: leave blank + - Type: "A" + - Value: `` + + ![route53 create recordset](src/route53_create_recordset.png) + - Verify the name service + - Connect to any instance created by kube-aws via ssh + - Run the command "host paddle" and check that the IP returned is the private IP of the kube-controller #### Access the cluster Once the API server is running, you should see: ``` -$ kubectl --kubeconfig=kubeconfig get nodes -NAME STATUS AGE -ip-10-0-0-xxx.us-west-1.compute.internal Ready 5m -ip-10-0-0-xxx.us-west-1.compute.internal Ready 5m -ip-10-0-0-xx.us-west-1.compute.internal Ready,SchedulingDisabled 5m +$ kubectl --kubeconfig=kubeconfig get nodes +NAME STATUS AGE +ip-10-0-0-134.us-west-2.compute.internal Ready 6m +ip-10-0-0-238.us-west-2.compute.internal Ready 6m +ip-10-0-0-50.us-west-2.compute.internal Ready 6m +ip-10-0-0-55.us-west-2.compute.internal Ready 6m ``` ### Setup Elastic File System for Cluster -Training data is usually served on a distributed filesystem, we use Elastic File System (EFS) on AWS. Ceph might be a better solution, but it requires high version of Linux kernel that might not be stable enough at this moment. We haven't automated the EFS setup at this moment, so please do the following steps: +Training data is usually served on a distributed filesystem; we use Elastic File System (EFS) on AWS. +1. Create a security group for EFS in the [security group console](https://us-west-2.console.aws.amazon.com/ec2/v2/home?region=us-west-2#SecurityGroups:sort=groupId) + 1. Look up the security group id of `paddle-cluster-sg-worker` (`sg-055ee37d` in the image below) +
![](src/worker_security_group.png)
+ + 2. Add a security group `paddle-efs` with an `ALL TCP` inbound rule whose custom source is the group id of `paddle-cluster-sg-worker`, and with the VPC set to `paddle-cluster-vpc`. Make sure the availability zone is the same as the one you used in [Initialize Assets](#initialize-assets). +
![](src/add_security_group.png)
-1. Make sure you added AmazonElasticFileSystemFullAccess policy in your group. - -1. Create the Elastic File System in AWS console, and attach the new VPC with it. +2. Create the Elastic File System in the [EFS console](https://us-west-2.console.aws.amazon.com/efs/home?region=us-west-2#/wizard/1) with the `paddle-cluster-vpc` VPC. Make sure the subnet is `paddle-cluster-Subnet0` and the security group is `paddle-efs`.
![](src/create_efs.png)
-1. Modify the Kubernetes security group under ec2/Security Groups, add additional inbound policy "All TCP TCP 0 - 65535 0.0.0.0/0" for Kubernetes default VPC security group. -
![](src/add_security_group.png)
- - -1. Follow the EC2 mount instruction to mount the disk onto all the Kubernetes nodes, we recommend to mount EFS disk onto ~/efs. -
![](src/efs_mount.png)
- - -We will place user config and divided training data onto EFS. Training task will cache related files by copying them from EFS into container. It will also write the training results back onto EFS. We will show you how to place the data later in this article. - - - -### Core Concepts of PaddlePaddle Training on AWS - -Now we've already setup a 3 nodes distributed Kubernetes cluster, and on each node we've attached the EFS volume. In this training demo, we will create three Kubernetes pods and schedule them on three nodes. Each pod contains a PaddlePaddle container. When container gets created, it will start parameter server (pserver) and trainer process, load the training data from EFS volume and start the distributed training task. - -#### Distributed Training Job - -A distributed training job is represented by a [kubernetes job](https://kubernetes.io/docs/user-guide/jobs/#what-is-a-job). - -Each Kuberentes job is described by a job config file, which specifies the information like the number of pods in the job and environment variables. - -In a distributed training job, we would: - -1. upload the partitioned training data and configuration file onto EFS volume, and -1. create and submit the Kubernetes job config to the Kubernetes cluster to start the training job. - -#### Parameter Servers and Trainers - -There are two roles in a PaddlePaddle cluster: `parameter server` and `trainer`. Each parameter server process maintains a shard of the global model. Each trainer has its local copy of the model, and uses its local data to update the model. During the training process, trainers send model updates to parameter servers, parameter servers are responsible for aggregating these updates, so that trainers can synchronize their local copy with the global model. - -
![Model is partitioned into two shards. Managed by two parameter servers respectively.](src/pserver_and_trainer.png)
- -In order to communicate with pserver, trainer needs to know the ip address of each pserver. In kubernetes it's better to use a service discovery mechanism (e.g., DNS hostname) rather than static ip address, since any pserver's pod may be killed and a new pod could be schduled onto another node of different ip address. We will improve paddlepaddle's service discovery ability. For now we will use static ip. - -Parameter server and trainer are packaged into a same docker image. They will run once pod is scheduled by kubernetes job. - -#### Trainer ID - -Each trainer process requires a trainer ID, a zero-based index value, passed in as a command-line parameter. The trainer process thus reads the data partition indexed by this ID. - -#### Training - -The entry-point of a container is a Python script. As it runs in a pod, it can see some environment variables pre-defined by Kubernetes. This includes one that gives the job's identity, which can be used in a remote call to the Kubernetes apiserver that lists all pods in the job. - -We rank each pod by sorting them by their ips. The rank of each pod could be the "pod ID". Because we run one trainer and one parameter server in each pod, we can use this "pod ID" as the trainer ID. A detailed workflow of the entry-point script is as follows: - -1. Query the api server to get pod information, and assign the `trainer_id` by sorting the ip. -1. Copy the training data from EFS sharing volume into container. -1. Parse the `paddle pserver` and `paddle trainer` startup parameters from environment variables, and then start up the processes. -1. Trainer with `train_id` 0 will automatically write results onto EFS volume. - - ### Start PaddlePaddle Training Demo on AWS -Now we'll start a PaddlePaddle training demo on AWS, steps are as follows: - -1. Build PaddlePaddle Docker image. -1. Divide the training data file and upload it onto the EFS sharing volume. -1. Create the training job config file, and start up the job. -1. Check the result after training. - -#### Build PaddlePaddle Docker Image - -PaddlePaddle docker image need to provide the runtime environment for `pserver` and `trainer`, so the container use this image should have two main function: +#### Configure Kubernetes Volume that Points to EFS -1. Copy the training data into container. -1. Generate the startup parameter for `pserver` and `trainer` process, and startup the training. - - -We need to create a new image since official `paddledev/paddle:cpu-latest` only have PaddlePaddle binary, but lack of the above functionalities. - -Dockerfile for creating the new image is as follows: +First we need to create a [PersistentVolume](https://kubernetes.io/docs/user-guide/persistent-volumes/) to provision EFS volumn. +Save following snippet as `pv.yaml` ``` -FROM paddledev/paddle:cpu-latest - -MAINTAINER zjsxzong89@gmail.com - -COPY start.sh /root/ -COPY start_paddle.py /root/ -CMD ["bash"," -c","/root/start.sh"] +apiVersion: v1 +kind: PersistentVolume +metadata: + name: efsvol +spec: + capacity: + storage: 100Gi + accessModes: + - ReadWriteMany + nfs: + server: EFS_DNS_NAME + path: "/" ``` -At this point, we will copy our `start.sh` and `start_paddle.py` file into container, and then exec `start_paddle.py` script to start up the training, all the steps like assigning trainer_id, getting other nodes' ip are implemented in `start_paddle.py`. - -`start_paddle.py` will start parsing the parameters. +`EFS_DNS_NAME`: DNS name as shown in description of `paddle-efs` that we created. 
+Run the following command to create the persistent volume:
```
-parser = argparse.ArgumentParser(prog="start_paddle.py",
-                                 description='simple tool for k8s')
-    args, train_args_list = parser.parse_known_args()
-    train_args = refine_unknown_args(train_args_list)
-    train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2]))
-    podlist = getPodList()
+kubectl --kubeconfig=kubeconfig create -f pv.yaml
```
-And then using function `getPodList()` to query all the pod information from the job name through Kubernetes api server. When all the pods are in the running status, using `getIdMap(podlist)` to get the trainer_id.
+Next, let's create a [PersistentVolumeClaim](https://kubernetes.io/docs/user-guide/persistent-volumes/) to claim the persistent volume.
+Save the following snippet as `pvc.yaml`:
```
-    podlist = getPodList()
-    # need to wait until all pods are running
-    while not isPodAllRunning(podlist):
-        time.sleep(10)
-        podlist = getPodList()
-    idMap = getIdMap(podlist)
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: efsvol
+spec:
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: 50Gi
```
-In function `getIdMap(podlist)`, we use podlist to get the ip address for each pod and sort them, use the index as the trainer_id.
-
+Run the following command to create the persistent volume claim:
```
-def getIdMap(podlist):
-    '''
-    generate tainer_id by ip
-    '''
-    ips = []
-    for pod in podlist["items"]:
-        ips.append(pod["status"]["podIP"])
-    ips.sort()
-    idMap = {}
-    for i in range(len(ips)):
-        idMap[ips[i]] = i
-    return idMap
+kubectl --kubeconfig=kubeconfig create -f pvc.yaml
```
-After getting `idMap`, we use function `startPaddle(idMap, train_args_dict)` to generate `paddle pserver` and `paddle train` start up parameters and then start up the processes.
-
-In function `startPaddle`, the most important work is to generate `paddle pserver` and `paddle train` start up parameters. For example, `paddle train` parameter parsing, we will get parameters like `PADDLE_NIC`, `PADDLE_PORT`, `PADDLE_PORTS_NUM`, and get the `trainer_id` from `idMap`.
-
```
-    program = 'paddle train'
-    args = " --nics=" + PADDLE_NIC
-    args += " --port=" + str(PADDLE_PORT)
-    args += " --ports_num=" + str(PADDLE_PORTS_NUM)
-    args += " --comment=" + "paddle_process_by_paddle"
-    ip_string = ""
-    for ip in idMap.keys():
-        ip_string += (ip + ",")
-    ip_string = ip_string.rstrip(",")
-    args += " --pservers=" + ip_string
-    args_ext = ""
-    for key, value in train_args_dict.items():
-        args_ext += (' --' + key + '=' + value)
-    localIP = socket.gethostbyname(socket.gethostname())
-    trainerId = idMap[localIP]
-    args += " " + args_ext + " --trainer_id=" + \
-        str(trainerId) + " --save_dir=" + JOB_PATH_OUTPUT
-```
+#### Prepare Training Data

-Use `docker build` to build toe Docker Image:
+We will now launch a Kubernetes job that downloads, saves, and evenly splits the training data into 3 shards on the persistent volume that we just created.
+Save the following snippet as `paddle-data-job.yaml`:
```
-docker build -t your_repo/paddle:mypaddle .
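+# This job runs the data-preparation image once: it downloads the dataset to
+# OUT_DIR on the EFS-backed volume and splits it into SPLIT_COUNT shards.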
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: paddle-data
+spec:
+  template:
+    metadata:
+      name: pi
+    spec:
+      containers:
+      - name: paddle-data
+        image: paddledev/paddle-tutorial:k8s_data
+        imagePullPolicy: Always
+        volumeMounts:
+        - mountPath: "/efs"
+          name: efs
+        env:
+        - name: OUT_DIR
+          value: /efs/paddle-cluster-job
+        - name: SPLIT_COUNT
+          value: "3"
+      volumes:
+        - name: efs
+          persistentVolumeClaim:
+            claimName: efsvol
+      restartPolicy: Never
```
-And then push the built image onto docker registry.
-
+Run the following command to launch the job:
```
-docker push your_repo/paddle:mypaddle
+kubectl --kubeconfig=kubeconfig create -f paddle-data-job.yaml
```
-#### Upload Training Data File
-
-Here we will use PaddlePaddle's official recommendation demo as the content for this training, we put the training data file into a directory named by job name, which located in EFS sharing volume, the tree structure for the directory looks like:
-
+The job may take about 7 minutes to finish. Use the following command to check the job status, and do not proceed until `SUCCESSFUL` for the `paddle-data` job is `1`:
```
-efs
-└── paddle-cluster-job
-    ├── data
-    │   ├── 0
-    │   │
-    │   ├── 1
-    │   │
-    │   └── 2
-    ├── output
-    └── recommendation
+$ kubectl --kubeconfig=kubeconfig get jobs
+NAME          DESIRED   SUCCESSFUL   AGE
+paddle-data   1         1            6m
```
-The `paddle-cluster-job` directory is the job name for this training, this training includes 3 PaddlePaddle node, we store the partitioned data under `paddle-cluster-job/data` directory, directory 0, 1, 2 each represent 3 nodes' trainer_id. the training data in in recommendation directory, the training results and logs will be in the output directory.
-
-
-#### Create Kubernetes Job
-
-Kubernetes use yaml file to describe job details, and then use command line tool to create the job in Kubernetes cluster.
-
-In yaml file, we describe the Docker image we use for this training, the node number we need to startup, the volume mounting information and all the necessary parameters we need for `paddle pserver` and `paddle train` processes.
+Data preparation is done by the docker image `paddledev/paddle-tutorial:k8s_data`; see [here](src/k8s_data/README.md) for the source code and instructions on building this image.

-The yaml file content is as follows:
+#### Start Training
+Now we are ready to start the PaddlePaddle training job.
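+Before launching it, it is worth confirming that the claim we created earlier is bound to the volume (an optional sanity check; `efsvol` is the name used in the snippets above):
+```
+kubectl --kubeconfig=kubeconfig describe pvc efsvol
+```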
+Save the following snippet as `paddle-cluster-job.yaml`:
```
apiVersion: batch/v1
kind: Job
@@ -574,12 +540,12 @@ spec:
     name: paddle-cluster-job
   spec:
     volumes:
-    - name: jobpath
-      hostPath:
-        path: /home/admin/efs
+    - name: efs
+      persistentVolumeClaim:
+        claimName: efsvol
     containers:
     - name: trainer
-      image: drinkcode/paddle:k8s-job
+      image: paddledev/paddle-tutorial:k8s_train
       command: ["bin/bash",  "-c",  "/root/start.sh"]
       env:
       - name: JOB_NAME
@@ -589,7 +555,7 @@ spec:
       - name: JOB_NAMESPACE
         value: default
       - name: TRAIN_CONFIG_DIR
-        value: recommendation
+        value: quick_start
       - name: CONF_PADDLE_NIC
         value: eth0
       - name: CONF_PADDLE_PORT
@@ -600,106 +566,124 @@ spec:
         value: "2"
       - name: CONF_PADDLE_GRADIENT_NUM
         value: "3"
+      - name: TRAINER_COUNT
+        value: "3"
       volumeMounts:
-      - name: jobpath
-        mountPath: /home/jobpath
+      - mountPath: "/home/jobpath"
+        name: efs
       ports:
-      - name: jobport
-        hostPort: 30001
-        containerPort: 30001
+      - name: jobport0
+        hostPort: 7164
+        containerPort: 7164
+      - name: jobport1
+        hostPort: 7165
+        containerPort: 7165
+      - name: jobport2
+        hostPort: 7166
+        containerPort: 7166
+      - name: jobport3
+        hostPort: 7167
+        containerPort: 7167
      restartPolicy: Never
-
```
-In yaml file, the metadata's name is the job's name. `parallelism, completions` means this job will simultaneously start up 3 PaddlePaddle nodes, and this job will be finished when there are 3 finished pods. For the data store volume, we declare the path jobpath, it mount the /home/admin/efs on host machine into the container with path /home/jobpath. So in container, the /home/jobpath actually stores the data onto EFS sharing volume.
-
-`env` field represents container's environment variables, we pass the PaddlePaddle parameters into containers by using the `env` field.
+`parallelism: 3, completions: 3` means this job will simultaneously start 3 PaddlePaddle pods, and the job is considered finished when all 3 pods complete successfully.

-`JOB_PATH` represents the sharing volume path, `JOB_NAME` represents job name, `TRAIN_CONFIG_DIR` represents the training data file directory, we can these three parameters to get the file path for this training.
+The `env` field defines the container's environment variables; we pass PaddlePaddle parameters to the containers through them.

-`CONF_PADDLE_NIC` represents `paddle pserver` process's `--nics` parameters, the NIC name.
+`ports` indicates that TCP ports 7164 - 7167 are exposed for communication between `pserver` and trainer. The ports run contiguously from `CONF_PADDLE_PORT` (7164) to `CONF_PADDLE_PORT + CONF_PADDLE_PORTS_NUM + CONF_PADDLE_PORTS_NUM_SPARSE - 1` (7167). We use separate ports for dense and sparse parameter updates to reduce latency.

-`CONF_PADDLE_PORT` represents `paddle pserver` process's `--port` parameters, `CONF_PADDLE_PORTS_NUM` represents `--port_num` parameter.
-
-`CONF_PADDLE_PORTS_NUM_SPARSE` represents the sparse updated port number, `--ports_num_for_sparse` parameter.
+Run the following command to launch the job:
```
+kubectl --kubeconfig=kubeconfig create -f paddle-cluster-job.yaml
```

-`CONF_PADDLE_GRADIENT_NUM` represents the training node number, `--num_gradient_servers` parameter.

+Inspect the individual pods

-After we create the yaml file, we can use Kubernetes command line tool to create the job onto the cluster.
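+While the job is coming up, you can keep an eye on the pods from another terminal (an optional convenience; press Ctrl-C to stop watching):
+```
+kubectl --kubeconfig=kubeconfig get pods -w
+```
+Once all three pods are scheduled and running, a snapshot looks like this: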
+```
+$ kubectl --kubeconfig=kubeconfig get pods
+NAME                       READY     STATUS    RESTARTS   AGE
+paddle-cluster-job-cm469   1/1       Running   0          9m
+paddle-cluster-job-fnt03   1/1       Running   0          9m
+paddle-cluster-job-jx4xr   1/1       Running   0          9m
+```

+Inspect an individual pod's console output:
```
-kubectl create -f job.yaml
+kubectl --kubeconfig=kubeconfig logs -f POD_NAME
```

-After we execute the above command, Kubernetes will create 3 pods and then pull the PaddlePaddle image, then start up the containers for training.
+`POD_NAME`: the name of any pod (e.g., `paddle-cluster-job-cm469`).

+Run `kubectl --kubeconfig=kubeconfig describe job paddle-cluster-job` to check the training job status. It will complete in around 20 minutes.

+The details of starting `pserver` and `trainer` are encapsulated in the docker image `paddledev/paddle-tutorial:k8s_train`; see [here](src/k8s_train/README.md) for the source code and instructions on building the image.

-#### Check Training Results
+#### Inspect Training Output

-During the training, we can see the logs and models on EFS sharing volume, the output directory contains the training results. (Caution: node_0, node_1, node_2 directories represents PaddlePaddle node and train_id, not the Kubernetes node)
+Training output (model snapshots and logs) will be saved on EFS. We can SSH into a worker EC2 instance, mount EFS, and check the training output.

+1. SSH into a worker EC2 instance
```
-[root@paddle-kubernetes-node0 output]# tree -d
-.
-├── node_0
-│   ├── server.log
-│   └── train.log
-├── node_1
-│   ├── server.log
-│   └── train.log
-├── node_2
-......
-├── pass-00002
-│   ├── done
-│   ├── ___embedding_0__.w0
-│   ├── ___embedding_1__.w0
-......
+chmod 400 key-name.pem
+ssh -i key-name.pem core@INSTANCE_IP
```
-We can always check the container training status through logs, for example:
+`INSTANCE_IP`: the public IP address of an EC2 Kubernetes worker node. Go to the [EC2 console](https://us-west-2.console.aws.amazon.com/ec2/v2/home?region=us-west-2#Instances:sort=instanceId) and check the `public IP` of any `paddle-cluster-kube-aws-worker` instance.

+2. Mount EFS
```
-[root@paddle-kubernetes-node0 node_0]# cat train.log
-I1116 09:10:17.123121    50 Util.cpp:155] commandline:
-    /usr/local/bin/../opt/paddle/bin/paddle_trainer
-    --nics=eth0 --port=7164
-    --ports_num=2 --comment=paddle_process_by_paddle
-    --pservers=192.168.129.66,192.168.223.143,192.168.129.71
-    --ports_num_for_sparse=2 --config=./trainer_config.py
-    --trainer_count=4 --num_passes=10 --use_gpu=0
-    --log_period=50 --dot_period=10 --saving_period=1
-    --local=0 --trainer_id=0
-    --save_dir=/home/jobpath/paddle-cluster-job/output
-I1116 09:10:17.123440    50 Util.cpp:130] Calling runInitFunctions
-I1116 09:10:17.123764    50 Util.cpp:143] Call runInitFunctions done.
-[WARNING 2016-11-16 09:10:17,227 default_decorators.py:40] please use keyword arguments in paddle config.
-[INFO 2016-11-16 09:10:17,239 networks.py:1282] The input order is [movie_id, title, genres, user_id, gender, age, occupation, rating]
-[INFO 2016-11-16 09:10:17,239 networks.py:1289] The output order is [__regression_cost_0__]
-I1116 09:10:17.392917    50 Trainer.cpp:170] trainer mode: Normal
-I1116 09:10:17.613910    50 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process
-I1116 09:10:17.680917    50 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process
-I1116 09:10:17.681543    50 GradientMachine.cpp:134] Initing parameters..
-I1116 09:10:18.012390    50 GradientMachine.cpp:141] Init parameters done.
-I1116 09:10:18.018641    50 ParameterClient2.cpp:122] pserver 0 192.168.129.66:7164
-I1116 09:10:18.018950    50 ParameterClient2.cpp:122] pserver 1 192.168.129.66:7165
-I1116 09:10:18.019069    50 ParameterClient2.cpp:122] pserver 2 192.168.223.143:7164
-I1116 09:10:18.019492    50 ParameterClient2.cpp:122] pserver 3 192.168.223.143:7165
-I1116 09:10:18.019716    50 ParameterClient2.cpp:122] pserver 4 192.168.129.71:7164
-I1116 09:10:18.019836    50 ParameterClient2.cpp:122] pserver 5 192.168.129.71:7165
+mkdir efs
+sudo mount -t nfs4 -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 EFS_DNS_NAME:/ efs
```
-It'll take around 8 hours to finish this PaddlePaddle recommendation training demo on three 2 core 8 GB EC2 machine (m3.large).
+`EFS_DNS_NAME`: the DNS name shown in the description of the `paddle-efs` file system we created. It looks similar to `fs-2cbf7385.efs.us-west-2.amazonaws.com`.

+Now the folder `efs` will have a structure similar to:
```
+-- paddle-cluster-job
+    |-- ...
+    |-- output
+    |   |-- node_0
+    |   |   |-- server.log
+    |   |   `-- train.log
+    |   |-- node_1
+    |   |   |-- server.log
+    |   |   `-- train.log
+    |   |-- node_2
+    |   |   |-- server.log
+    |   |   `-- train.log
+    |   |-- pass-00000
+    |   |   |-- ___fc_layer_0__.w0
+    |   |   |-- ___fc_layer_0__.wbias
+    |   |   |-- done
+    |   |   |-- path.txt
+    |   |   `-- trainer_config.lr.py
+    |   |-- pass-00001...
```
+`server.log` contains the log for `pserver`; `train.log` contains the log for the trainer. The model description and snapshots are stored in `pass-0000*`.

### Kubernetes Cluster Tear Down

+#### Delete EFS
+
+Go to the [EFS Console](https://us-west-2.console.aws.amazon.com/efs/home?region=us-west-2) and delete the EFS volume that we created.
+
+#### Delete Security Group
+
+Go to the [Security Group Console](https://us-west-2.console.aws.amazon.com/ec2/v2/home?region=us-west-2#SecurityGroups:sort=groupId) and delete security group `paddle-efs`.

-If you want to tear down the whole Kubernetes cluster, make sure to *delete* the EFS volume first (otherwise, you will get stucked on following steps), and then use the following command:

+#### Delete S3 Bucket
+
+Go to the [S3 Console](https://console.aws.amazon.com/s3/home?region=us-west-2#) and delete the S3 bucket that we created.
+
+#### Destroy Cluster

```
kube-aws destroy
```
-It's an async call, it might take 5 min to tear down the whole cluster.
+The command returns immediately, but it might take around 5 minutes to tear down the whole cluster.

-If you created any Kubernetes Services of type LoadBalancer, you must delete these first, as the CloudFormation cannot be fully destroyed if any externally-managed resources still exist.
+You can go to the [CloudFormation Console](https://us-west-2.console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks?filter=active) to check the destroy process.
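+If you prefer the CLI, the stack status can be polled from the command line as well (a sketch; the stack name matches the cluster name given to kube-aws, `paddle-cluster` in this tutorial):
+```
+aws cloudformation describe-stacks --region us-west-2 --stack-name paddle-cluster --query 'Stacks[0].StackStatus'
+```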
diff --git a/doc/howto/usage/k8s/k8s_basis_cn.md b/doc/howto/usage/k8s/k8s_basis_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..6278dacb17a378da660b2f5434247efd41c995fc
--- /dev/null
+++ b/doc/howto/usage/k8s/k8s_basis_cn.md
@@ -0,0 +1,75 @@
+# Kubernetes 简介
+
+[*Kubernetes*](http://kubernetes.io/)是Google开源的容器集群管理系统,其提供应用部署、维护、扩展机制等功能,利用Kubernetes能方便地管理跨机器运行容器化的应用。Kubernetes可以在物理机或虚拟机上运行,且支持部署到[AWS](http://kubernetes.io/docs/getting-started-guides/aws),[Azure](http://kubernetes.io/docs/getting-started-guides/azure/),[GCE](http://kubernetes.io/docs/getting-started-guides/gce)等多种公有云环境。介绍分布式训练之前,需要对[Kubernetes](http://kubernetes.io/)有一个基本的认识,下面先简要介绍一下本文用到的几个Kubernetes概念。
+
+- [*Node*](http://kubernetes.io/docs/admin/node/) 表示一个Kubernetes集群中的一个工作节点,这个节点可以是物理机或者虚拟机,Kubernetes集群就是由node节点与master节点组成的。
+
+- [*Pod*](http://kubernetes.io/docs/user-guide/pods/) 是一组(一个或多个)容器,pod是Kubernetes的最小调度单元,一个pod中的所有容器会被调度到同一个node上。Pod中的容器共享NET,PID,IPC,UTS等Linux namespace。由于容器之间共享NET namespace,所以它们使用同一个IP地址,可以通过*localhost*互相通信。不同pod之间可以通过IP地址访问。
+
+- [*Job*](http://kubernetes.io/docs/user-guide/jobs/) 描述Kubernetes上运行的作业,一次作业称为一个job,通常每个job包括一个或者多个pods,job启动后会创建这些pod并开始执行一个程序,等待这个程序执行成功并返回0则成功退出,如果执行失败,也可以配置不同的重试机制。
+
+- [*Volume*](http://kubernetes.io/docs/user-guide/volumes/) 存储卷,是pod内的容器都可以访问的共享目录,也是容器与node之间共享文件的方式,因为容器内的文件都是暂时存在的,当容器因为各种原因被销毁时,其内部的文件也会随之消失。通过volume,就可以将这些文件持久化存储。Kubernetes支持多种volume,例如hostPath(宿主机目录),gcePersistentDisk,awsElasticBlockStore等。
+
+- [*Namespaces*](https://kubernetes.io/docs/user-guide/namespaces/) 命名空间,在kubernetes中创建的所有资源对象(例如上文的pod,job)等都属于一个命名空间,在同一个命名空间中,资源对象的名字是唯一的,不同空间的资源名可以重复,命名空间主要为了对象进行逻辑上的分组便于管理。本文只使用了默认命名空间。
+
+- [*PersistentVolume*](https://kubernetes.io/docs/user-guide/persistent-volumes/): 和[*PersistentVolumeClaim*](https://kubernetes.io/docs/user-guide/persistent-volumes/#persistentvolumeclaims)结合,将外部的存储服务在Kubernetes中描述成为统一的资源形式,便于存储资源管理和Pod引用。
+
+# 部署Kubernetes集群
+
+Kubernetes提供了多种集群部署的方案,本文档内不重复介绍。这里给出几种常见的部署方法:
+
+- [*minikube*](https://kubernetes.io/docs/getting-started-guides/minikube/): 快速在本地启动一个单机的kubernetes服务器,便于本地验证和测试。
+- [*kubeadm*](http://kubernetes.io/docs/getting-started-guides/kubeadm/): 在不同操作系统,不同主机(Bare-Metal, AWS, GCE)条件下,快速部署集群。
+- [*AWS EC2*](https://kubernetes.io/docs/getting-started-guides/aws/): 在aws上快速部署集群。
+- [*Bare-Metal*](https://kubernetes.io/docs/getting-started-guides/centos/centos_manual_config/): 在物理机上手动部署。
+
+可以参考[这个表格](https://kubernetes.io/docs/getting-started-guides/#table-of-solutions)选择适合您的场景的合适方案。
+
+# 选择存储方案
+
+容器不会保留在运行时生成的数据,job或者应用程序在容器中运行时生成的数据会在容器销毁时消失。为了完成分布式机器学习训练任务,需要有一个外部的存储服务来保存训练所需数据和训练输出。
+常见的可选存储服务包括:
+
+- [*NFS*](https://github.com/kubernetes/kubernetes/tree/master/examples/volumes/nfs): 可以将磁盘上某个目录共享给网络中其他机器访问。部署和配置比较简单,可以用于小量数据的验证。不提供分布式存储,高可用,冗余等功能。NFS的部署方法可以参考[这里](http://www.tecmint.com/how-to-setup-nfs-server-in-linux/)。
+- [*GlusterFS*](http://gluster.readthedocs.io/en/latest/Quick-Start-Guide/Quickstart/): 网络分布式文件系统,可以在Kubernetes中按照[这个](https://github.com/kubernetes/kubernetes/tree/master/examples/volumes/glusterfs)例子使用。
+- [*Ceph*](http://docs.ceph.com/docs/master/): 分布式文件系统,支持rbd,POSIX API接口(ceph fs)和对象存储API,参考[这里](https://kubernetes.io/docs/user-guide/volumes/#rbd)。
+- [*MooseFS*](https://moosefs.com/documentation.html): 一个分布式的存储系统。需要先挂载到服务器Node上再通过kubernetes hostPath Volume挂载到容器中。
+
+# 配置kubectl
+
+## 安装kubectl
+```
+# OS X
+curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/darwin/amd64/kubectl
+
+# Linux
+curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl
+
+# Windows
+curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/windows/amd64/kubectl.exe
+```
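+下载完成后,为kubectl二进制文件加上可执行权限并移动到PATH中即可使用(以Linux为例的示意步骤,安装路径可按需调整):
+```
+chmod +x ./kubectl
+sudo mv ./kubectl /usr/local/bin/kubectl
+kubectl version --client
+```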
+
+## 配置kubectl访问你的kubernetes集群
+
+编辑`~/.kube/config`这个配置文件,修改`Master-IP`的地址。如果使用SSL认证,则需要配置`certificate-authority`和`users`中的用户证书。如果是使用非SSL方式访问(比如通过8080端口),也可以去掉这些证书的配置。
+```
+apiVersion: v1
+clusters:
+- cluster:
+    certificate-authority: /path/to/ca.crt
+    server: https://[Master-IP]:443
+  name: minikube
+contexts:
+- context:
+    cluster: minikube
+    user: minikube
+  name: minikube
+current-context: minikube
+kind: Config
+preferences: {}
+users:
+- name: minikube
+  user:
+    client-certificate: /path/to/apiserver.crt
+    client-key: /path/to/apiserver.key
+```
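+配置完成后,可以用下面的命令验证kubectl是否能够连接到集群(示意;实际输出取决于具体集群):
+```
+kubectl cluster-info
+kubectl get nodes
+```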
diff --git a/doc/howto/usage/k8s/k8s_distributed_cn.md b/doc/howto/usage/k8s/k8s_distributed_cn.md
index 2063b98ca8aab9c348fe2b53bb1e6d96b7750dd3..2a7a6c8c17882a6f2c95e933e051c4b8f1a8eeee 100644
--- a/doc/howto/usage/k8s/k8s_distributed_cn.md
+++ b/doc/howto/usage/k8s/k8s_distributed_cn.md
@@ -2,181 +2,96 @@
 前一篇文章介绍了如何在Kubernetes集群上启动一个单机PaddlePaddle训练作业 (Job)。在这篇文章里,我们介绍如何在Kubernetes集群上进行分布式PaddlePaddle训练作业。关于PaddlePaddle的分布式训练,文章 [Cluster Training](https://github.com/baidu/Paddle/blob/develop/doc/cluster/opensource/cluster_train.md)介绍了一种通过SSH远程分发任务,进行分布式训练的方法,与此不同的是,本文将介绍在Kubernetes容器管理平台上快速构建PaddlePaddle容器集群,进行分布式训练的方案。

-## Kubernetes 基本概念
-
-[*Kubernetes*](http://kubernetes.io/)是Google开源的容器集群管理系统,其提供应用部署、维护、 扩展机制等功能,利用Kubernetes能方便地管理跨机器运行容器化的应用。Kubernetes可以在物理机或虚拟机上运行,且支持部署到[AWS](http://kubernetes.io/docs/getting-started-guides/aws),[Azure](http://kubernetes.io/docs/getting-started-guides/azure/),[GCE](http://kubernetes.io/docs/getting-started-guides/gce)等多种公有云环境。介绍分布式训练之前,需要对[Kubernetes](http://kubernetes.io/)有一个基本的认识,下面先简要介绍一下本文用到的几个Kubernetes概念。
-
-- [*Node*](http://kubernetes.io/docs/admin/node/) 表示一个Kubernetes集群中的一个工作节点,这个节点可以是物理机或者虚拟机,Kubernetes集群就是由node节点与master节点组成的。
-
-- [*Pod*](http://kubernetes.io/docs/user-guide/pods/) 是一组(一个或多个)容器,pod是Kubernetes的最小调度单元,一个pod中的所有容器会被调度到同一个node上。Pod中的容器共享NET,PID,IPC,UTS等Linux namespace。由于容器之间共享NET namespace,所以它们使用同一个IP地址,可以通过*localhost*互相通信。不同pod之间可以通过IP地址访问。
-
-- [*Job*](http://kubernetes.io/docs/user-guide/jobs/) 是Kubernetes上运行的作业,一次作业称为一个job,通常每个job包括一个或者多个pods。
-
-- [*Volume*](http://kubernetes.io/docs/user-guide/volumes/) 存储卷,是pod内的容器都可以访问的共享目录,也是容器与node之间共享文件的方式,因为容器内的文件都是暂时存在的,当容器因为各种原因被销毁时,其内部的文件也会随之消失。通过volume,就可以将这些文件持久化存储。Kubernetes支持多种volume,例如hostPath(宿主机目录),gcePersistentDisk,awsElasticBlockStore等。
-
-- [*Namespaces*](http://kubernetes.io/docs/user-guide/volumes/) 命名空间,在kubernetes中创建的所有资源对象(例如上文的pod,job)等都属于一个命名空间,在同一个命名空间中,资源对象的名字是唯一的,不同空间的资源名可以重复,命名空间主要为了对象进行逻辑上的分组便于管理。本文只使用了默认命名空间。
+有关Kubernetes相关概念以及如何搭建和配置Kubernetes集群,可以参考[k8s_basis](./k8s_basis_cn.md)。

 ## 整体方案

-### 部署Kubernetes集群
-
-首先,我们需要拥有一个Kubernetes集群,在这个集群中所有node与pod都可以互相通信。关于Kubernetes集群搭建,可以参考[官方文档](http://kubernetes.io/docs/getting-started-guides/kubeadm/),在以后的文章中我们也会介绍AWS上搭建的方案。本文假设大家能找到几台物理机,并且可以按照官方文档在上面部署Kubernetes。在本文的环境中,Kubernetes集群中所有node都挂载了一个[MFS](http://moosefs.org/)(Moose filesystem,一种分布式文件系统)共享目录,我们通过这个目录来存放训练文件与最终输出的模型。关于MFS的安装部署,可以参考[MooseFS documentation](https://moosefs.com/documentation.html)。在训练之前,用户将配置与训练数据切分好放在MFS目录中,训练时,程序从此目录拷贝文件到容器内进行训练,将结果保存到此目录里。整体的结构图如下:
+在训练之前,用户将配置与训练数据切分好放在分布式文件系统预先分配好的目录中(不同的分布式文件系统,需要使用其指定的方式挂载后并导入数据),训练时,程序从此目录拷贝文件到容器内进行训练,将结果保存到此目录里。整体的结构图如下:

![paddle on kubernetes结构图](src/k8s-paddle-arch.png)

-上图描述了一个3节点的分布式训练场景,Kubernetes集群的每个node上都挂载了一个MFS目录,这个目录可以通过volume的形式挂载到容器中。Kubernetes为这次训练创建了3个pod并且调度到了3个node上运行,每个pod包含一个PaddlePaddle容器。在容器创建后,会启动pserver与trainer进程,读取volume中的数据进行这次分布式训练。
-
-### 使用 Job
-
-我们使用Kubernetes中的job这个概念来代表一次分布式训练。Job表示一次性作业,在作业完成后,Kubernetes会销毁job产生的容器并且释放相关资源。
-
-在Kubernetes中,可以通过编写一个YAML文件,来描述这个job,在这个文件中,主要包含了一些配置信息,例如PaddlePaddle的节点个数,`paddle pserver`开放的端口个数与端口号,使用的网卡设备等,这些信息通过环境变量的形式传递给容器内的程序使用。
-
-在一次分布式训练中,用户确定好本次训练需要的PaddlePaddle节点个数,将切分好的训练数据与配置文件上传到MFS共享目录中。然后编写这次训练的job YAML文件,提交给Kubernetes集群创建并开始作业。
-
-### 创建PaddlePaddle节点
+上图描述了一个3节点的分布式训练场景,在每个Pod上都通过volume方式挂载分布式文件系统的一个目录用于保存训练数据和输出结果。Kubernetes为这次训练创建了3个pod并且调度到了3个node上运行,每个pod包含一个PaddlePaddle容器。在容器创建后,会启动pserver与trainer进程,读取volume中的数据进行这次分布式训练。

-当Kubernetes master收到请求,解析完YAML文件后,会创建出多个pod(个数为PaddlePaddle节点数),Kubernetes会把这些pod调度到集群的node上运行。一个pod就代表一个PaddlePaddle节点,当pod被成功分配到一台物理/虚拟机上后,Kubernetes会启动pod内的容器,这个容器会根据YAML文件中的环境变量,启动`paddle pserver`与`paddle train`进程。
+根据前文的描述,要在已有的Kubernetes集群上进行PaddlePaddle的分布式训练,按照下面步骤即可:

-### 启动训练
-
-在容器启动后,会通过脚本来启动这次分布式训练,我们知道`paddle train`进程启动时需要知道其他节点的IP地址以及本节点的trainer_id,由于PaddlePaddle本身不提供类似服务发现的功能,所以在本文的启动脚本中,每个节点会根据job name向Kubernetes apiserver查询这个job对应的所有pod信息(Kubernetes默认会在每个容器的环境变量中写入apiserver的地址)。
-
-根据这些pod信息,就可以通过某种方式,为每个pod分配一个唯一的trainer_id。本文把所有pod的IP地址进行排序,将顺序作为每个PaddlePaddle节点的trainer_id。启动脚本的工作流程大致如下:
-
-	1. 查询Kubernetes apiserver获取pod信息,根据IP分配trainer_id
-	1. 从MFS共享目录中拷贝训练文件到容器内
-	1. 根据环境变量,解析出`paddle pserver`与`paddle train`的启动参数,启动进程
-	1. 训练时,PaddlePaddle会自动将结果保存在trainer_id为0的节点上,将输出路径设置为MFS目录,保存输出的文件
-
-
-## 搭建过程
-
-根据前文的描述,要在已有的Kubernetes集群上进行PaddlePaddle的分布式训练,主要分为以下几个步骤:
-
-1. 制作PaddlePaddle镜像
-1. 将训练文件与切分好的数据上传到共享存储
-1. 编写本次训练的YAML文件,创建一个Kubernetes job
-1. 训练结束后查看输出结果
+1. [制作PaddlePaddle镜像](#制作镜像)
+1. [将训练文件与切分好的数据上传到共享存储](#上传训练文件)
+1. [编写本次训练的YAML文件,创建一个Kubernetes job](#创建Job)
+1. 
[训练结束后查看输出结果](#查看输出) 下面就根据这几个步骤分别介绍。 - ### 制作镜像 PaddlePaddle镜像需要提供`paddle pserver`与`paddle train`进程的运行环境,用这个镜像创建的容器需要有以下两个功能: - 拷贝训练文件到容器内 - - 生成`paddle pserver`与`paddle train`进程的启动参数,并且启动训练 -因为官方镜像 `paddledev/paddle:cpu-latest` 内已经包含PaddlePaddle的执行程序但是还没上述功能,所以我们可以在这个基础上,添加启动脚本,制作新镜像来完成以上的工作。镜像的*Dockerfile*如下: - -```Dockerfile -FROM paddledev/paddle:cpu-latest - -MAINTAINER zjsxzong89@gmail.com - -COPY start.sh /root/ -COPY start_paddle.py /root/ -CMD ["bash"," -c","/root/start.sh"] -``` - -[start.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/usage/cluster/k8s/start.sh)文件拷贝训练文件到容器内,然后执行[start_paddle.py](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/usage/cluster/k8s/start_paddle.py)脚本启动训练,前文提到的获取其他节点IP地址,分配`trainer_id`等都在`start_paddle.py`脚本中完成。 - -`start_paddle.py`脚本开始时,会先进行参数的初始化与解析。 - -```python -parser = argparse.ArgumentParser(prog="start_paddle.py", - description='simple tool for k8s') - args, train_args_list = parser.parse_known_args() - train_args = refine_unknown_args(train_args_list) - train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2])) - podlist = getPodList() -``` - -然后通过函数`getPodList()`访问Kubernetes的接口来查询此job对应的所有pod信息。当所有pod都处于running状态(容器运行都运行)时,再通过函数`getIdMap(podlist)`获取trainer_id。 - -```python - podlist = getPodList() - # need to wait until all pods are running - while not isPodAllRunning(podlist): - time.sleep(10) - podlist = getPodList() - idMap = getIdMap(podlist) -``` - -在函数`getIdMap(podlist)`内部,我们通过读取`podlist`中每个pod的IP地址,将IP排序生成的序号作为trainer_id。 - -```python -def getIdMap(podlist): - ''' - generate tainer_id by ip - ''' - ips = [] - for pod in podlist["items"]: - ips.append(pod["status"]["podIP"]) - ips.sort() - idMap = {} - for i in range(len(ips)): - idMap[ips[i]] = i - return idMap -``` - -在得到`idMap`后,通过函数`startPaddle(idMap, train_args_dict)`构造`paddle pserver`与`paddle train`的启动参数并执行进程。 - -在函数`startPaddle`中,最主要的工作就是解析出`paddle pserver`与`paddle train`的启动参数。例如`paddle train`参数的解析,解析环境变量得到`PADDLE_NIC`,`PADDLE_PORT`,`PADDLE_PORTS_NUM`等参数,然后通过自身的IP地址在`idMap`中获取`trainerId`。 - -```python - program = 'paddle train' - args = " --nics=" + PADDLE_NIC - args += " --port=" + str(PADDLE_PORT) - args += " --ports_num=" + str(PADDLE_PORTS_NUM) - args += " --comment=" + "paddle_process_by_paddle" - ip_string = "" - for ip in idMap.keys(): - ip_string += (ip + ",") - ip_string = ip_string.rstrip(",") - args += " --pservers=" + ip_string - args_ext = "" - for key, value in train_args_dict.items(): - args_ext += (' --' + key + '=' + value) - localIP = socket.gethostbyname(socket.gethostname()) - trainerId = idMap[localIP] - args += " " + args_ext + " --trainer_id=" + \ - str(trainerId) + " --save_dir=" + JOB_PATH_OUTPUT -``` - -使用 `docker build` 构建镜像: +因为官方镜像 `paddledev/paddle:cpu-latest` 内已经包含PaddlePaddle的执行程序但是还没上述功能,所以我们可以在这个基础上,添加启动脚本,制作新镜像来完成以上的工作。参考镜像的[*Dockerfile*](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/usage/cluster/k8s/src/k8s_train/Dockerfile)。 ```bash -docker build -t your_repo/paddle:mypaddle . +$ cd doc/howto/usage/k8s/src/k8s_train +$ docker build -t [YOUR_REPO]/paddle:mypaddle . 
```

然后将构建成功的镜像上传到镜像仓库。

```bash
-docker push your_repo/paddle:mypaddle
+docker push [YOUR_REPO]/paddle:mypaddle
```

-注意上述命令中`your_repo`表示读者所使用的Docker镜像仓库地址,读者需要替换成自己使用的仓库地址。下文使用`your_repo/paddle:mypaddle`这个地址来表示此步骤所构建出的镜像。
+注意上述命令中`[YOUR_REPO]`表示读者所使用的Docker镜像仓库地址,读者需要替换成自己使用的仓库地址。下文使用`[YOUR_REPO]/paddle:mypaddle`这个地址来表示此步骤所构建出的镜像。

-### 上传训练文件
+### 准备训练数据

-本文使用PaddlePaddle官方的[recommendation demo](http://www.paddlepaddle.org/doc/demo/index.html#recommendation)作为这次训练的内容,我们将训练文件与数据放在一个job name命名的目录中,上传到MFS共享存储。完成后MFS上的文件内容大致如下:
+这里我们通过在Kubernetes集群上启动一个Job来下载并切割数据,也可以通过修改[k8s_train](./src/k8s_train/README.md)的内容来定制镜像。

-```bash
-[root@paddle-kubernetes-node0 mfs]# tree -d
+在启动Job之前,需要根据不同的分布式存储来绑定一个[persistentVolumeClaim](https://kubernetes.io/docs/user-guide/persistent-volumes/),生成的数据将会存储在这个volume下。
+
+```yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: paddle-data
+spec:
+  template:
+    metadata:
+      name: pi
+    spec:
+      hostNetwork: true
+      containers:
+      - name: paddle-data
+        image: paddledev/paddle-tutorial:k8s_data
+        imagePullPolicy: Always
+        volumeMounts:
+        - mountPath: "/mnt"
+          name: nfs
+        env:
+        - name: OUT_DIR
+          value: /home/work/mfs/paddle-cluster-job
+        - name: SPLIT_COUNT
+          value: "3"
+      volumes:
+        - name: nfs
+          persistentVolumeClaim:
+            claimName: mfs
+      restartPolicy: Never
+```
+
+完成后volume中的文件内容大致如下:
+```bash
+[root@paddle-kubernetes-node0 nfsdir]$ tree -d
 .
-└── paddle-cluster-job
-    ├── data
-    │   ├── 0
-    │   │
-    │   ├── 1
-    │   │
-    │   └── 2
-    ├── output
-    └── recommendation
+`-- paddle-cluster-job
+    |-- 0
+    |   `-- data
+    |-- 1
+    |   `-- data
+    |-- 2
+    |   `-- data
+    |-- output
+    |-- quick_start
```

目录中paddle-cluster-job是本次训练对应的job name,本次训练要求有3个PaddlePaddle节点,在paddle-cluster-job/data目录中存放切分好的数据,文件夹0,1,2分别代表3个节点的trainer_id。recommendation文件夹内存放训练文件,output文件夹存放训练结果与日志。

@@ -205,7 +120,7 @@ spec:
       path: /home/work/mfs
   containers:
     - name: trainer
-      image: your_repo/paddle:mypaddle
+      image: [YOUR_REPO]/paddle:mypaddle
       command: ["bin/bash",  "-c",  "/root/start.sh"]
       env:
       - name: JOB_NAME
@@ -236,15 +151,16 @@ spec:

`env`字段表示容器的环境变量,我们将`paddle`运行的一些参数通过这种方式传递到容器内。

-`JOB_PATH`表示共享存储挂载的路径,`JOB_NAME`表示job名字,`TRAIN_CONFIG_DIR`表示本次训练文件所在目录,这三个变量组合就可以找到本次训练需要的文件路径。
-
-`CONF_PADDLE_NIC`表示`paddle pserver`进程需要的`--nics`参数,即网卡名
-
-`CONF_PADDLE_PORT`表示`paddle pserver`的`--port`参数,`CONF_PADDLE_PORTS_NUM`则表示稠密更新的端口数量,也就是`--ports_num`参数。
-
-`CONF_PADDLE_PORTS_NUM_SPARSE`表示稀疏更新的端口数量,也就是`--ports_num_for_sparse`参数。
-
-`CONF_PADDLE_GRADIENT_NUM`表示训练节点数量,即`--num_gradient_servers`参数
+环境变量 | 说明
+--- | ---
+JOB_PATH | 共享存储挂载的路径
+JOB_NAME | Job的名字
+TRAIN_CONFIG_DIR | 本次训练文件所在目录,与JOB_PATH,JOB_NAME组合可以找到本次训练需要的文件路径
+CONF_PADDLE_NIC | `paddle pserver`进程需要的`--nics`参数,即网卡名
+CONF_PADDLE_PORT | `paddle pserver`的`--port`参数
+CONF_PADDLE_PORTS_NUM | 稠密更新的端口数量,即`--ports_num`参数
+CONF_PADDLE_PORTS_NUM_SPARSE | 稀疏更新的端口数量,即`--ports_num_for_sparse`参数
+CONF_PADDLE_GRADIENT_NUM | 训练节点数量,即`--num_gradient_servers`参数

这些参数的具体描述,读者可以查看[这里](http://www.paddlepaddle.org/doc/ui/cmd_argument/detail_introduction.html#parameter-server-and-distributed-communication)。

@@ -289,8 +205,8 @@ I1116 09:10:17.123121    50 Util.cpp:155] commandline:
     --ports_num=2 --comment=paddle_process_by_paddle
     --pservers=192.168.129.66,192.168.223.143,192.168.129.71
     --ports_num_for_sparse=2 --config=./trainer_config.py
-    --trainer_count=4 --num_passes=10 --use_gpu=0
-    --log_period=50 --dot_period=10 --saving_period=1
+    --trainer_count=4 --num_passes=10 --use_gpu=0
+    --log_period=50 --dot_period=10 --saving_period=1
     --local=0 --trainer_id=0
--save_dir=/home/jobpath/paddle-cluster-job/output
 I1116 09:10:17.123440    50 Util.cpp:130] Calling runInitFunctions
@@ -310,3 +226,90 @@ I1116 09:10:18.019492    50 ParameterClient2.cpp:122] pserver 3 192.168.223.143:
 I1116 09:10:18.019716    50 ParameterClient2.cpp:122] pserver 4 192.168.129.71:7164
 I1116 09:10:18.019836    50 ParameterClient2.cpp:122] pserver 5 192.168.129.71:7165
 ```
+
+
+## 一些细节的补充
+
+### 使用环境变量
+
+使用容器方式运行训练任务的Kubernetes Job,通常会使用环境变量配置Job的配置信息。`start_paddle.py`提供了一个启动脚本,将环境变量转换成paddle的命令行参数:
+```
+API = "/api/v1/namespaces/"
+JOBSELECTOR = "labelSelector=job-name="
+JOB_PATH = os.getenv("JOB_PATH") + "/" + os.getenv("JOB_NAME")
+JOB_PATH_OUTPUT = JOB_PATH + "/output"
+JOBNAME = os.getenv("JOB_NAME")
+NAMESPACE = os.getenv("JOB_NAMESPACE")
+PADDLE_NIC = os.getenv("CONF_PADDLE_NIC")
+PADDLE_PORT = os.getenv("CONF_PADDLE_PORT")
+PADDLE_PORTS_NUM = os.getenv("CONF_PADDLE_PORTS_NUM")
+PADDLE_PORTS_NUM_SPARSE = os.getenv("CONF_PADDLE_PORTS_NUM_SPARSE")
+PADDLE_SERVER_NUM = os.getenv("CONF_PADDLE_GRADIENT_NUM")
+```
+
+### Pod间通信
+`start_paddle.py`脚本开始时,会先进行参数的初始化与解析。
+
+```python
+parser = argparse.ArgumentParser(prog="start_paddle.py",
+                                 description='simple tool for k8s')
+    args, train_args_list = parser.parse_known_args()
+    train_args = refine_unknown_args(train_args_list)
+    train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2]))
+    podlist = getPodList()
+```
+
+然后通过函数`getPodList()`访问Kubernetes的接口来查询此job对应的所有pod信息。当所有pod都处于running状态(容器都在运行)时,再通过函数`getIdMap(podlist)`获取trainer_id。
+
+```python
+    podlist = getPodList()
+    # need to wait until all pods are running
+    while not isPodAllRunning(podlist):
+        time.sleep(10)
+        podlist = getPodList()
+    idMap = getIdMap(podlist)
+```
+* *注意*: `getPodList()`会获取当前namespace下的所有pod,如果已经有pod运行,可能会导致出错。这种集群节点管理方式会在将来使用[StatefulSets](https://kubernetes.io/docs/concepts/abstractions/controllers/statefulsets/)代替。
+
+在函数`getIdMap(podlist)`内部,我们通过读取`podlist`中每个pod的IP地址,将IP排序生成的序号作为trainer_id。
+
+```python
+def getIdMap(podlist):
+    '''
+    generate trainer_id by ip
+    '''
+    ips = []
+    for pod in podlist["items"]:
+        ips.append(pod["status"]["podIP"])
+    ips.sort()
+    idMap = {}
+    for i in range(len(ips)):
+        idMap[ips[i]] = i
+    return idMap
+```
+
+在得到`idMap`后,通过函数`startPaddle(idMap, train_args_dict)`构造`paddle pserver`与`paddle train`的启动参数并执行进程。
+
+### 启动任务
+
+在函数`startPaddle`中,最主要的工作就是解析出`paddle pserver`与`paddle train`的启动参数。例如`paddle train`参数的解析,解析环境变量得到`PADDLE_NIC`,`PADDLE_PORT`,`PADDLE_PORTS_NUM`等参数,然后通过自身的IP地址在`idMap`中获取`trainerId`。

+```python
+    program = 'paddle train'
+    args = " --nics=" + PADDLE_NIC
+    args += " --port=" + str(PADDLE_PORT)
+    args += " --ports_num=" + str(PADDLE_PORTS_NUM)
+    args += " --comment=" + "paddle_process_by_paddle"
+    ip_string = ""
+    for ip in idMap.keys():
+        ip_string += (ip + ",")
+    ip_string = ip_string.rstrip(",")
+    args += " --pservers=" + ip_string
+    args_ext = ""
+    for key, value in train_args_dict.items():
+        args_ext += (' --' + key + '=' + value)
+    localIP = socket.gethostbyname(socket.gethostname())
+    trainerId = idMap[localIP]
+    args += " " + args_ext + " --trainer_id=" + \
+        str(trainerId) + " --save_dir=" + JOB_PATH_OUTPUT
+```
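+拼接完成后,trainer_id为0的节点最终执行的命令行大致如下(仅为示意,具体参数取决于环境变量与`idMap`的内容;示例值取自上文的训练日志):
+```
+paddle train --nics=eth0 --port=7164 --ports_num=2 \
+    --comment=paddle_process_by_paddle \
+    --pservers=192.168.129.66,192.168.223.143,192.168.129.71 \
+    --trainer_id=0 --save_dir=/home/jobpath/paddle-cluster-job/output
+```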
diff --git a/doc/howto/usage/k8s/src/add_security_group.png b/doc/howto/usage/k8s/src/add_security_group.png
index 50eed4c6573a18d6ae0f9df9bd6a3cae05493e3c..bd34f46c9b0ada7027fd53e553e7d033255d25fc 100644
Binary files a/doc/howto/usage/k8s/src/add_security_group.png and b/doc/howto/usage/k8s/src/add_security_group.png differ
diff --git a/doc/howto/usage/k8s/src/create_efs.png b/doc/howto/usage/k8s/src/create_efs.png
index f4d448d1518e11a11d535efb9c3a78b56cc13149..e5f1526033d1daf401700989af1d25919bcb7675 100644
Binary files a/doc/howto/usage/k8s/src/create_efs.png and b/doc/howto/usage/k8s/src/create_efs.png differ
diff --git a/doc/howto/usage/k8s/src/job.yaml b/doc/howto/usage/k8s/src/job.yaml
deleted file mode 100644
index 488aad0bede4f940b25c7be04259f209c3de9f52..0000000000000000000000000000000000000000
--- a/doc/howto/usage/k8s/src/job.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: paddle-cluster-job
-spec:
-  parallelism: 3
-  completions: 3
-  template:
-    metadata:
-      name: paddle-cluster-job
-    spec:
-      volumes:
-      - name: jobpath
-        hostPath:
-          path: /home/work/paddle_output
-      containers:
-      - name: trainer
-        image: registry.baidu.com/public/paddle:mypaddle
-        command: ["bin/bash",  "-c",  "/root/start.sh"]
-        env:
-        - name: JOB_NAME
-          value: paddle-cluster-job
-        - name: JOB_PATH
-          value: /home/jobpath
-        - name: JOB_NAMESPACE
-          value: default
-        - name: TRAIN_CONFIG_DIR
-          value: recommendation
-        - name: CONF_PADDLE_NIC
-          value: eth0
-        - name: CONF_PADDLE_PORT
-          value: "7164"
-        - name: CONF_PADDLE_PORTS_NUM
-          value: "2"
-        - name: CONF_PADDLE_PORTS_NUM_SPARSE
-          value: "2"
-        - name: CONF_PADDLE_GRADIENT_NUM
-          value: "3"
-        volumeMounts:
-        - name: jobpath
-          mountPath: /home/jobpath
-      restartPolicy: Never
-
diff --git a/doc/howto/usage/k8s/src/k8s-paddle-arch.png b/doc/howto/usage/k8s/src/k8s-paddle-arch.png
index a8c64550b1fa7f41de1eaa9a037c65cddc0cd30e..2183a232ad402b76f82a67234a5c93e13ce97ac3 100644
Binary files a/doc/howto/usage/k8s/src/k8s-paddle-arch.png and b/doc/howto/usage/k8s/src/k8s-paddle-arch.png differ
diff --git a/doc/howto/usage/k8s/src/k8s_data/Dockerfile b/doc/howto/usage/k8s/src/k8s_data/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..6d3a12ae393aa594b8e6e9a5f726109426937284
--- /dev/null
+++ b/doc/howto/usage/k8s/src/k8s_data/Dockerfile
@@ -0,0 +1,7 @@
+FROM alpine
+
+RUN apk update && apk upgrade && apk add coreutils
+ADD quick_start /quick_start
+ADD get_data.sh /bin/
+RUN chmod +x /bin/get_data.sh
+ENTRYPOINT ["/bin/get_data.sh"]
diff --git a/doc/howto/usage/k8s/src/k8s_data/README.md b/doc/howto/usage/k8s/src/k8s_data/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..83cef7affd0ac4d3a1ca08ea5b046fa81e1bc630
--- /dev/null
+++ b/doc/howto/usage/k8s/src/k8s_data/README.md
@@ -0,0 +1,6 @@
+To build the PaddlePaddle data preparation image used in the tutorial [Distributed PaddlePaddle Training on AWS with Kubernetes](../../k8s_aws_en.md), run the following commands:
+
+```
+cp -r ../../../../../../demo/quick_start .
+docker build . -t prepare-data-image-name
+```
diff --git a/doc/howto/usage/k8s/src/k8s_data/get_data.sh b/doc/howto/usage/k8s/src/k8s_data/get_data.sh
new file mode 100755
index 0000000000000000000000000000000000000000..d187ba5ac8d03f69dfdefd4f63610ed7921575be
--- /dev/null
+++ b/doc/howto/usage/k8s/src/k8s_data/get_data.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+out_dir=$OUT_DIR
+split_count=$SPLIT_COUNT
+
+set -e
+
+mkdir -p $out_dir
+cp -r /quick_start $out_dir/
+
+mkdir -p $out_dir/0/data
+cd $out_dir/0/data
+wget http://paddlepaddle.bj.bcebos.com/demo/quick_start_preprocessed_data/preprocessed_data.tar.gz
+tar zxvf preprocessed_data.tar.gz
+rm preprocessed_data.tar.gz
+
+split -d --number=l/$split_count -a 5 train.txt train.
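+# Split 0 becomes this shard's train.txt; the loop below copies the data
+# directory to shards 1..SPLIT_COUNT-1 and renames each shard's split file
+# to train.txt.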
+mv train.00000 train.txt
+
+cd $out_dir
+end=$(expr $split_count - 1)
+for i in $(seq 1 $end); do
+    mkdir -p $i/data
+    cp -r 0/data/* $i/data
+    mv $i/data/train.`printf %05d $i` $i/data/train.txt
+done;
diff --git a/doc/howto/usage/k8s/src/k8s_train/Dockerfile b/doc/howto/usage/k8s/src/k8s_train/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..c0fca1f9a945921e6e8899fee2db8845e66136a1
--- /dev/null
+++ b/doc/howto/usage/k8s/src/k8s_train/Dockerfile
@@ -0,0 +1,6 @@
+FROM paddledev/paddle:cpu-latest
+
+COPY start.sh /root/
+COPY start_paddle.py /root/
+RUN chmod +x /root/start.sh
+CMD ["bash", "-c", "/root/start.sh"]
diff --git a/doc/howto/usage/k8s/src/k8s_train/README.md b/doc/howto/usage/k8s/src/k8s_train/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..96bf65497ffa23e90c4c9350504f86367b48daf2
--- /dev/null
+++ b/doc/howto/usage/k8s/src/k8s_train/README.md
@@ -0,0 +1,5 @@
+To build the PaddlePaddle training image used in the tutorial [Distributed PaddlePaddle Training on AWS with Kubernetes](../../k8s_aws_en.md), run the following command:
+
+```
+docker build . -t train-image-name
+```
diff --git a/doc/howto/usage/k8s/src/start.sh b/doc/howto/usage/k8s/src/k8s_train/start.sh
similarity index 55%
rename from doc/howto/usage/k8s/src/start.sh
rename to doc/howto/usage/k8s/src/k8s_train/start.sh
index b3a1334174a20b018d35de3b01b149fc5b10d49d..12dfe1e6386885a6989d3887f21c6922f137a9ae 100755
--- a/doc/howto/usage/k8s/src/start.sh
+++ b/doc/howto/usage/k8s/src/k8s_train/start.sh
@@ -1,19 +1,19 @@
 #!/bin/sh
+
 set -eu

 jobconfig=${JOB_PATH}"/"${JOB_NAME}"/"${TRAIN_CONFIG_DIR}
 cd /root
-cp -rf $jobconfig .
-cd $TRAIN_CONFIG_DIR
-
+cp -rf $jobconfig/* .

 python /root/start_paddle.py \
     --dot_period=10 \
-    --ports_num_for_sparse=$CONF_PADDLE_PORTS_NUM \
+    --ports_num=$CONF_PADDLE_PORTS_NUM \
+    --ports_num_for_sparse=$CONF_PADDLE_PORTS_NUM_SPARSE \
     --log_period=50 \
     --num_passes=10 \
-    --trainer_count=4 \
+    --trainer_count=$TRAINER_COUNT \
     --saving_period=1 \
     --local=0 \
-    --config=./trainer_config.py \
+    --config=trainer_config.lr.py \
     --use_gpu=0
diff --git a/doc/howto/usage/k8s/src/start_paddle.py b/doc/howto/usage/k8s/src/k8s_train/start_paddle.py
similarity index 83%
rename from doc/howto/usage/k8s/src/start_paddle.py
rename to doc/howto/usage/k8s/src/k8s_train/start_paddle.py
index df00d82919faa2acecc79c28e3d773ba3de9672a..935c12bb67e1fe08bc135a7a2220fcd43c548482 100755
--- a/doc/howto/usage/k8s/src/start_paddle.py
+++ b/doc/howto/usage/k8s/src/k8s_train/start_paddle.py
@@ -23,7 +23,6 @@ import argparse
 API = "/api/v1/namespaces/"
 JOBSELECTOR = "labelSelector=job-name="
 JOB_PATH = os.getenv("JOB_PATH") + "/" + os.getenv("JOB_NAME")
-JOB_PATH_DATA = JOB_PATH + "/data"
 JOB_PATH_OUTPUT = JOB_PATH + "/output"
 JOBNAME = os.getenv("JOB_NAME")
 NAMESPACE = os.getenv("JOB_NAMESPACE")
@@ -33,6 +32,8 @@ PADDLE_PORTS_NUM = os.getenv("CONF_PADDLE_PORTS_NUM")
 PADDLE_PORTS_NUM_SPARSE = os.getenv("CONF_PADDLE_PORTS_NUM_SPARSE")
 PADDLE_SERVER_NUM = os.getenv("CONF_PADDLE_GRADIENT_NUM")

+tokenpath = '/var/run/secrets/kubernetes.io/serviceaccount/token'
+

 def refine_unknown_args(cmd_args):
     '''
@@ -64,6 +65,7 @@ def isPodAllRunning(podlist):
     for pod in podlist["items"]:
         if pod["status"]["phase"] == "Running":
             running += 1
+    print "waiting for pods running, require:", require, "running:", running
     if require == running:
         return True
     return False
@@ -79,8 +81,17 @@ def getPodList():
     pod = API + NAMESPACE + "/pods?"
job = JOBNAME - return requests.get(apiserver + pod + JOBSELECTOR + job, - verify=False).json() + if os.path.isfile(tokenpath): + tokenfile = open(tokenpath, mode='r') + token = tokenfile.read() + Bearer = "Bearer " + token + headers = {"Authorization": Bearer} + return requests.get(apiserver + pod + JOBSELECTOR + job, + headers=headers, + verify=False).json() + else: + return requests.get(apiserver + pod + JOBSELECTOR + job, + verify=False).json() def getIdMap(podlist): @@ -121,9 +132,10 @@ def startPaddle(idMap={}, train_args_dict=None): logDir = JOB_PATH_OUTPUT + "/node_" + str(trainerId) if not os.path.exists(JOB_PATH_OUTPUT): os.makedirs(JOB_PATH_OUTPUT) - os.mkdir(logDir) - copyCommand = 'cp -rf ' + JOB_PATH_DATA + \ - "/" + str(trainerId) + " ./data" + if not os.path.exists(logDir): + os.mkdir(logDir) + copyCommand = 'cp -rf ' + JOB_PATH + \ + "/" + str(trainerId) + "/data/*" + " ./data/" os.system(copyCommand) startPserver = 'nohup paddle pserver' + \ " --port=" + str(PADDLE_PORT) + \ @@ -136,9 +148,9 @@ def startPaddle(idMap={}, train_args_dict=None): print startPserver os.system(startPserver) # wait until pservers completely start - time.sleep(10) - startTrainer = program + args + " > " + \ - logDir + "/train.log 2>&1 < /dev/null" + time.sleep(20) + startTrainer = program + args + " 2>&1 | tee " + \ + logDir + "/train.log" print startTrainer os.system(startTrainer) @@ -152,7 +164,7 @@ if __name__ == '__main__': podlist = getPodList() # need to wait until all pods are running while not isPodAllRunning(podlist): - time.sleep(10) + time.sleep(20) podlist = getPodList() idMap = getIdMap(podlist) startPaddle(idMap, train_args_dict) diff --git a/doc/howto/usage/k8s/src/route53_create_recordset.png b/doc/howto/usage/k8s/src/route53_create_recordset.png new file mode 100644 index 0000000000000000000000000000000000000000..34e476c7beac30fcdde13fccc4cc8d08b4be3d35 Binary files /dev/null and b/doc/howto/usage/k8s/src/route53_create_recordset.png differ diff --git a/doc/howto/usage/k8s/src/route53_create_zone.png b/doc/howto/usage/k8s/src/route53_create_zone.png new file mode 100644 index 0000000000000000000000000000000000000000..25b7ddb831c5cba97f4b2edddd27da3234d621af Binary files /dev/null and b/doc/howto/usage/k8s/src/route53_create_zone.png differ diff --git a/doc/howto/usage/k8s/src/worker_security_group.png b/doc/howto/usage/k8s/src/worker_security_group.png new file mode 100644 index 0000000000000000000000000000000000000000..57eb0265a34ad4223b69600d2a3dd355482e0bf5 Binary files /dev/null and b/doc/howto/usage/k8s/src/worker_security_group.png differ diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in index 418d718fbd9c61bff3acb9c2dab0638c0b650bab..6dc48704bc230bd1a573c4b4b2e7c07791e48ced 100644 --- a/doc/templates/conf.py.cn.in +++ b/doc/templates/conf.py.cn.in @@ -15,13 +15,19 @@ import sys import os, subprocess import shlex from recommonmark import parser, transform +try: + import py_paddle + import paddle + import paddle.v2 +except ImportError: + print("Must install paddle python package before generating documentation") + sys.exit(1) MarkdownParser = parser.CommonMarkParser AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
-sys.path.insert(0, '@PROJ_ROOT@/python') templates_path = ["@PROJ_ROOT@/doc_theme/templates"] # -- General configuration ------------------------------------------------ diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in index e96c25cb75bee20d2e2949423d80ddab1d3450a1..b477f0120c4fa0544012080b7cfb8572d3c44b04 100644 --- a/doc/templates/conf.py.en.in +++ b/doc/templates/conf.py.en.in @@ -15,14 +15,20 @@ import sys import os, subprocess import shlex from recommonmark import parser, transform +try: + import py_paddle + import paddle + import paddle.v2 +except ImportError: + print("Must install paddle python package before generating documentation") + sys.exit(1) + MarkdownParser = parser.CommonMarkParser AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, '@PROJ_ROOT@/python') - templates_path = ["@PROJ_ROOT@/doc_theme/templates"] # -- General configuration ------------------------------------------------ diff --git a/doc/tutorials/quick_start/index_en.md b/doc/tutorials/quick_start/index_en.md index 70dec2eb2a8c397bc56b1e6f52a624a3a6877905..ca110431cf921ae0480d3fb2b17c58f90a84cc0e 100644 --- a/doc/tutorials/quick_start/index_en.md +++ b/doc/tutorials/quick_start/index_en.md @@ -156,14 +156,14 @@ define_py_data_sources2(train_list='data/train.list', obj="process", args={"dictionary": word_dict}) ``` -You can refer to the following link for more detailed examples and data formats: PyDataProvider2. +You can refer to the following link for more detailed examples and data formats: PyDataProvider2. ## Network Architecture We will describe four kinds of network architectures in this section.
![](./src/PipelineNetwork_en.jpg)
First, you will build a logistic regression model. Later, you will also get chance to build other more powerful network architectures. -For more detailed documentation, you could refer to: layer documentation. All configuration files are in `demo/quick_start` directory. +For more detailed documentation, you could refer to: layer documentation. All configuration files are in `demo/quick_start` directory. ### Logistic Regression The architecture is illustrated in the following picture: @@ -366,7 +366,7 @@ You can use single layer LSTM model with Dropout for our text classification pro
## Optimization Algorithm -Optimization algorithms include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network. +Optimization algorithms include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network. ```python settings(batch_size=128, @@ -407,7 +407,7 @@ paddle train \ --init_model_path=./output/pass-0000x ``` -We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to Python Prediction API tutorial,or other demo for the prediction process using Python. You can also use the following script for inference or evaluation. +We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to Python Prediction API tutorial,or other demo for the prediction process using Python. You can also use the following script for inference or evaluation. inference script (predict.sh): diff --git a/paddle/api/Arguments.cpp b/paddle/api/Arguments.cpp index 41beed38a87601cb57072c8966cd0fd2ea156524..d49b189e253f7a0792fe3f1fe7c8fdbb7071acd4 100644 --- a/paddle/api/Arguments.cpp +++ b/paddle/api/Arguments.cpp @@ -38,6 +38,13 @@ Arguments* Arguments::createByPaddleArgumentVector(void* ptr) { return args; } +Arguments* Arguments::createByPaddleArgument(const void* ptr) { + auto p = (paddle::Argument*)(ptr); + auto args = new Arguments(); + args->m->outputs.push_back(*p); + return args; +} + Matrix* Arguments::getSlotValue(size_t idx) const throw(RangeError) { auto& a = m->getArg(idx); return Matrix::createByPaddleMatrixPtr(&a.value); @@ -137,9 +144,7 @@ void Arguments::setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError) { a.cpuSequenceDims = m->cast(vec->getSharedPtr()); } -float Arguments::sumCosts() const { - return paddle::Argument::sumCosts(m->outputs); -} +float Arguments::sum() const { return paddle::Argument::sum(m->outputs); } int64_t Arguments::getBatchSize(size_t idx) const throw(RangeError) { auto& a = m->getArg(idx); diff --git a/paddle/api/Evaluator.cpp b/paddle/api/Evaluator.cpp index c30e09876397e37ef9ed4ec3200d1aa372ceb609..681e3a380912339c531c16c88f43255c2f34c32f 100644 --- a/paddle/api/Evaluator.cpp +++ b/paddle/api/Evaluator.cpp @@ -27,3 +27,18 @@ std::string Evaluator::toString() { m->rawPtr->printStats(sout); return sout.str(); } + +std::vector Evaluator::getNames() const { + std::vector retv; + m->rawPtr->getNames(&retv); + return retv; +} + +double Evaluator::getValue(const std::string name) const { + paddle::Error err; + double v = m->rawPtr->getValue(name, &err); + if (err) { + throw std::runtime_error(err.msg()); + } + return v; +} diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp index 66115f8293b905809639afff779abfdb2bb3a54e..dcb5fe086fdccf8ec62ee52cbaaac4b7dbbe2f9d 100644 --- a/paddle/api/GradientMachine.cpp +++ b/paddle/api/GradientMachine.cpp @@ -142,14 +142,28 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) { } } +size_t GradientMachine::getNonStaticParameterSize() const { + return m->machine->getNonStaticParameters().size(); +} + +Parameter* GradientMachine::getNonStaticParameter(size_t i) throw(RangeError) { + auto params = m->machine->getNonStaticParameters(); + if (i 
< params.size()) { + return Parameter::createFromSharedPtr( + &m->machine->getNonStaticParameters()[i]); + } else { + throw RangeError(); + } +} + void GradientMachine::randParameters() { m->machine->randParameters(); } -Matrix* GradientMachine::getLayerOutput(const std::string& layerName) const +Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const throw(UnsupportError) { - auto nn = std::dynamic_pointer_cast(m->machine); + auto nn = m->machine; if (nn) { - auto mat = nn->getLayerOutput(layerName); - return Matrix::createByPaddleMatrixPtr(&mat); + auto arg = nn->getLayerOutput(layerName); + return Arguments::createByPaddleArgument(&arg); } else { throw UnsupportError(); } diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h index f5af8b0035b44d97832dd90ca2eeba079503715c..c4f5dca26cc6a5e9fdd23ee27b594ced29a25c7a 100644 --- a/paddle/api/PaddleAPI.h +++ b/paddle/api/PaddleAPI.h @@ -47,6 +47,9 @@ void setUseGpu(bool useGpu); /// Return true if this py_paddle is compiled in GPU Version bool isGpuVersion(); +/// Return FLAGS_trainer_count +int getTrainerCount(); + /// The Error of IO Operation. Such as file not found, etc. class IOError {}; @@ -450,10 +453,11 @@ public: IVector* vec) throw(RangeError); void setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError); - float sumCosts() const; + float sum() const; private: static Arguments* createByPaddleArgumentVector(void* ptr); + static Arguments* createByPaddleArgument(const void* ptr); void* getInternalArgumentsPtr() const; private: @@ -767,9 +771,12 @@ public: size_t getParameterSize() const; Parameter* getParameter(size_t i) throw(RangeError); + size_t getNonStaticParameterSize() const; + Parameter* getNonStaticParameter(size_t i) throw(RangeError); + void randParameters(); - Matrix* getLayerOutput(const std::string& layerName) const + Arguments* getLayerOutput(const std::string& layerName) const throw(UnsupportError); /** @@ -900,6 +907,10 @@ public: */ std::string toString(); + std::vector getNames() const; + + double getValue(const std::string name) const; + private: EvaluatorPrivate* m; @@ -952,7 +963,7 @@ public: Arguments* getForwardOutput(); - Matrix* getLayerOutput(const std::string& layerName); + Arguments* getLayerOutput(const std::string& layerName) const; }; /// the N-Best results generated from one input sequence. 
diff --git a/paddle/api/Trainer.cpp b/paddle/api/Trainer.cpp index d83dc380beeec3747451a483f4811eb833e8c226..84e4ca054abb0100a02c8a40e31c49c17684ef40 100644 --- a/paddle/api/Trainer.cpp +++ b/paddle/api/Trainer.cpp @@ -131,12 +131,11 @@ void Trainer::testOneDataBatch(size_t batchSize, const Arguments& args) { void TrainerPrivate::finishTestPeriod() { tester_->finishTestPeriod(); } void Trainer::finishTestPeriod() { m->finishTestPeriod(); } -Matrix* Trainer::getLayerOutput(const std::string& layerName) { - auto nn = std::dynamic_pointer_cast( - this->m->getGradientMachine()); +Arguments* Trainer::getLayerOutput(const std::string& layerName) const { + auto nn = this->m->getGradientMachine(); CHECK(nn) << "trainerInternal_.getGradientMachine() is not NeuralNetwork"; - auto m = nn->getLayerOutput(layerName); - return Matrix::createByPaddleMatrixPtr(&m); + auto arg = nn->getLayerOutput(layerName); + return Arguments::createByPaddleArgument(&arg); } void Trainer::forwardOneBatch(size_t batchSize) { diff --git a/paddle/api/Util.cpp b/paddle/api/Util.cpp index 54d67aa62f4d87ad03282962c722019698dc621a..d369df5d4e04b4a8d822db0e72a8051150868ce6 100644 --- a/paddle/api/Util.cpp +++ b/paddle/api/Util.cpp @@ -54,5 +54,7 @@ bool isGpuVersion() { #endif } +int getTrainerCount() { return FLAGS_trainer_count; } + static_assert(NUM_PARAMETER_TYPES == paddle::NUM_PARAMETER_TYPES, "The Parameter Type should be same in core/api and core/common"); diff --git a/paddle/api/test/testArguments.py b/paddle/api/test/testArguments.py index a04a805d7a64ef906c8388f1241b9ef823e4d9e0..9fe44de94ea6ddb71d2dfbb2243fc86ede0d0531 100644 --- a/paddle/api/test/testArguments.py +++ b/paddle/api/test/testArguments.py @@ -22,7 +22,7 @@ class TestArguments(unittest.TestCase): args = swig_paddle.Arguments.createArguments(1) args.setSlotValue(0, m) - self.assertAlmostEqual(27.0, args.sumCosts()) + self.assertAlmostEqual(27.0, args.sum()) mat = args.getSlotValue(0) assert isinstance(mat, swig_paddle.Matrix) diff --git a/paddle/api/test/testMatrix.py b/paddle/api/test/testMatrix.py index 37666bdccc9aedfe8f8079124129aad2ade53a43..f08fbf3ccdf5d7c0a5c739868b1bcb516146c23d 100644 --- a/paddle/api/test/testMatrix.py +++ b/paddle/api/test/testMatrix.py @@ -68,7 +68,7 @@ class TestMatrix(unittest.TestCase): def test_numpyCpu(self): numpy_mat = np.matrix([[1, 2], [3, 4], [5, 6]], dtype="float32") - m = swig_paddle.Matrix.createCpuDenseFromNumpy(numpy_mat, copy=False) + m = swig_paddle.Matrix.createCpuDenseFromNumpy(numpy_mat, False) self.assertEqual((int(m.getHeight()), int(m.getWidth())), numpy_mat.shape) diff --git a/paddle/api/test/testTrain.py b/paddle/api/test/testTrain.py index a90d15c272a3a2b56e35c979e053deb2b54eebc1..7061a4c43bf01158b5f084d0c310dedd81773a04 100644 --- a/paddle/api/test/testTrain.py +++ b/paddle/api/test/testTrain.py @@ -89,9 +89,14 @@ def main(): except Exception as e: print e + ev = m.makeEvaluator() + ev.start() m.forwardBackward(inArgs, outArgs, swig_paddle.PASS_TRAIN, update_callback) - + m.eval(ev) + ev.finish() + for name in ev.getNames(): + print name, ev.getValue(name) for optimizer in optimizers: optimizer.finishBatch() diff --git a/paddle/api/test/testVector.py b/paddle/api/test/testVector.py index 1ab095c1d3d0d2c84d2d2f95a03f172b901de209..6339cf8542607bdda99eb9ccaa8b06480f144b78 100644 --- a/paddle/api/test/testVector.py +++ b/paddle/api/test/testVector.py @@ -43,7 +43,7 @@ class TestIVector(unittest.TestCase): def test_cpu_numpy(self): vec = np.array([1, 3, 4, 65, 78, 1, 4], dtype="int32") - iv = 
swig_paddle.IVector.createCpuVectorFromNumpy(vec, copy=False) + iv = swig_paddle.IVector.createCpuVectorFromNumpy(vec, False) self.assertEqual(vec.shape[0], int(iv.__len__())) vec[4] = 832 for i in xrange(len(iv)): @@ -106,7 +106,7 @@ class TestVector(unittest.TestCase): def testCpuNumpy(self): numpy_arr = np.array([1.2, 2.3, 3.4, 4.5], dtype="float32") - vec = swig_paddle.Vector.createCpuVectorFromNumpy(numpy_arr, copy=False) + vec = swig_paddle.Vector.createCpuVectorFromNumpy(numpy_arr, False) assert isinstance(vec, swig_paddle.Vector) numpy_arr[0] = 0.1 for n, v in zip(numpy_arr, vec): diff --git a/paddle/cuda/include/hl_matrix.h b/paddle/cuda/include/hl_matrix.h index abd5eb3a0cf338c689680dd0f7192be7b2530383..eb454c59c1e58cf2b4817b4cb3230b9d75e320ac 100644 --- a/paddle/cuda/include/hl_matrix.h +++ b/paddle/cuda/include/hl_matrix.h @@ -69,19 +69,6 @@ extern void hl_sequence_softmax_forward(real* A_d, const int* index, int numSequence); -/** - * @brief Matrix classification error. - * - * @param[in] A_d input matrix (M x N). - * @param[in] B_d input vector (M x 1). - * @param[out] C_d output vector (M x 1). - * @param[in] dimM matrix height. - * @param[in] dimN matrix width. - * - */ -extern void hl_matrix_classification_error( - real* A_d, int* B_d, real* C_d, int dimM, int dimN); - /** * @brief Matrix cross entropy. * @@ -188,48 +175,6 @@ extern void hl_param_relu_backward_diff(real* grad_o, int width, int height, int partial_sum); -/** - * @brief cos sim forward - * - * @param[out] output output data - * @param[in] input1 input1 data(matrix) - * @param[in] input2 input2 data(matrix or vector) - * @param[in] width matrix width - * @param[in] input1_height input1_height - * @param[in] input2_height input2_height - * @param[in] scale scale factor - */ -extern void hl_cossim(real* output, - real* input1, - real* input2, - int width, - int input1_height, - int input2_height, - real scale); -/** - * @brief cos sim derivate - * - * @param[in] grad output grad - * @param[in] output output data - * @param[in] prevOutX input1 data - * @param[in] prevOutY input2 data - * @param[out] prevGradX input1 grad - * @param[out] prevGradY input2 grad - * @param[in] width matrix width - * @param[in] input1_height input1 height - * @param[in] input2_height input2 height - * @param[in] scale scale factor - */ -extern void hl_cossim_derivative(real* grad, - real* output, - real* prevOutX, - real* prevOutY, - real* prevGradX, - real* prevGradY, - int width, - int input1_height, - int input2_height, - real scale); /** * @brief Matrix addition: A_d[i][j] += scale * B_d[j/channel]. @@ -267,4 +212,16 @@ extern void hl_matrix_collect_shared_bias(real* B_d, const int dimN, real scale); +/** + * @brief Matrix rotation in 90 degrees + * + * @param[in] mat input matrix (M x N). + * @param[out] matRot output matrix (N x M). + * @param[in] dimM input matrix height. + * @param[in] dimN input matrix width. + * @param[in] clockWise rotation direction + */ +extern void hl_matrix_rotate( + real* mat, real* matRot, int dimM, int dimN, bool clockWise); + #endif /* HL_MATRIX_H_ */ diff --git a/paddle/cuda/include/hl_top_k.h b/paddle/cuda/include/hl_top_k.h index 77949ed295a6eaf7cc535853e53bef066ffac37c..79ae0d0e741de06e622454ccd220e2c749d795b3 100644 --- a/paddle/cuda/include/hl_top_k.h +++ b/paddle/cuda/include/hl_top_k.h @@ -58,4 +58,30 @@ extern void hl_sparse_matrix_top_k(real* topVal, int beamSize, int numSamples); -#endif /* HL_TOP_K_H_ */ +/** + * @brief Matrix classification error. 
+ * + * @param[out] topVal top k element. + * @param[in] ldv leading dimension of topVal. + * @param[out] topIds top k index. + * @param[in] src input value. + * @param[in] lds leading dimension of src. + * @param[in] dim width of input value. + * @param[in] topkSize size of top k element. + * @param[in] numSamples height of input value. + * @param[in] label ground truth label. + * @param[out] recResult top-k classification error. + * + */ +extern void hl_matrix_classification_error(real* topVal, + int ldv, + int* topIds, + real* src, + int lds, + int dim, + int topkSize, + int numSamples, + int* label, + real* recResult); + +#endif // HL_TOP_K_H_ diff --git a/paddle/cuda/include/stub/hl_matrix_stub.h b/paddle/cuda/include/stub/hl_matrix_stub.h index 0b669f6735cb9771fd63ed8e3b45602db0db447c..127cb7e27983e8ff2c1ff6ef5108b5f8c5bd6ca5 100644 --- a/paddle/cuda/include/stub/hl_matrix_stub.h +++ b/paddle/cuda/include/stub/hl_matrix_stub.h @@ -35,8 +35,16 @@ inline void hl_sequence_softmax_forward(real* A_d, inline void hl_matrix_softmax_derivative( real* grad_d, real* output_d, real* sftmaxSum_d, int dimM, int dimN) {} -inline void hl_matrix_classification_error( - real* A_d, int* B_d, real* C_d, int dimM, int dimN) {} +inline void hl_matrix_classification_error(real* topVal, + int ldv, + int* topIds, + real* src, + int lds, + int dim, + int topkSize, + int numSamples, + int* label, + real* recResult) {} inline void hl_matrix_cross_entropy( real* A_d, real* C_d, int* label_d, int dimM, int dimN) {} @@ -74,25 +82,6 @@ inline void hl_param_relu_backward_diff(real* grad_o, int height, int partial_sum) {} -inline void hl_cossim(real* output, - real* input1, - real* input2, - int width, - int input1_height, - int input2_height, - real scale) {} - -inline void hl_cossim_derivative(real* grad, - real* output, - real* prevOutX, - real* prevOutY, - real* prevGradX, - real* prevGradY, - int width, - int input1_height, - int input2_height, - real scale) {} - inline void hl_matrix_add_shared_bias(real* A_d, real* B_d, const int channel, @@ -106,4 +95,8 @@ inline void hl_matrix_collect_shared_bias(real* B_d, const int dimM, const int dimN, real scale) {} + +inline void hl_matrix_rotate( + real* mat, real* matRot, int dimM, int dimN, bool clockWise) {} + #endif // HL_MATRIX_STUB_H_ diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/cuda/src/hl_cuda_matrix.cu index 2b4c6f7c39cff78c0e76cc1dfd41e1c7ef334f11..9bcc7fb7de44b2211db450fb164655f7947dcad9 100644 --- a/paddle/cuda/src/hl_cuda_matrix.cu +++ b/paddle/cuda/src/hl_cuda_matrix.cu @@ -265,59 +265,6 @@ void hl_matrix_softmax_derivative(real *grad_d, CHECK_SYNC("hl_matrix_softmax_derivative failed"); } -template -__global__ void KeMatrixClassificationError(real* in_A, - int* in_B, - real* out_C, - int dimN) { - __shared__ real max_s[blockSize]; - __shared__ int max_l[blockSize]; - const int tid = threadIdx.x; - const int rowId = blockIdx.x; - - max_s[tid] = -1e30f; - in_A += rowId * dimN; - real tmp; - for (int colId = tid; colId < dimN; colId += blockSize) { - tmp = in_A[colId]; - if (max_s[tid] < tmp) { - max_s[tid] = tmp; - max_l[tid] = colId; - } - } - __syncthreads(); - - for (int stride = blockSize/2; stride > 0; stride = stride/2) { - if (tid < stride) { - if (max_s[tid] < max_s[tid + stride]) { - max_s[tid] = max_s[tid + stride]; - max_l[tid] = max_l[tid + stride]; - } - } - __syncthreads(); - } - __syncthreads(); - - if (tid == 0) { - out_C[rowId] = (max_l[0] == in_B[rowId] ? 
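The new signature above replaces the deleted argmax-style kernel with a top-k test: a sample counts as an error only when its label falls outside the k highest-scoring classes. A rough CPU equivalent, a hypothetical helper that ignores the leading-dimension arguments and the topVal/topIds outputs, could look like:

#include <algorithm>
#include <numeric>
#include <vector>

// recResult[i] = 0 if label[i] is among the top-k scores of row i, else 1.
void topkClassificationErrorRef(const float* src, int dim, int numSamples,
                                int topkSize, const int* label,
                                float* recResult) {
  const int k = std::min(topkSize, dim);  // mirrors the clamp in the wrapper
  std::vector<int> idx(dim);
  for (int i = 0; i < numSamples; ++i) {
    const float* row = src + i * dim;
    std::iota(idx.begin(), idx.end(), 0);
    std::partial_sort(idx.begin(), idx.begin() + k, idx.end(),
                      [row](int a, int b) { return row[a] > row[b]; });
    recResult[i] = 1.0f;
    for (int j = 0; j < k; ++j) {
      if (idx[j] == label[i]) {
        recResult[i] = 0.0f;
        break;
      }
    }
  }
}

With topkSize = 1 this degenerates to the old behavior of the removed hl_matrix_classification_error.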
0 : 1.0f); - } -} - -void hl_matrix_classification_error(real* A_d, - int* B_d, - real* C_d, - int dimM, - int dimN) { - CHECK_NOTNULL(A_d); - CHECK_NOTNULL(B_d); - CHECK_NOTNULL(C_d); - - // each sample is calculated by one block - KeMatrixClassificationError<1024><<< dimM, 1024, 0, STREAM_DEFAULT >>> - (A_d, B_d, C_d, dimN); - CHECK_SYNC("hl_matrix_classification_error"); -} - __global__ void KeMatrixMultiBinaryCrossEntropy(real* output, real* entropy, int* row, @@ -584,177 +531,6 @@ void hl_param_relu_backward_diff(real* grad_o, CHECK_SYNC("hl_param_relu_backward_diff failed"); } -template -__global__ void KeCosSim(real* output, - real* input1, - real* input2, - int width, - int input1_height, - int input2_height, - real scale) { - const int ty = blockIdx.y; - int tid = threadIdx.x; - - __shared__ real xx[blockSize]; - __shared__ real yy[blockSize]; - __shared__ real xy[blockSize]; - - xx[tid] = 0.0; - yy[tid] = 0.0; - xy[tid] = 0.0; - __syncthreads(); - - input1 += ty * width; - if (input2_height > 1) { - input2 += ty * width; - } - for (int index = tid; index < width; index += blockSize) { - real x = input1[index]; - real y = input2[index]; - xx[tid] += x * x; - yy[tid] += y * y; - xy[tid] += x * y; - } - __syncthreads(); - - for (int s = blockSize / 2; s > 0; s >>= 1) { - if (tid < s) { - xx[tid] += xx[tid + s]; - yy[tid] += yy[tid + s]; - xy[tid] += xy[tid + s]; - } - __syncthreads(); - } - if (tid == 0) { - output[ty] = scale * xy[0] / (sqrt(xx[0]) * sqrt(yy[0])); - } -} - -void hl_cossim(real* output, - real* input1, - real* input2, - int width, - int input1_height, - int input2_height, - real scale) { - CHECK_NOTNULL(output); - CHECK_NOTNULL(input1); - CHECK_NOTNULL(input2); - const int blockSize = 256; - dim3 threads(blockSize, 1); - dim3 grid(1, input1_height); - - KeCosSim<<>> - (output, input1, input2, width, input1_height, input2_height, scale); - CHECK_SYNC("hl_cossim failed"); -} - -template -__global__ void KeCosSimDerivative(real* grad, - real* output, - real* prevOutX, - real* prevOutY, - real* prevGradX, - real* prevGradY, - int width, - int input1_height, - int input2_height, - real scale) { - const int ty = blockIdx.y; - int tid = threadIdx.x; - - __shared__ real xx[blockSize]; - __shared__ real yy[blockSize]; - __shared__ real xy[blockSize]; - - xx[tid] = 0.0; - yy[tid] = 0.0; - xy[tid] = 0.0; - __syncthreads(); - - prevOutX += ty * width; - prevGradX += ty * width; - if (input2_height > 1) { - prevOutY += ty * width; - prevGradY += ty * width; - } - for (int index = tid; index < width; index += blockSize) { - real x = prevOutX[index]; - real y = prevOutY[index]; - xx[tid] += x * x; - yy[tid] += y * y; - xy[tid] += x * y; - } - __syncthreads(); - - for (int s = blockSize / 2; s > 0; s >>= 1) { - if (tid < s) { - xx[tid] += xx[tid + s]; - yy[tid] += yy[tid + s]; - xy[tid] += xy[tid + s]; - } - __syncthreads(); - } - if (xy[0] == 0) { - real reciprocal = 1.0 / (sqrt(xx[0]) * sqrt(yy[0])); - for (int index = tid; index < width; index += blockSize) { - prevGradX[index] += - scale * grad[ty] * prevOutY[index] * reciprocal; - if (input2_height > 1) { - prevGradY[index] += - scale * grad[ty] * prevOutX[index] * reciprocal; - } else { - paddle::paddleAtomicAdd(prevGradY + index, - scale * grad[ty] * prevOutX[index] * reciprocal); - } - } - } else { - real reciprocalXY = 1.0 / xy[0]; - real reciprocalSquareSumX = 1.0 / xx[0]; - real reciprocalSquareSumY = 1.0 / yy[0]; - for (int index = tid; index < width; index += blockSize) { - prevGradX[index] += output[ty] * grad[ty] * - 
(prevOutY[index] * reciprocalXY - - prevOutX[index] * reciprocalSquareSumX); - if (input2_height > 1) { - prevGradY[index] += output[ty] * grad[ty] * - (prevOutX[index] * reciprocalXY - - prevOutY[index] * reciprocalSquareSumY); - } else { - paddle::paddleAtomicAdd(prevGradY + index, output[ty] * grad[ty] * - (prevOutX[index] * reciprocalXY - - prevOutY[index] * reciprocalSquareSumY)); - } - } - } -} - - -void hl_cossim_derivative(real* grad, - real* output, - real* prevOutX, - real* prevOutY, - real* prevGradX, - real* prevGradY, - int width, - int input1_height, - int input2_height, - real scale) { - CHECK_NOTNULL(grad); - CHECK_NOTNULL(output); - CHECK_NOTNULL(prevOutX); - CHECK_NOTNULL(prevOutY); - CHECK_NOTNULL(prevGradX); - CHECK_NOTNULL(prevGradY); - const int blockSize = 256; - dim3 threads(blockSize, 1); - dim3 grid(1, input1_height); - KeCosSimDerivative<blockSize><<<grid, threads, 0, STREAM_DEFAULT>>> - (grad, output, prevOutX, prevOutY, prevGradX, prevGradY, width, - input1_height, input2_height, scale); - CHECK_SYNC("hl_cossim_derivate failed"); -} - __global__ void KeMatrixAddSharedBias(real* A, real* B, const int channel, @@ -840,3 +616,28 @@ void hl_matrix_collect_shared_bias(real* B_d, (B_d, A_d, channel, dimM, dimN, dim, limit, scale); CHECK_SYNC("hl_matrix_collect_shared_bias failed"); } + +__global__ void keMatrixRotate(real* mat, real* matRot, + int dimM, int dimN, bool clockWise) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < dimM * dimN) { + int i = idx / dimN; + int j = idx % dimN; + if (clockWise) { + matRot[j * dimM + i] = mat[(dimM - i - 1) * dimN + j]; + } else { + matRot[j * dimM + i] = mat[i * dimN + (dimN - j - 1)]; + } + } +} + +void hl_matrix_rotate(real *mat, real* matRot, + int dimM, int dimN, bool clockWise) { + CHECK_NOTNULL(mat); + CHECK_NOTNULL(matRot); + const int threads = 512; + const int blocks = DIVUP(dimM * dimN, threads); + keMatrixRotate<<< blocks, threads, 0, STREAM_DEFAULT >>> + (mat, matRot, dimM, dimN, clockWise); + CHECK_SYNC("hl_matrix_rotate failed"); +} diff --git a/paddle/cuda/src/hl_top_k.cu b/paddle/cuda/src/hl_top_k.cu index f0ef0cc3c51f9e7935dc3c40f630e4d70960802a..4f0bbfcf4e3aa51dd06acf254af65c62098a1df7 100644 --- a/paddle/cuda/src/hl_top_k.cu +++ b/paddle/cuda/src/hl_top_k.cu @@ -384,3 +384,81 @@ void hl_sparse_matrix_top_k(real* topVal, int ldv, CHECK_SYNC("hl_sparse_matrix_top_k failed"); } +/** + * Each block computes one sample. + * In a block: + * 1. every thread gets its top maxLength values; + * 2. merge them into shTopK and block-reduce to get the max value; + * 3. repeat the second step until one thread's topK values are exhausted; + * 4. repeat from the first step until all topK values are obtained. 
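The four numbered steps above describe a two-level selection: every thread keeps a small private top list, and the block repeatedly extracts the globally largest head element until k values have been produced. A serial sketch of that merge idea, illustrative only, with std::vector lists standing in for registers and shared memory:

#include <algorithm>
#include <vector>

// Serial analogue of the block-level top-k merge: each "thread" t owns a
// descending candidate list; every round we take the largest current head.
std::vector<float> mergeTopK(std::vector<std::vector<float>> perThread, int k) {
  for (auto& v : perThread) std::sort(v.rbegin(), v.rend());  // descending
  std::vector<size_t> pos(perThread.size(), 0);
  std::vector<float> result;
  while (static_cast<int>(result.size()) < k) {
    int best = -1;
    for (size_t t = 0; t < perThread.size(); ++t) {
      if (pos[t] < perThread[t].size() &&
          (best < 0 || perThread[t][pos[t]] > perThread[best][pos[best]])) {
        best = static_cast<int>(t);
      }
    }
    if (best < 0) break;  // every list is exhausted
    result.push_back(perThread[best][pos[best]++]);
  }
  return result;
}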
+ */ +template +__global__ void KeMatrixTopKClassificationError(real* topVal, int ldv, + int * topIds, + real* src, int lds, + int dim, + int beamSize, + int* label, + real* recResult) { + __shared__ Pair shTopK[blockSize]; + __shared__ int maxId[blockSize / 2]; + const int tid = threadIdx.x; + const int warp = threadIdx.x / 32; + src += blockIdx.x * lds; + topVal += blockIdx.x * ldv; + topIds += blockIdx.x * beamSize; + + Pair topK[maxLength]; // NOLINT + int beam = maxLength; + Pair max; + bool isEmpty = false; + bool firstStep = true; + int topkSize = beamSize; + + for (int k = 0; k < maxLength; k++) { + topK[k].set(-HL_FLOAT_MAX, -1); + } + + while (beamSize) { + threadGetTopK + (topK, beam, beamSize, src, firstStep, isEmpty, max, dim, tid); + + shTopK[tid] = topK[0]; + blockReduce + (shTopK, maxId, topK, &topVal, &topIds, beam, beamSize, tid, warp); + } + + __syncthreads(); + if (tid == 0) { + for (int i = 0; i < topkSize; i++) { + if (*--topIds == label[blockIdx.x]) { + recResult[blockIdx.x] = 0; + break; + } + recResult[blockIdx.x] = 1.0f; + } + } +} + +void hl_matrix_classification_error(real* topVal, int ldv, + int* topIds, + real* src, int lds, + int dim, + int topkSize, + int numSamples, + int* label, + real* recResult) { + CHECK_NOTNULL(topVal); + CHECK_NOTNULL(topIds); + CHECK_NOTNULL(src); + + if (topkSize > dim) topkSize = dim; + + dim3 threads(256, 1); + dim3 grid(numSamples, 1); + KeMatrixTopKClassificationError<5, 256> + <<< grid, threads, 0, STREAM_DEFAULT >>> + (topVal, ldv, topIds, src, lds, dim, topkSize, label, recResult); + + CHECK_SYNC("hl_matrix_top_k classification error failed"); +} diff --git a/paddle/cuda/src/hl_warpctc_wrap.cc b/paddle/cuda/src/hl_warpctc_wrap.cc index 55b940ca67acce2c7ee7c1ee286ab96240652274..f57efb2b46797c303d99a5468ad96163a3e74972 100644 --- a/paddle/cuda/src/hl_warpctc_wrap.cc +++ b/paddle/cuda/src/hl_warpctc_wrap.cc @@ -54,22 +54,26 @@ DYNAMIC_LOAD_WARPCTC_WRAP(get_workspace_size) #define WARPCTC_GET_VERSION dynload::get_warpctc_version #define WARPCTC_GET_STATUS_STRING dynload::ctcGetStatusString +static int g_warpctcVersion = -1; #ifndef PADDLE_TYPE_DOUBLE #define WARPCTC_COMPUTE_LOSS dynload::compute_ctc_loss #define WARPCTC_GET_WORKSPACE_SIZE dynload::get_workspace_size #else -#define WARPCTC_LOG_FATAL \ - LOG(FATAL) << "warp-ctc [version " << g_warpctcVersion \ - << "] Error: not support double precision." -#define WARPCTC_COMPUTE_LOSS(...) WARPCTC_LOG_FATAL(__VA_ARGS__) -#define WARPCTC_GET_WORKSPACE_SIZE(...) WARPCTC_LOG_FATAL(__VA_ARGS__) +hl_warpctc_status_t fatal(...) { + LOG(FATAL) << "warp-ctc [version " << g_warpctcVersion + << "] Error: not support double precision."; + // both of get_warpctc_version() and get_workspace_size() return an ctcStatus + // type value + return CTC_STATUS_EXECUTION_FAILED; +} +#define WARPCTC_COMPUTE_LOSS fatal +#define WARPCTC_GET_WORKSPACE_SIZE fatal #endif /** * Check build-in warp-ctc function using glog and it also * support << operator for more details error info. 
*/ -static int g_warpctcVersion = -1; #define CHECK_WARPCTC(warpctcStat) \ CHECK_EQ(CTC_STATUS_SUCCESS, warpctcStat) \ << "warp-ctc [version " << g_warpctcVersion \ diff --git a/paddle/function/BufferArg.cpp b/paddle/function/BufferArg.cpp index 5d595deb12c6c8ea419dd1f31b3c131a2f6a587a..2b70036e3ff7de9e8786bade03e220a4916db4c2 100644 --- a/paddle/function/BufferArg.cpp +++ b/paddle/function/BufferArg.cpp @@ -32,14 +32,20 @@ const SparseMatrixArg& BufferArg::sparse() const { SparseMatrixArg::SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType) : BufferArg(sparse, argType), row_(reinterpret_cast(sparse.getRows()), VALUE_TYPE_INT32), - col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32) { + col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32), + nnz_(sparse.getElementCnt()), + format_(static_cast(sparse.getFormat())), + type_(static_cast(sparse.getValueType())) { bufferType_ = TENSOR_SPARSE; } SparseMatrixArg::SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType) : BufferArg(sparse, argType), row_(reinterpret_cast(sparse.getRows()), VALUE_TYPE_INT32), - col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32) { + col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32), + nnz_(sparse.getElementCnt()), + format_(static_cast(sparse.getFormat())), + type_(static_cast(sparse.getValueType())) { bufferType_ = TENSOR_SPARSE; } diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h index 84209265ce7634121e3e4dde609cd787093c45ec..0dc7792f646457c22ee4791f18814afaa3809f7b 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/function/BufferArg.h @@ -30,13 +30,6 @@ enum BufferType { TENSOR_SPARSE = 4 }; -enum SparseDataType { - SPARSE_NO_VALUE = 0, // do not need value pointer, all values are 1 - SPARSE_FLOAT_VALUE = 1 -}; - -enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 }; - class BufferArg; class SequenceArg; class SparseMatrixArg; @@ -79,19 +72,21 @@ public: BufferArg(ValueType valueType, const TensorShape& shape, ArgType argType = UNSPECIFIED) - : buf_(nullptr), - valueType_(valueType), - shape_(shape), - argType_(argType) {} + : buf_(nullptr), valueType_(valueType), shape_(shape), argType_(argType) { + bufferType_ = TENSOR_NORMAL; + } BufferArg(void* buf, ValueType valueType, const TensorShape& shape, ArgType argType = UNSPECIFIED) - : buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) {} + : buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) { + bufferType_ = TENSOR_NORMAL; + } - BufferArg(void* buf, ValueType valueType) - : buf_(buf), valueType_(valueType) {} + BufferArg(void* buf, ValueType valueType) : buf_(buf), valueType_(valueType) { + bufferType_ = TENSOR_NORMAL; + } BufferArg(const Matrix& matrix, ArgType argType = UNSPECIFIED) : buf_( @@ -167,8 +162,9 @@ public: ValueType valueType() const { return valueType_; } BufferType bufferType() const { return bufferType_; } const TensorShape& shape() const { return shape_; } - bool isSparse() const { return (TENSOR_SPARSE == bufferType_); } + bool isSparseArg() const { return TENSOR_SPARSE == bufferType_; } bool isSequenceArg() const { return TENSOR_SEQUENCE_DATA == bufferType_; } + virtual size_t numElements() const { return shape_.getElements(); } const SequenceArg& sequence() const; const SparseMatrixArg& sparse() const; @@ -179,6 +175,7 @@ protected: TensorShape shape_; BufferType bufferType_{TENSOR_UNKNOWN}; ArgType argType_{UNSPECIFIED}; + // TODO(tianbing), add deviceType_ // leading dimensions. 
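Every BufferArg constructor now sets bufferType_ explicitly, which is what makes predicates like isSparseArg() and isSequenceArg() trustworthy; with the old TENSOR_UNKNOWN default a plain dense argument was indistinguishable from an uninitialized one. A minimal sketch of the dispatch this enables; the enum values are assumed here to mirror the declaration above:

#include <cstdio>

enum BufferType {
  TENSOR_UNKNOWN = 0,
  TENSOR_NORMAL = 1,
  TENSOR_SEQUENCE_ID = 2,
  TENSOR_SEQUENCE_DATA = 3,
  TENSOR_SPARSE = 4
};

struct Arg {
  BufferType bufferType{TENSOR_UNKNOWN};
  bool isSparseArg() const { return bufferType == TENSOR_SPARSE; }
  bool isSequenceArg() const { return bufferType == TENSOR_SEQUENCE_DATA; }
};

// A calc()-style routine can branch on the tag instead of downcasting blindly.
void calcSketch(const Arg& in) {
  if (in.isSparseArg()) {
    std::printf("sparse path: iterate nnz elements\n");
  } else if (in.isSequenceArg()) {
    std::printf("sequence path: use start positions\n");
  } else {
    std::printf("dense path: iterate height * width elements\n");
  }
}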
The size is dims_.size() // Dims lds_; }; @@ -191,8 +188,9 @@ class SequenceIdArg : public BufferArg { public: SequenceIdArg(const TensorShape& shape, ArgType argType = UNSPECIFIED) : BufferArg(VALUE_TYPE_INT32, shape, argType) { - CHECK_EQ(shape_.ndims(), (size_t)1); - CHECK_GT(shape_[0], 1); + bufferType_ = TENSOR_SEQUENCE_ID; + CHECK_EQ(shape_.ndims(), 1UL); + CHECK_GE(shape_[0], 1UL); numSeqs_ = shape_[0] - 1; } @@ -201,7 +199,7 @@ public: ArgType argType = UNSPECIFIED) : BufferArg(buf, VALUE_TYPE_INT32, shape, argType) { bufferType_ = TENSOR_SEQUENCE_ID; - CHECK_EQ(shape_.ndims(), (size_t)1); + CHECK_EQ(shape_.ndims(), 1UL); numSeqs_ = shape_[0] - 1; } @@ -228,7 +226,10 @@ public: SequenceArg(ValueType valueType, const TensorShape& shape, ArgType argType = UNSPECIFIED) - : BufferArg(valueType, shape, argType), startPositions_(TensorShape()) {} + : BufferArg(valueType, shape, argType), + startPositions_(TensorShape({shape[0]})) { + bufferType_ = TENSOR_SEQUENCE_DATA; + } SequenceArg(void* buf, ValueType valueType, @@ -269,31 +270,75 @@ public: const BufferArg& row, const BufferArg& col, size_t nnz, - SparseDataFormat format, - SparseDataType type, + SparseFormat format, + SparseValueType type, ArgType argType = UNSPECIFIED) : BufferArg(buf, valueType, shape, argType), row_(row), col_(col), nnz_(nnz), - format_(format), - type_(type) { + format_(static_cast(format)), + type_(static_cast(type)) { bufferType_ = TENSOR_SPARSE; CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE)); - CHECK_EQ(shape_.ndims(), (size_t)2); - CHECK_EQ(row_.shape().ndims(), (size_t)1); - CHECK_EQ(col_.shape().ndims(), (size_t)1); - if (format == SPARSE_CSR_FORMAT) { + CHECK_EQ(shape_.ndims(), 2UL); + CHECK_EQ(row_.shape().ndims(), 1UL); + CHECK_EQ(col_.shape().ndims(), 1UL); + if (format_ == T_SPARSE_CSR) { CHECK_EQ(nnz, col.shape()[0]); - } else if (format == SPARSE_CSC_FORMAT) { + } else if (format_ == T_SPARSE_CSC) { CHECK_EQ(nnz, row.shape()[0]); } } + SparseMatrixArg(ValueType valueType, + const TensorShape& shape, + size_t nnz, + SparseFormat format, + SparseValueType type, + ArgType argType = UNSPECIFIED) + : BufferArg(valueType, shape, argType), + row_(BufferArg(nullptr, VALUE_TYPE_INT32)), + col_(BufferArg(nullptr, VALUE_TYPE_INT32)), + nnz_(nnz), + format_(static_cast(format)), + type_(static_cast(type)) { + bufferType_ = TENSOR_SPARSE; + CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE)); + CHECK_EQ(shape_.ndims(), 2UL); + + /// len of row_ : height + 1 (CSR) or nnz (CSC), buf_ == nullptr + row_ = (format_ == T_SPARSE_CSR + ? BufferArg(VALUE_TYPE_INT32, TensorShape{shape_[0] + 1}) + : BufferArg(VALUE_TYPE_INT32, TensorShape{nnz})); + /// len of col_ : width + 1 (CSC) or nnz (CSR), buf_ == nullptr + col_ = (format_ == T_SPARSE_CSR + ? 
BufferArg(VALUE_TYPE_INT32, TensorShape{nnz}) + : BufferArg(VALUE_TYPE_INT32, TensorShape{shape_[1] + 1})); + } + SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED); SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED); + template + typename Tensor::SparseMatrix SparseMatrix() const { + CHECK(buf_); + CHECK(valueType_ == DataType::value); + // CHECK(deviceType_ == DType); + CHECK_EQ(2UL, shape_.ndims()); + return typename Tensor::SparseMatrix( + reinterpret_cast(buf_), + reinterpret_cast(row_.data()), + reinterpret_cast(col_.data()), + shape_[0], + shape_[1], + nnz_, + static_cast(type_), + static_cast(format_), + false); + } + ~SparseMatrixArg() {} void* getRowBuf() const { return row_.data(); } @@ -302,6 +347,8 @@ public: size_t nnz() const { return nnz_; } + size_t numElements() const override { return nnz_; } + SparseDataFormat dataFormat() const { return format_; } SparseDataType dataType() const { return type_; } diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index a5cf16cb568ee9bafd15a8c9737d933b6fbbd12b..1522510e8bb9816cb468fcf406e22560163950cc 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -25,6 +25,9 @@ if(WITH_TESTING) add_simple_unittest(BufferArgTest) add_simple_unittest(FunctionTest) add_simple_unittest(ContextProjectionOpTest) + add_simple_unittest(PadOpTest) + add_simple_unittest(MulOpTest) + add_simple_unittest(CosSimOpTest) endif() endif() diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp index 6cd4e4abee8fccf3a4745b0bfc6701df4ddfa5c0..b87750b74247bd0eb822340bc5a85d41b86ecec2 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -108,26 +108,23 @@ public: } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK(1 == inputs.size() || 2 == inputs.size()); - CHECK_EQ((size_t)1, outputs.size()); + CHECK(1UL == inputs.size() || 2UL == inputs.size()); + CHECK_EQ(1UL, outputs.size()); CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) << "SequenceArg required here"; const auto val_seqs = dynamic_cast(inputs[0]); auto out_seq = dynamic_cast(outputs[0]); CHECK(out_seq.data() && val_seqs.data() && val_seqs.getSequenceId().data()); - CHECK_EQ(out_seq.shape().ndims(), (size_t)2); - CHECK_EQ(val_seqs.shape().ndims(), (size_t)2); - CHECK_EQ(val_seqs.getSequenceId().shape().ndims(), (size_t)1); - if (2 == inputs.size()) { - CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); - } + CHECK_EQ(out_seq.shape().ndims(), 2UL); + CHECK_EQ(val_seqs.shape().ndims(), 2UL); /// dim of output = dim of input * context_length CHECK_EQ(out_seq.shape()[1], val_seqs.shape()[1] * context_length_); /// input and output has the same batch_size CHECK_EQ(val_seqs.shape()[0], out_seq.shape()[0]); - /// dim of input == dim of weight - if (2 == inputs.size()) { + if (2UL == inputs.size()) { + CHECK_EQ(inputs[1].shape().ndims(), 2UL); + /// dim of input == dim of weight CHECK_EQ(val_seqs.shape()[1], inputs[1].shape()[1]); } @@ -135,10 +132,11 @@ public: auto out_mat = out_seq.matrix(); const auto in_mat = val_seqs.matrix(); const auto w_mat = - (2 == inputs.size()) + (2UL == inputs.size() && inputs[1].data()) ? 
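The shape-only SparseMatrixArg constructor above sizes its index buffers from the format: CSR keeps height + 1 row offsets and nnz column indices, CSC the mirror image. For a concrete 2 x 3 matrix with nnz = 3, the CSR arrays would be (hypothetical values):

// 2 x 3 matrix:   CSR encoding (nnz = 3):
//   [5 0 7]       rows (height + 1 offsets) = {0, 2, 3}
//   [0 9 0]       cols (nnz column indices) = {0, 2, 1}
const int rows[] = {0, 2, 3};  // row i spans vals[rows[i] .. rows[i+1])
const int cols[] = {0, 2, 1};
const float vals[] = {5.f, 7.f, 9.f};

That is also why the earlier constructor checks nnz against col.shape()[0] for CSR but against row.shape()[0] for CSC.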
inputs[1].matrix() : typename Tensor::Matrix(nullptr, 0, 0); const auto seq_vec = val_seqs.getSequenceId().vector(); + ContextProjectionForward(out_mat, in_mat, w_mat, @@ -235,36 +233,40 @@ public: } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ((size_t)1, inputs.size()); - CHECK_EQ((size_t)2, outputs.size()); + CHECK_EQ(1UL, inputs.size()); + CHECK(1UL == outputs.size() || 2UL == outputs.size()); CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) << "SequenceArg required here"; const auto in_seq = dynamic_cast(inputs[0]); auto out_seq = dynamic_cast(outputs[0]); CHECK(in_seq.data() && in_seq.getSequenceId().data()); - CHECK_EQ(in_seq.shape().ndims(), (size_t)2); - CHECK_EQ(in_seq.getSequenceId().shape().ndims(), (size_t)1); - CHECK_EQ(out_seq.shape().ndims(), (size_t)2); - CHECK_EQ(out_seq.getSequenceId().shape().ndims(), (size_t)1); - CHECK_EQ(outputs[1].shape().ndims(), (size_t)2); + CHECK_EQ(in_seq.shape().ndims(), 2UL); + CHECK_EQ(out_seq.shape().ndims(), 2UL); + CHECK_EQ(out_seq.getSequenceId().shape().ndims(), 1UL); - /// dim of input grad == dim of weight - CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]); /// input and output grad has the same batch_size CHECK_EQ(out_seq.shape()[0], in_seq.shape()[0]); /// dim of output grad = dim of input grad * context_length CHECK_EQ(in_seq.shape()[1], out_seq.shape()[1] * context_length_); CHECK_EQ(out_seq.getArgType(), ADD_TO); - CHECK_EQ(outputs[1].getArgType(), ADD_TO); + + if (2UL == outputs.size()) { + CHECK_EQ(outputs[1].shape().ndims(), 2UL); + /// dim of input grad == dim of weight + CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]); + CHECK_EQ(outputs[1].getArgType(), ADD_TO); + } const auto seq_vec = in_seq.getSequenceId().vector(); const auto out_grad_mat = in_seq.matrix(); auto in_grad_mat = !out_seq.data() ? typename Tensor::Matrix(nullptr, 0, 0) : out_seq.matrix(); - auto w_grad_mat = !outputs[1].data() - ? typename Tensor::Matrix(nullptr, 0, 0) - : outputs[1].matrix(); + auto w_grad_mat = + (2UL == outputs.size() && outputs[1].data()) + ? 
outputs[1].matrix() + : typename Tensor::Matrix(nullptr, 0, 0); + ContextProjectionBackward(out_grad_mat, in_grad_mat, w_grad_mat, @@ -304,17 +306,17 @@ public: } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1, static_cast(inputs.size())); - CHECK_EQ(1, static_cast(outputs.size())); + CHECK_EQ(1UL, inputs.size()); + CHECK_EQ(1UL, outputs.size()); CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) << "SequenceArg required here"; const auto in_seq = dynamic_cast(inputs[0]); const auto out_seq = dynamic_cast(outputs[0]); CHECK(in_seq.data() && out_seq.data() && in_seq.getSequenceId().data()); - CHECK_EQ(static_cast(out_seq.shape().ndims()), 2); - CHECK_EQ(static_cast(in_seq.shape().ndims()), 2); - CHECK_EQ(static_cast(in_seq.getSequenceId().shape().ndims()), 1); + CHECK_EQ(out_seq.shape().ndims(), 2UL); + CHECK_EQ(in_seq.shape().ndims(), 2UL); + CHECK_EQ(in_seq.getSequenceId().shape().ndims(), 1UL); /// output layer grad dim == input layer grad dim * context_length_ CHECK_EQ(in_seq.shape().ndims(), out_seq.shape().ndims() * context_length_); /// input and output has the same batch_size @@ -355,14 +357,14 @@ public: } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1, static_cast(inputs.size())); - CHECK_EQ(1, static_cast(outputs.size())); + CHECK_EQ(1UL, inputs.size()); + CHECK_EQ(1UL, outputs.size()); CHECK(inputs[0].isSequenceArg()) << "SequenceArg required here"; const auto in_seq = dynamic_cast(inputs[0]); CHECK(in_seq.data() && in_seq.getSequenceId().data() && outputs[0].data()); - CHECK_EQ(static_cast(outputs[0].shape().ndims()), 2); - CHECK_EQ(static_cast(in_seq.shape().ndims()), 2); - CHECK_EQ(static_cast(in_seq.getSequenceId().shape().ndims()), 1); + CHECK_EQ(outputs[0].shape().ndims(), 2UL); + CHECK_EQ(in_seq.shape().ndims(), 2UL); + CHECK_EQ(in_seq.getSequenceId().shape().ndims(), 1UL); CHECK_EQ(in_seq.shape()[0], outputs[0].shape()[0]); /// output layer grad dim == weight dim * context_length_ CHECK_EQ(in_seq.shape()[1], outputs[0].shape()[1] * context_length_); diff --git a/paddle/function/ContextProjectionOp.h b/paddle/function/ContextProjectionOp.h index 2bdd47e4e9b02483c2c5af82bf00c4e55d68f93e..6f7d936379a5378e6fd85dd86618d1b6094bd14f 100644 --- a/paddle/function/ContextProjectionOp.h +++ b/paddle/function/ContextProjectionOp.h @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once - #include "Function.h" namespace paddle { diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp index c9db2ff8008e0bb0fa04370fb7b3ecd7641d2062..0f5d6a848d406d14984a0b6edad8192dab42e88b 100644 --- a/paddle/function/ContextProjectionOpTest.cpp +++ b/paddle/function/ContextProjectionOpTest.cpp @@ -28,55 +28,26 @@ void testMatrixProjectionForward(int context_start, std::max(0, (int)(context_start + context_length - 1)); if (pad == 0) is_padding = false; - FunctionCompare compare("ContextProjectionForward", - FuncConfig() - .set("context_length", context_length) - .set("context_start", context_start) - .set("begin_pad", std::max(0, -context_start))); - - CpuMatrix cpu_in(batch_size, input_dim); - cpu_in.randomizeUniform(); - GpuMatrix gpu_in(batch_size, input_dim); - gpu_in.copyFrom(cpu_in); - auto cpu_weight = - is_padding ? std::make_shared(pad, input_dim) : nullptr; - auto gpu_weight = - is_padding ? 
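All of the shape checks in these calc() methods follow one contract: every output row is the concatenation of context_length neighboring input rows, so the output dim is the input dim times context_length, and positions where the window crosses a sequence boundary come from padding (trainable when the weight argument is present). A simplified single-sequence sketch with zero padding only, a hypothetical helper rather than the Function itself:

#include <vector>

// out[i] = concat(in[i + context_start], ..., in[i + context_start + L - 1]);
// rows outside [0, numRows) stay zero here, where the real Function may
// instead read trainable padding weights.
std::vector<float> contextProject(const std::vector<float>& in, int numRows,
                                  int dim, int contextStart, int contextLen) {
  std::vector<float> out(static_cast<size_t>(numRows) * dim * contextLen, 0.f);
  for (int i = 0; i < numRows; ++i) {
    for (int c = 0; c < contextLen; ++c) {
      int src = i + contextStart + c;
      if (src < 0 || src >= numRows) continue;  // padding position
      for (int d = 0; d < dim; ++d) {
        out[(static_cast<size_t>(i) * contextLen + c) * dim + d] =
            in[static_cast<size_t>(src) * dim + d];
      }
    }
  }
  return out;
}

For example, with dim = 3 and context_length = 2 each output row has 6 columns, which is exactly what the CHECK on out_seq.shape()[1] enforces.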
std::make_shared(pad, input_dim) : nullptr; - if (is_padding) { - cpu_weight->randomizeUniform(); - gpu_weight->copyFrom(*cpu_weight); + FunctionCompare test("ContextProjectionForward", + FuncConfig() + .set("context_length", context_length) + .set("context_start", context_start) + .set("begin_pad", std::max(0, -context_start))); + + // prepare input arguments + test.addSequence(SequenceIdArg(TensorShape{batch_size})); + test.addInputs( + SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim})); + if (is_padding) { // weight + test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{pad, input_dim})); } - IVectorPtr cpu_seq; - generateSequenceStartPositions(batch_size, cpu_seq); - IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true); - gpu_seq->copyFrom(*cpu_seq); - - CpuMatrix cpu_out(batch_size, input_dim * context_length); - GpuMatrix gpu_out(batch_size, input_dim * context_length); - cpu_out.randomizeUniform(); - gpu_out.copyFrom(cpu_out); - - BufferArgs cpu_inputs; - BufferArgs cpu_outputs; - cpu_inputs.addArg(cpu_in, *cpu_seq); - if (cpu_weight) { - cpu_inputs.addArg(*cpu_weight, *cpu_seq); - } - cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO); - - compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs); + test.addOutputs( + SequenceArg(VALUE_TYPE_FLOAT, + TensorShape{batch_size, input_dim * context_length}), + ADD_TO); - BufferArgs gpu_inputs; - BufferArgs gpu_outputs; - gpu_inputs.addArg(gpu_in, *gpu_seq); - if (gpu_weight) { - gpu_inputs.addArg(*gpu_weight, *gpu_seq); - } - gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO); - - compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs); - - autotest::TensorCheckEqual(cpu_out, gpu_out); + // run Function + test.run(); } void testMatrixProjectionBackward(int context_start, @@ -88,63 +59,31 @@ void testMatrixProjectionBackward(int context_start, std::max(0, (int)(context_start + context_length - 1)); if (pad == 0) is_padding = false; - FunctionCompare compare("ContextProjectionBackward", - FuncConfig() - .set("context_length", context_length) - .set("context_start", context_start) - .set("begin_pad", std::max(0, -context_start)) - .set("is_padding", is_padding) - .set("total_pad", pad)); - - CpuMatrix cpu_in_grad(batch_size, input_dim); - cpu_in_grad.randomizeUniform(); - GpuMatrix gpu_in_grad(batch_size, input_dim); - gpu_in_grad.copyFrom(cpu_in_grad); - - CpuMatrix cpu_out_grad(batch_size, input_dim * context_length); - cpu_out_grad.randomizeUniform(); - GpuMatrix gpu_out_grad(batch_size, input_dim * context_length); - gpu_out_grad.copyFrom(cpu_out_grad); - - IVectorPtr cpu_seq; - generateSequenceStartPositions(batch_size, cpu_seq); - IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true); - gpu_seq->copyFrom(*cpu_seq); - - auto cpu_w_grad = - is_padding ? std::make_shared(pad, input_dim) : nullptr; - auto gpu_w_grad = - is_padding ? 
std::make_shared(pad, input_dim) : nullptr; - if (is_padding) { - cpu_w_grad->randomizeUniform(); - gpu_w_grad->copyFrom(*cpu_w_grad); + FunctionCompare test("ContextProjectionBackward", + FuncConfig() + .set("context_length", context_length) + .set("context_start", context_start) + .set("begin_pad", std::max(0, -context_start)) + .set("is_padding", is_padding) + .set("total_pad", pad)); + + // prepare input arguments + test.addSequence(SequenceIdArg(TensorShape{batch_size})); + test.addInputs(SequenceArg( + VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim * context_length})); + test.addOutputs( + SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim}), + ADD_TO); + if (is_padding) { // weight + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{pad, input_dim}), + ADD_TO); } - BufferArgs cpu_inputs; - BufferArgs cpu_outputs; - cpu_inputs.addArg(cpu_out_grad, *cpu_seq); - cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO); - cpu_outputs.addArg( - cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO); - - compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs); - - BufferArgs gpu_inputs; - BufferArgs gpu_outputs; - gpu_inputs.addArg(gpu_out_grad, *gpu_seq); - gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO); - gpu_outputs.addArg( - gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO); - - compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs); - - autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad); - if (is_padding) { - autotest::TensorCheckErr(*cpu_w_grad, *gpu_w_grad); - } + // run Function + test.run(); } -TEST(ContextProjection, projection) { +TEST(ContextProjection, Projection) { for (auto context_start : {-5, -3, -1, 0, 3}) { for (auto context_length : {1, 2, 5, 7}) { for (auto trainable_padding : {false, true}) { diff --git a/paddle/function/CosSimOp.cpp b/paddle/function/CosSimOp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7ece7b2dfedaf460741c97b5a700eb632d85cabc --- /dev/null +++ b/paddle/function/CosSimOp.cpp @@ -0,0 +1,240 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "CosSimOp.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/Vector.h" + +namespace paddle { +/** + * Cosine Similarity for CpuMatrix + * + * \param out_mat, output value, size: nSamples * 1. + * \param in1_mat, input value 1, size: nSamples * dim. + * \param in2_mat, input value 2, size: n2 * dim (n2 == 1 or n2 == nSamples). 
+ * \param scale, default 1.0 + * + */ +template <> +void CosSimForward(CpuMatrix& out_mat, + const CpuMatrix& in1_mat, + const CpuMatrix& in2_mat, + real scale) { + CHECK(out_mat.getData() && in1_mat.getData() && in2_mat.getData()); + size_t num_samples = out_mat.getHeight(); + size_t dim = in1_mat.getWidth(); + /// column vector [nSamples, 1] + real* out = out_mat.getData(); + const real* x = in1_mat.getData(); + const real* y = in2_mat.getData(); + + /// in2 might only have one row or full rows + CHECK(in2_mat.getHeight() == 1LU || in2_mat.getHeight() == num_samples); + size_t inc = (in2_mat.getHeight() == 1LU) ? 0 : dim; + for (size_t i = 0; i < num_samples; ++i, x += dim, y += inc) { + real square_sum_x = 0; + real square_sum_y = 0; + real xy = 0; + for (size_t j = 0; j < dim; ++j) { + square_sum_x += x[j] * x[j]; + square_sum_y += y[j] * y[j]; + xy += x[j] * y[j]; + } + CHECK(square_sum_x > 0 && square_sum_y > 0); + out[i] = scale * xy / (std::sqrt(square_sum_x) * std::sqrt(square_sum_y)); + } +} + +/** + * Cosine Similarity + * for each row i, + * out[i] = scale * cos(input1[i], input2[i]) + * = scale * /sqrt(|input1[i]|^2 * |input2[i]|^2) + * when input2 only has one row, then for each row i, + * out[i] = cos(input1[i], input2[0]) + * + * \param inputs[0] input matrix 1, size: nSamples * dim. + * \param inputs[1] input matrix 2, size: n2 * dim (n2 == 1 or n2 == nSamples). + * \param outputs[0] output matrix, size : nSamples * 1. + */ + +template +class CosSimForwardFunc : public FunctionBase { + void init(const FuncConfig& config) override { + scale_ = config.get("scale"); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(inputs.size(), 2UL); + CHECK_EQ(outputs.size(), 1UL); + + CHECK_EQ(inputs[0].shape().ndims(), 2UL); + CHECK_EQ(inputs[1].shape().ndims(), 2UL); + CHECK_EQ(outputs[0].shape().ndims(), 2UL); + + CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]); + CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]); + CHECK_EQ(outputs[0].shape()[1], 1UL); + + CHECK(outputs[0].data() && inputs[0].data() && inputs[1].data()); + + CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); + auto out_mat = outputs[0].matrix(); + const auto in1_mat = inputs[0].matrix(); + const auto in2_mat = inputs[1].matrix(); + + CosSimForward(out_mat, in1_mat, in2_mat, scale_); + } + +private: + real scale_; +}; + +/** + * Cosine Similarity Derivative for CpuMatrix + * + * \param in1_grad forward input grad 1, size: nSamples * dim. + * \param in2_grad forward input grad 2, + * size: n2 * dim (n2 == 1 or n2 == nSamples). + * + * \param out_grad backward loss output grad, size : nSamples * 1. + * \param out_val forward output value, size: nSamples * 1. + * \param in1_val forward input value 1, size: nSamples * dim. + * \param in2_val forward input value 2, + * size: n2 * dim (n2 == 1 or n2 == nSamples). 
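As a quick numeric check of the forward loop above: with scale = 1, x = (1, 0) and y = (1, 1) give xy = 1, |x| = 1, |y| = sqrt(2), so out = 1/sqrt(2), about 0.7071. The same per-row computation in standalone form (a sketch, not the Function API):

#include <cmath>
#include <cstdio>

// Single-row cosine similarity, mirroring the CPU loop above.
float cosSimRow(const float* x, const float* y, int dim, float scale) {
  float xx = 0.f, yy = 0.f, xy = 0.f;
  for (int j = 0; j < dim; ++j) {
    xx += x[j] * x[j];
    yy += y[j] * y[j];
    xy += x[j] * y[j];
  }
  return scale * xy / (std::sqrt(xx) * std::sqrt(yy));
}

int main() {
  const float x[] = {1.f, 0.f};
  const float y[] = {1.f, 1.f};
  std::printf("%f\n", cosSimRow(x, y, 2, 1.f));  // prints ~0.707107
  return 0;
}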
+ * \param scale, default 1.0 + */ +template <> +void CosSimBackward(const CpuMatrix& out_grad, + const CpuMatrix& out_val, + const CpuMatrix& in1_val, + const CpuMatrix& in2_val, + CpuMatrix& in1_grad, + CpuMatrix& in2_grad, + real scale) { + CHECK(out_grad.getData() && out_val.getData() && in1_val.getData() && + in2_val.getData() && in1_grad.getData() && in2_grad.getData()); + CHECK_EQ(out_val.useGpu_, false) << "Matrix type are GPU, CPU required"; + + const real* grad = out_grad.getData(); + const real* out = out_val.getData(); + const real* prev_out_x = in1_val.getData(); + const real* prev_out_y = in2_val.getData(); + real* prev_grad_x = in1_grad.getData(); + real* prev_grad_y = in2_grad.getData(); + + size_t num_samples = out_grad.getHeight(); + size_t dim = in1_val.getWidth(); + CHECK_EQ(in2_val.getHeight(), in2_grad.getHeight()); + CHECK(in2_val.getHeight() == 1LU || in2_val.getHeight() == num_samples); + size_t inc = (in2_val.getHeight() == 1LU) ? 0 : dim; + for (size_t i = 0; i < num_samples; ++i, + prev_out_x += dim, + prev_out_y += inc, + prev_grad_x += dim, + prev_grad_y += inc) { + real square_sum_x = 0; + real square_sum_y = 0; + real xy = 0; + for (size_t j = 0; j < dim; ++j) { + square_sum_x += prev_out_x[j] * prev_out_x[j]; + square_sum_y += prev_out_y[j] * prev_out_y[j]; + xy += prev_out_x[j] * prev_out_y[j]; + } + CHECK(square_sum_x > 0 && square_sum_y > 0); + if (xy == 0) { + real reciprocal = + 1.0f / (std::sqrt(square_sum_x) * std::sqrt(square_sum_y)); + for (size_t j = 0; j < dim; ++j) { + prev_grad_x[j] += scale * grad[i] * prev_out_y[j] * reciprocal; + prev_grad_y[j] += scale * grad[i] * prev_out_x[j] * reciprocal; + } + } else { + real reciprocal_xy = 1.0f / xy; + real reciprocal_square_sum_x = 1.0f / square_sum_x; + real reciprocal_square_sum_y = 1.0f / square_sum_y; + for (size_t j = 0; j < dim; ++j) { + prev_grad_x[j] += + out[i] * grad[i] * (prev_out_y[j] * reciprocal_xy - + prev_out_x[j] * reciprocal_square_sum_x); + prev_grad_y[j] += + out[i] * grad[i] * (prev_out_x[j] * reciprocal_xy - + prev_out_y[j] * reciprocal_square_sum_y); + } + } + } +} + +/** + * Cosine Similarity backward Derivative + * + * \param outputs[0] forward input grad 1, size: nSamples * dim. + * \param outputs[1] forward input grad 2, + * size: n2 * dim (n2 == 1 or n2 == nSamples). + * + * \param inputs[0] backward loss output grad, size : nSamples * 1. + * \param inputs[1] forward output value, size: nSamples * 1. + * \param inputs[2] forward input value 1, size: nSamples * dim. + * \param inputs[3] forward input value 2, + * size: n2 * dim (n2 == 1 or n2 == nSamples). 
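The xy != 0 branch above implements d out / d x_j = out * (y_j / xy - x_j / xx) (with grad = scale = 1), which is easy to sanity-check against central differences. A standalone probe, illustrative only, in double precision for a stable comparison:

#include <cmath>
#include <cstdio>

int main() {
  // f(x) = cos(x, y); compare the analytic d f / d x_j with (f(x+h)-f(x-h))/2h.
  const int dim = 3;
  double x[] = {0.5, -1.0, 2.0};
  const double y[] = {1.0, 0.3, -0.7};
  auto f = [&](const double* a, const double* b) {
    double aa = 0, bb = 0, ab = 0;
    for (int j = 0; j < dim; ++j) {
      aa += a[j] * a[j];
      bb += b[j] * b[j];
      ab += a[j] * b[j];
    }
    return ab / (std::sqrt(aa) * std::sqrt(bb));
  };
  double xx = 0, xy = 0;
  for (int j = 0; j < dim; ++j) {
    xx += x[j] * x[j];
    xy += x[j] * y[j];
  }
  const double out = f(x, y), h = 1e-6;
  for (int j = 0; j < dim; ++j) {
    const double analytic = out * (y[j] / xy - x[j] / xx);  // xy != 0 branch
    const double saved = x[j];
    x[j] = saved + h;
    const double fp = f(x, y);
    x[j] = saved - h;
    const double fm = f(x, y);
    x[j] = saved;
    std::printf("j=%d analytic=%.8f numeric=%.8f\n", j, analytic,
                (fp - fm) / (2 * h));
  }
  return 0;
}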
+ */ +template +class CosSimBackwardFunc : public FunctionBase { + void init(const FuncConfig& config) override { + scale_ = config.get("scale"); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(inputs.size(), 4UL); + CHECK_EQ(outputs.size(), 2UL); + /// dim of out_grad and out_val == 1, column vector + CHECK_EQ(inputs[0].shape()[1], 1UL); + CHECK_EQ(inputs[1].shape()[1], 1UL); + /// nSamples of out_grad == out_val == in_val1 == in_grad1 + CHECK_EQ(inputs[1].shape()[0], inputs[0].shape()[0]); + CHECK_EQ(inputs[2].shape()[0], inputs[0].shape()[0]); + CHECK_EQ(outputs[0].shape()[0], inputs[0].shape()[0]); + /// dim of in1_val == in2_val == in1_grad == in2_grad + CHECK_EQ(inputs[3].shape()[1], inputs[2].shape()[1]); + CHECK_EQ(outputs[0].shape()[1], inputs[2].shape()[1]); + CHECK_EQ(outputs[1].shape()[1], inputs[2].shape()[1]); + + CHECK(inputs[0].data() && inputs[1].data() && inputs[2].data() && + inputs[3].data() && outputs[0].data() && outputs[1].data()); + + CHECK_EQ(outputs[0].getArgType(), ADD_TO); + CHECK_EQ(outputs[1].getArgType(), ADD_TO); + + const auto out_grad = inputs[0].matrix(); + const auto out_val = inputs[1].matrix(); + const auto in1_val = inputs[2].matrix(); + const auto in2_val = inputs[3].matrix(); + auto in1_grad = outputs[0].matrix(); + auto in2_grad = outputs[1].matrix(); + + CosSimBackward( + out_grad, out_val, in1_val, in2_val, in1_grad, in2_grad, scale_); + } + +private: + real scale_; +}; + +REGISTER_TYPED_FUNC(CosSimForward, CPU, CosSimForwardFunc); +REGISTER_TYPED_FUNC(CosSimBackward, CPU, CosSimBackwardFunc); +#ifndef PADDLE_ONLY_CPU +REGISTER_TYPED_FUNC(CosSimForward, GPU, CosSimForwardFunc); +REGISTER_TYPED_FUNC(CosSimBackward, GPU, CosSimBackwardFunc); +#endif +} // namespace paddle diff --git a/paddle/function/CosSimOp.h b/paddle/function/CosSimOp.h new file mode 100644 index 0000000000000000000000000000000000000000..be73064e6375bf1e6c6a7ca6de52e9b9b755880b --- /dev/null +++ b/paddle/function/CosSimOp.h @@ -0,0 +1,61 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Function.h" + +namespace paddle { + +/** + * \brief Cosine Similarity Forward. + * for each row i, + * out[i] = scale * cos(in1[i], in2[i]) + * = scale * \sum_j (in1[i][j] * in2[i][j]) / + * sqrt(\sum_j (in1[i][j]^2) * \sum_j (in2[i][j]^2)) + * + * \param[out] output output value. + * \param[in] input1 input value. + * \param[in] input2 input value. + * \param[in] scale default 1.0. + * + */ +template +void CosSimForward(typename Tensor::Matrix& output, + const typename Tensor::Matrix& input1, + const typename Tensor::Matrix& input2, + real scale); + +/** + * \brief Cosine Similarity Backward Derivative. + * + * \param[in] output grad backward loss output grad. + * \param[in] output val forward output value. + * \param[in] input val1 forward input value 1. + * \param[in] input val2 forward input value 2. + * \param[in/out] input grad forward input grad 1. 
+ * \param[in/out] input grad forward input grad 2. + * \param[in] scale default 1.0. + * + */ +template +void CosSimBackward(const typename Tensor::Matrix& out_grad, + const typename Tensor::Matrix& out_value, + const typename Tensor::Matrix& in1_value, + const typename Tensor::Matrix& in2_value, + typename Tensor::Matrix& in1_grad, + typename Tensor::Matrix& in2_grad, + real scale); + +} // namespace paddle diff --git a/paddle/function/CosSimOpGpu.cu b/paddle/function/CosSimOpGpu.cu new file mode 100644 index 0000000000000000000000000000000000000000..c62ab39551f02288618244871ae31c6800df5b42 --- /dev/null +++ b/paddle/function/CosSimOpGpu.cu @@ -0,0 +1,240 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "hl_base.h" +#include "hl_device_functions.cuh" +#include "CosSimOp.h" + +namespace paddle { + +template +__global__ void KeCosSim(real* output, + const real* input1, + const real* input2, + int width, + int input1_height, + int input2_height, + real scale) { + const int ty = blockIdx.y; + int tid = threadIdx.x; + + __shared__ real xx[block_size]; + __shared__ real yy[block_size]; + __shared__ real xy[block_size]; + + xx[tid] = 0.0; + yy[tid] = 0.0; + xy[tid] = 0.0; + __syncthreads(); + + input1 += ty * width; + if (input2_height > 1) { + input2 += ty * width; + } + for (int index = tid; index < width; index += block_size) { + real x = input1[index]; + real y = input2[index]; + xx[tid] += x * x; + yy[tid] += y * y; + xy[tid] += x * y; + } + __syncthreads(); + + for (int s = block_size / 2; s > 0; s >>= 1) { + if (tid < s) { + xx[tid] += xx[tid + s]; + yy[tid] += yy[tid + s]; + xy[tid] += xy[tid + s]; + } + __syncthreads(); + } + if (tid == 0) { + output[ty] = scale * xy[0] / (sqrt(xx[0]) * sqrt(yy[0])); + } +} + +void hlCossim(real* output, + const real* input1, + const real* input2, + size_t width, + size_t input1_height, + size_t input2_height, + real scale) { + CHECK_NOTNULL(output); + CHECK_NOTNULL(input1); + CHECK_NOTNULL(input2); + const int block_size = 256; + dim3 threads(block_size, 1); + dim3 grid(1, input1_height); + + KeCosSim<<>> + (output, input1, input2, width, input1_height, input2_height, scale); + CHECK_SYNC("hlCossim failed"); +} + +template <> +void CosSimForward(GpuMatrix& out_mat, + const GpuMatrix& in1_mat, + const GpuMatrix& in2_mat, + real scale) { + CHECK(out_mat.getData() && in1_mat.getData() && in2_mat.getData()); + CHECK(in1_mat.useGpu_ == true && in2_mat.useGpu_ == true) + << "Matrix type are not GPU"; + + size_t dim = in1_mat.getWidth(); + real* out = out_mat.getData(); + const real* x = in1_mat.getData(); + const real* y = in2_mat.getData(); + hlCossim(out, x, y, dim, in1_mat.getHeight(), in2_mat.getHeight(), scale); +} + +template +__global__ void KeCosSimDerivative(const real* grad, + const real* output, + const real* prev_out_x, + const real* prev_out_y, + real* prev_grad_x, + real* prev_grad_y, + size_t width, + size_t input1_height, + size_t input2_height, + real scale) { + const int ty = 
blockIdx.y; + int tid = threadIdx.x; + + __shared__ real xx[block_size]; + __shared__ real yy[block_size]; + __shared__ real xy[block_size]; + + xx[tid] = 0.0; + yy[tid] = 0.0; + xy[tid] = 0.0; + __syncthreads(); + + prev_out_x += ty * width; + prev_grad_x += ty * width; + if (input2_height > 1) { + prev_out_y += ty * width; + prev_grad_y += ty * width; + } + for (int index = tid; index < width; index += block_size) { + real x = prev_out_x[index]; + real y = prev_out_y[index]; + xx[tid] += x * x; + yy[tid] += y * y; + xy[tid] += x * y; + } + __syncthreads(); + + for (int s = block_size / 2; s > 0; s >>= 1) { + if (tid < s) { + xx[tid] += xx[tid + s]; + yy[tid] += yy[tid + s]; + xy[tid] += xy[tid + s]; + } + __syncthreads(); + } + if (xy[0] == 0) { + real reciprocal = 1.0 / (sqrt(xx[0]) * sqrt(yy[0])); + for (int index = tid; index < width; index += block_size) { + prev_grad_x[index] += + scale * grad[ty] * prev_out_y[index] * reciprocal; + if (input2_height > 1) { + prev_grad_y[index] += + scale * grad[ty] * prev_out_x[index] * reciprocal; + } else { + paddle::paddleAtomicAdd(prev_grad_y + index, + scale * grad[ty] * prev_out_x[index] * reciprocal); + } + } + } else { + real reciprocalXY = 1.0 / xy[0]; + real reciprocalSquareSumX = 1.0 / xx[0]; + real reciprocalSquareSumY = 1.0 / yy[0]; + for (int index = tid; index < width; index += block_size) { + prev_grad_x[index] += output[ty] * grad[ty] * + (prev_out_y[index] * reciprocalXY - + prev_out_x[index] * reciprocalSquareSumX); + if (input2_height > 1) { + prev_grad_y[index] += output[ty] * grad[ty] * + (prev_out_x[index] * reciprocalXY - + prev_out_y[index] * reciprocalSquareSumY); + } else { + paddle::paddleAtomicAdd(prev_grad_y + index, output[ty] * grad[ty] * + (prev_out_x[index] * reciprocalXY - + prev_out_y[index] * reciprocalSquareSumY)); + } + } + } +} + +void hlCossimDerivative(const real* grad, + const real* output, + const real* prev_out_x, + const real* prev_out_y, + real* prev_grad_x, + real* prev_grad_y, + size_t width, + size_t input1_height, + size_t input2_height, + real scale) { + CHECK_NOTNULL(grad); + CHECK_NOTNULL(output); + CHECK_NOTNULL(prev_out_x); + CHECK_NOTNULL(prev_out_y); + CHECK_NOTNULL(prev_grad_x); + CHECK_NOTNULL(prev_grad_y); + const int block_size = 256; + dim3 threads(block_size, 1); + dim3 grid(1, input1_height); + KeCosSimDerivative<<>> + (grad, output, prev_out_x, prev_out_y, prev_grad_x, prev_grad_y, width, + input1_height, input2_height, scale); + CHECK_SYNC("hlCossimDerivate failed"); +} + +template <> +void CosSimBackward(const GpuMatrix& out_grad, + const GpuMatrix& out_val, + const GpuMatrix& in1_val, + const GpuMatrix& in2_val, + GpuMatrix& in1_grad, + GpuMatrix& in2_grad, + real scale) { + CHECK(out_grad.getData() && out_val.getData() && in1_val.getData() && + in2_val.getData() && in1_grad.getData() && in2_grad.getData()); + CHECK(out_grad.useGpu_ && out_val.useGpu_ && in1_val.useGpu_ + && in2_val.useGpu_ && in1_grad.useGpu_ && in2_grad.useGpu_) + << "Matrix types are not equally GPU"; + + size_t dim = in1_val.getWidth(); + const real* grad = out_grad.getData(); + const real* out = out_val.getData(); + const real* prev_out_x = in1_val.getData(); + const real* prev_out_y = in2_val.getData(); + real* prev_grad_x = in1_grad.getData(); + real* prev_grad_y = in2_grad.getData(); + hlCossimDerivative(grad, + out, + prev_out_x, + prev_out_y, + prev_grad_x, + prev_grad_y, + dim, + in1_val.getHeight(), + in2_val.getHeight(), + scale); +} + +} // namespace paddle diff --git 
a/paddle/function/CosSimOpTest.cpp b/paddle/function/CosSimOpTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..48c815f027161b48c17ce654ab819156fd856199 --- /dev/null +++ b/paddle/function/CosSimOpTest.cpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include <gtest/gtest.h> +#include "FunctionTest.h" +#include "paddle/math/Matrix.h" + +using namespace paddle; // NOLINT + +void testCosSimForward(size_t height_x, + size_t height_y, + size_t width, + real scale) { + FunctionCompare test("CosSimForward", FuncConfig().set("scale", scale)); + // prepare input arguments + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width})); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width})); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1}), + ASSIGN_TO); + // run Function + test.run(); +} + +void testCosSimBackward(size_t height_x, + size_t height_y, + size_t width, + real scale) { + FunctionCompare test("CosSimBackward", FuncConfig().set("scale", scale)); + // prepare input arguments + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1})); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1})); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width})); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width})); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width}), + ADD_TO); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width}), + ADD_TO); + // run Function + test.run(); +} + +TEST(Matrix, cosSim) { + for (auto height_x : {10, 100, 1000}) { + for (auto height_y : {1, height_x}) { + for (auto width : {10, 100, 1000}) { + for (auto scale : {1.0, 2.0}) { + testCosSimForward(height_x, height_y, width, scale); + testCosSimBackward(height_x, height_y, width, scale); + } + } + } + } +} diff --git a/paddle/function/CrossMapNormalOp.cpp b/paddle/function/CrossMapNormalOp.cpp index 8e7dc72524a7680a03ea6eb4770a3e25c09ad913..ef878bfbba961bdd3d5212e19fb83bb1e285e47f 100644 --- a/paddle/function/CrossMapNormalOp.cpp +++ b/paddle/function/CrossMapNormalOp.cpp @@ -112,48 +112,114 @@ void CrossMapNormalGrad(real* inputsGrad, } /** - * \brief {o_0, o_1} = calc(i_0) + * \brief Normalization across maps. * - * \param inputs[0] input value. - * \param outputs[0] output value. - * \param outputs[1] denoms. + * This Function comes from the paper + * "ImageNet Classification with Deep Convolutional Neural Networks". + * + * The original formula is: + * + * Input(i, x, y) + * Output(i, x, y) = ---------------------------------------------- + * -- upper + * (k + alpha * > (Input(j, x, y))^2) ^ (beta) + * -- j = lower + * + * upper is `min(C, c + N/2)` + * lower is `max(0, c - N/2)` + * + * Function implementation: + * + * inputs and outputs are in NCHW format, and input.shape.ndims() is equal to 4. 
+ * And the meaning of each dimension (0-3) is, respectively, batch size, + * feature maps, rows and columns. + * + * Input and Output in the above formula are for each map (i) of one image, and + * Input(i, x, y), Output(i, x, y) represent an element in an image. + * + * C is the number of feature maps of one image, and N is a hyper-parameter + * that is configured when the Function is initialized. The sum in the + * denominator is over the same position in the neighboring maps. + * + * In the implementation of Function, k is equal to 1, + * so Function has no argument for k. + * + * Function Arguments: + * + * \param size_ represents N + * \param scale_ represents alpha + * \param pow_ represents beta + * \param inputs[0] represents Input + * \param outputs[0] represents Output + * \param outputs[1] represents the denominator in the formula (without the + * beta exponent) + * + * Note: + * Saving outputs[1] is to simplify the backward calculation. + * TODO: if only the forward calculation is considered, we can optimize to + * remove outputs[1]. */ template class CrossMapNormalFunc : public FunctionBase { public: void init(const FuncConfig& config) override { + // function arguments size_ = config.get("size"); scale_ = config.get("scale"); pow_ = config.get("pow"); + + // number of inputs and outputs + numInputs_ = 1; + numOutputs_ = 2; } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ((size_t)1, inputs.size()); - CHECK_EQ((size_t)2, outputs.size()); - - CHECK_EQ(inputs[0].shape().ndims(), (size_t)4); - CHECK(inputs[0].shape() == outputs[0].shape()); - CHECK(inputs[0].shape() == outputs[1].shape()); - + check(inputs, outputs); + // The ArgType check is still done here; + // it is arguable whether it should move inside check(). CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); CHECK_EQ(outputs[1].getArgType(), ASSIGN_TO); - size_t samples = inputs[0].shape()[0]; - size_t channels = inputs[0].shape()[1]; - size_t height = inputs[0].shape()[2]; - size_t width = inputs[0].shape()[3]; + size_t batchSize = inputs[0].shape()[0]; + size_t maps = inputs[0].shape()[1]; + size_t rows = inputs[0].shape()[2]; + size_t columns = inputs[0].shape()[3]; CrossMapNormal(outputs[0].data(), outputs[1].data(), inputs[0].data(), - samples, - channels, - height, - width, + batchSize, + maps, + rows, + columns, size_, scale_, pow_); } + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(numInputs_, inputs.size()); + CHECK_EQ(numOutputs_, outputs.size()); + + CHECK_EQ(inputs[0].shape().ndims(), (size_t)4); + CHECK(inputs[0].shape() == outputs[0].shape()); + CHECK(inputs[0].shape() == outputs[1].shape()); + } + + // Only the shape of the input is needed to calculate the + // number of floating-point operations. + size_t ops(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ((size_t)numInputs_, inputs.size()); + size_t batchSize = inputs[0].shape()[0]; + size_t maps = inputs[0].shape()[1]; + size_t rows = inputs[0].shape()[2]; + size_t columns = inputs[0].shape()[3]; + + // number of floating-point operations + // an approximate value + size_t ops = batchSize * maps * rows * columns * (size_ * 2 + 3); + + return ops; + } + private: size_t size_; real scale_; @@ -161,33 +227,53 @@ private: }; /** - * \brief {o_0} = calc(i_0, i_1, i_2, i_3) + * \brief Backward calculation for normalization across maps. * - * \param inputs[0] input value. - * \param inputs[1] output value. - * \param inputs[2] output grad. - * \param inputs[3] denoms. - * \param outputs[0] input grad. 
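To make the formula above concrete, here is a hedged CPU reference of the forward pass: k is fixed to 1 as the comment states, the buffers are plain floats, and the exact half-window endpoints are an assumption, since the inclusive/exclusive convention is not spelled out:

#include <algorithm>
#include <cmath>

// denoms(n,c,h,w) = 1 + alpha * sum of in^2 over maps j near c (window N)
// out = in / denoms^beta, NCHW layout, k = 1.
void crossMapNormalRef(float* out, float* denoms, const float* in,
                       int batch, int C, int H, int W,
                       int N, float alpha, float beta) {
  const int HW = H * W;
  for (int n = 0; n < batch; ++n) {
    for (int c = 0; c < C; ++c) {
      const int lower = std::max(0, c - N / 2);
      const int upper = std::min(C, c + N / 2 + 1);  // assumed exclusive end
      for (int p = 0; p < HW; ++p) {
        float sum = 0.f;
        for (int j = lower; j < upper; ++j) {
          const float v = in[(n * C + j) * HW + p];
          sum += v * v;
        }
        const int idx = (n * C + c) * HW + p;
        denoms[idx] = 1.f + alpha * sum;
        out[idx] = in[idx] / std::pow(denoms[idx], beta);
      }
    }
  }
}

Keeping denoms around is exactly the outputs[1] trade-off described in the note: the backward pass can reuse it instead of recomputing the windowed sums.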
+ * Function implementation: + * + * The implementation of this Function is derived from the + * CrossMapNormalFunc implementation. + * + * InputGrad = OutputGrad * denoms ^ (-beta) + * -- upper + * + > (OutputGrad * OutputValue * (-2 * alpha * beta) / denoms) * InputValue + * -- lower + * + * The data format of the inputs/outputs is the same as in the forward + * interface: NCHW. + * + * The upper and lower bounds are the same as in the forward pass, and the + * logic of the sum is also the same as in the forward pass. + * + * Function Arguments: + * + * \param size_ represents N + * \param scale_ represents alpha + * \param pow_ represents beta + * \param inputs[0] represents InputValue, inputs[0] of CrossMapNormalFunc + * \param inputs[1] represents OutputValue, outputs[0] of CrossMapNormalFunc + * \param inputs[2] represents OutputGrad + * \param inputs[3] represents denoms, outputs[1] of CrossMapNormalFunc + * This is the intermediate result that is + * preserved in the forward calculation. + * \param outputs[0] represents InputGrad */ template class CrossMapNormalGradFunc : public FunctionBase { public: void init(const FuncConfig& config) override { + // function arguments size_ = config.get("size"); scale_ = config.get("scale"); pow_ = config.get("pow"); + + // number of inputs and outputs + numInputs_ = 4; + numOutputs_ = 1; } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ((size_t)4, inputs.size()); - CHECK_EQ((size_t)1, outputs.size()); - - CHECK_EQ(inputs[0].shape().ndims(), (size_t)4); - CHECK(inputs[0].shape() == inputs[1].shape()); - CHECK(inputs[0].shape() == inputs[2].shape()); - CHECK(inputs[0].shape() == inputs[3].shape()); - CHECK(inputs[0].shape() == outputs[0].shape()); - + check(inputs, outputs); if (outputs[0].getArgType() != ADD_TO) { // Currently, some algorithm implementations are ASSIGN_TO mode, // if need to support the ADD_TO calculation, need to clear the output. @@ -196,25 +282,52 @@ public: tmp.zero(); } - size_t samples = inputs[0].shape()[0]; - size_t channels = inputs[0].shape()[1]; - size_t height = inputs[0].shape()[2]; - size_t width = inputs[0].shape()[3]; + size_t batchSize = inputs[0].shape()[0]; + size_t maps = inputs[0].shape()[1]; + size_t rows = inputs[0].shape()[2]; + size_t columns = inputs[0].shape()[3]; CrossMapNormalGrad(outputs[0].data(), inputs[0].data(), inputs[1].data(), inputs[2].data(), inputs[3].data(), - samples, - channels, - height, - width, + batchSize, + maps, + rows, + columns, size_, scale_, pow_); } + void check(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(numInputs_, inputs.size()); + CHECK_EQ(numOutputs_, outputs.size()); + + CHECK_EQ(inputs[0].shape().ndims(), (size_t)4); + CHECK(inputs[0].shape() == inputs[1].shape()); + CHECK(inputs[0].shape() == inputs[2].shape()); + CHECK(inputs[0].shape() == inputs[3].shape()); + CHECK(inputs[0].shape() == outputs[0].shape()); + } + + // Only the shape of one input is needed to calculate the + // number of floating-point operations. 
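And a matching sketch of the backward formula above, under the same layout and window assumptions as the forward reference:

#include <algorithm>
#include <cmath>

// inGrad += outGrad * denoms^(-beta)
//         + [sum over window] (outGrad_j * out_j * (-2*alpha*beta) / denoms_j) * in
void crossMapNormalGradRef(float* inGrad, const float* in, const float* out,
                           const float* outGrad, const float* denoms,
                           int batch, int C, int H, int W,
                           int N, float alpha, float beta) {
  const int HW = H * W;
  for (int n = 0; n < batch; ++n) {
    for (int c = 0; c < C; ++c) {
      const int lower = std::max(0, c - N / 2);
      const int upper = std::min(C, c + N / 2 + 1);  // assumed exclusive end
      for (int p = 0; p < HW; ++p) {
        const int idx = (n * C + c) * HW + p;
        float g = outGrad[idx] * std::pow(denoms[idx], -beta);
        for (int j = lower; j < upper; ++j) {
          const int jdx = (n * C + j) * HW + p;
          g += outGrad[jdx] * out[jdx] * (-2.f * alpha * beta) / denoms[jdx] *
               in[idx];
        }
        inGrad[idx] += g;
      }
    }
  }
}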
diff --git a/paddle/function/Function.h b/paddle/function/Function.h
index 9215c137eb8e85a9a03575104d7f89bbce441eba..3bbeb6e525f85bdde9a54c8d60146eaa30a1bb4d 100644
--- a/paddle/function/Function.h
+++ b/paddle/function/Function.h
@@ -153,7 +153,36 @@ public:
   virtual void calc(const BufferArgs& inputs, const BufferArgs& outputs) {}

+  // Checks whether the BufferType and shape of the inputs and outputs
+  // arguments of the Function are correct. A general calc implementation
+  // calls check() to validate its arguments; in addition, the caller can
+  // run its own checks before calc() is invoked.
+  virtual void check(const BufferArgs& inputs, const BufferArgs& outputs) {}
+
+  // Calculates the number of floating-point operations of this Function.
+  // The inputs and outputs arguments do not need to contain actual data,
+  // only shapes. Some Functions have identical input and output shapes,
+  // so for them not every argument has to be supplied; passing the full
+  // argument list is always correct for this interface.
+  virtual size_t ops(const BufferArgs& inputs, const BufferArgs& outputs) {
+    return 0;
+  }
+
+  int getNumInputs() const { return numInputs_; }
+
+  int getNumOutputs() const { return numOutputs_; }
+
   static ClassRegistrar<FunctionBase> funcRegistrar_;
+
+protected:
+  // numInputs_ and numOutputs_ represent the maximum numbers of inputs
+  // and outputs this Function supports. Some Functions take optimized
+  // (reduced) argument lists, so when comparing argument counts for
+  // these Functions the conditions are
+  // inputs.size() <= numInputs_ and outputs.size() <= numOutputs_.
+  size_t numInputs_;
+  size_t numOutputs_;
 };

 #define FUNC_NAME(typeName, deviceName) #typeName "-" #deviceName
diff --git a/paddle/function/FunctionTest.h b/paddle/function/FunctionTest.h
index 24e7a36a43cfa8941535cb778aa1557ec5a0a6f4..0cfafdb27f55a3e6617d31a968d2a05fc77f5b46 100644
--- a/paddle/function/FunctionTest.h
+++ b/paddle/function/FunctionTest.h
@@ -13,7 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
*/ #include "Function.h" -#include "paddle/math/Vector.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/SparseMatrix.h" #include "paddle/math/tests/TensorCheck.h" #include "paddle/testing/TestUtil.h" @@ -68,8 +69,56 @@ public: gpuMemory_.back()->getBuf(), input.valueType(), input.shape())); } + // assume one copy of sequence is shared by different SequenceArgs + void addSequence(const SequenceIdArg& input) { + CHECK_EQ(input.shape().ndims(), 1UL); + size_t batchSize = input.shape()[0]; + size_t numSeqs = batchSize / 10 + 1; + size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32); + cpuMemory_.emplace_back(std::make_shared(sizeId)); + gpuMemory_.emplace_back(std::make_shared(sizeId)); + cpuSeq_ = std::make_shared(cpuMemory_.back()->getBuf(), + TensorShape{numSeqs + 1}); + gpuSeq_ = std::make_shared(gpuMemory_.back()->getBuf(), + TensorShape{numSeqs + 1}); + /// init sequence Id + initArg(*cpuSeq_, batchSize); + + // todo(tianbing), delete it + CHECK_EQ(cpuSeq_->shape().getElements(), cpuSeq_->numSeqs() + 1); + + CpuIVector cpuSeq(cpuSeq_->shape().getElements(), (int*)cpuSeq_->data()); + GpuIVector gpuSeq(gpuSeq_->shape().getElements(), (int*)gpuSeq_->data()); + gpuSeq.copyFrom(cpuSeq); + } + + void addInputs(const SequenceArg& input) { + CHECK_EQ(input.shape().ndims(), 2UL); + size_t batchSize = input.shape()[0]; + if (!cpuSeq_ || !gpuSeq_) { // sequence not exist + addSequence(SequenceIdArg(TensorShape{batchSize})); + } + + size_t size = + input.shape().getElements() * sizeOfValuType(input.valueType()); + cpuMemory_.emplace_back(std::make_shared(size)); + gpuMemory_.emplace_back(std::make_shared(size)); + + /// SequenceArg + cpuInputs_.emplace_back( + std::make_shared(cpuMemory_.back()->getBuf(), + input.valueType(), + input.shape(), + *cpuSeq_)); + gpuInputs_.emplace_back( + std::make_shared(gpuMemory_.back()->getBuf(), + input.valueType(), + input.shape(), + *gpuSeq_)); + } + // output need only contains shape, do not contains data. 
- void addOutputs(const BufferArg& output) { + void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) { size_t size = output.shape().getElements() * sizeOfValuType(output.valueType()); cpuMemory_.emplace_back(std::make_shared(size)); @@ -79,38 +128,99 @@ public: std::make_shared(cpuMemory_.back()->getBuf(), output.valueType(), output.shape(), - ASSIGN_TO)); + argType)); gpuOutputs_.emplace_back( std::make_shared(gpuMemory_.back()->getBuf(), output.valueType(), output.shape(), - ASSIGN_TO)); + argType)); } - void addInputs(const SequenceArg& input) { - size_t batchSize = input.shape()[0]; - size_t numSeqs = batchSize / 10 + 1; + /// add and init output sparse matrix + void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) { + cpuSparse_ = std::make_shared( + output.shape()[0], + output.shape()[1], + output.nnz(), + static_cast(output.dataType()), + static_cast(output.dataFormat())); + + gpuSparse_ = std::make_shared( + output.shape()[0], + output.shape()[1], + output.nnz(), + static_cast(output.dataType()), + static_cast(output.dataFormat())); + + /// init sparse matrix + hl_stream_t stream(HPPL_STREAM_1); + cpuSparse_->randomizeUniform(); + gpuSparse_->copyFrom(*cpuSparse_, stream); + hl_stream_synchronize(stream); - size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32); - cpuMemory_.emplace_back(std::make_shared(sizeId)); - gpuMemory_.emplace_back(std::make_shared(sizeId)); + cpuOutputs_.emplace_back( + std::make_shared(*cpuSparse_, argType)); + gpuOutputs_.emplace_back( + std::make_shared(*gpuSparse_, argType)); + } - TensorShape seqsId({numSeqs + 1}); - // void* cpuBuffer = cpuMemory_.back()->getBuf(); - // void* gpuBuffer = gpuMemory_.back()->getBuf(); + void addOutputs(const SequenceArg& output, ArgType argType = ASSIGN_TO) { + CHECK_EQ(output.shape().ndims(), 2UL); + size_t batchSize = output.shape()[0]; + if (!cpuSeq_ || !gpuSeq_) { // sequence not exist + addSequence(SequenceIdArg(TensorShape{batchSize})); + } size_t size = - input.shape().getElements() * sizeOfValuType(input.valueType()); + output.shape().getElements() * sizeOfValuType(output.valueType()); cpuMemory_.emplace_back(std::make_shared(size)); gpuMemory_.emplace_back(std::make_shared(size)); - // TODO: need be implemented. 
+ /// SequenceArg + cpuOutputs_.emplace_back( + std::make_shared(cpuMemory_.back()->getBuf(), + output.valueType(), + output.shape(), + *cpuSeq_, + argType)); + gpuOutputs_.emplace_back( + std::make_shared(gpuMemory_.back()->getBuf(), + output.valueType(), + output.shape(), + *gpuSeq_, + argType)); + } + + void addInputs(const SparseMatrixArg& input) { + cpuSparse_ = std::make_shared( + input.shape()[0], + input.shape()[1], + input.nnz(), + static_cast(input.dataType()), + static_cast(input.dataFormat())); + + gpuSparse_ = std::make_shared( + input.shape()[0], + input.shape()[1], + input.nnz(), + static_cast(input.dataType()), + static_cast(input.dataFormat())); + + /// init sparse matrix + hl_stream_t stream(HPPL_STREAM_1); + cpuSparse_->randomizeUniform(); + gpuSparse_->copyFrom(*cpuSparse_, stream); + hl_stream_synchronize(stream); + + cpuInputs_.emplace_back(std::make_shared(*cpuSparse_)); + gpuInputs_.emplace_back(std::make_shared(*gpuSparse_)); } void run() { // prepare cpu/gpu arguments initInputs(); + initOutputs(); // function calculate auto callFunction = [](FunctionBase* function, std::vector& inputs, @@ -129,7 +239,7 @@ public: callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_); callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_); - // check outputs and inouts + // check outputs compareOutputs(); } @@ -138,10 +248,44 @@ public: std::shared_ptr getGpuFunction() const { return gpuFunc_; } protected: + // only init cpu argument, gpu argument copy from cpu argument. + void initArg(BufferArg& arg) { + CpuVector vector(arg.shape().getElements(), (real*)arg.data()); + vector.uniform(0.001, 1); + } + + void initArg(SequenceArg& arg) { + /// init only matrix + CpuVector vector(arg.shape().getElements(), (real*)arg.data()); + vector.uniform(0.001, 1); + } + + void initArg(SequenceIdArg& arg, size_t batchSize) { + size_t numSeqs = arg.numSeqs(); + int* buf = reinterpret_cast(arg.data()); + int pos = 0; + size_t maxLen = 2 * batchSize / numSeqs; + for (int i = 0; i < (int)numSeqs; ++i) { + int len = 1 + uniformRandom(std::min( + maxLen, batchSize - pos - numSeqs + i)); + buf[i] = pos; + pos += len; + VLOG(1) << " len=" << len; + } + buf[numSeqs] = batchSize; + } + void initInputs() { for (size_t i = 0; i < cpuInputs_.size(); i++) { - initArg(*cpuInputs_[i]); + if (cpuInputs_[i]->isSparseArg()) { + continue; /// sparse matrix already init + } + if (cpuInputs_[i]->isSequenceArg()) { + initArg(dynamic_cast(*cpuInputs_[i])); + } else { + initArg(*cpuInputs_[i]); + } // TODO: Need a BufferCopy used to copy from one BufferArg to another. CpuVector cpuVector(cpuInputs_[i]->shape().getElements(), (real*)cpuInputs_[i]->data()); @@ -152,38 +296,38 @@ protected: } } - void compareOutputs() { + void initOutputs() { for (size_t i = 0; i < cpuOutputs_.size(); i++) { - // TODO, Need a BufferCheck used to compare the two buffers. - auto cpu = cpuOutputs_[i]; - auto gpu = gpuOutputs_[i]; - CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data()); - GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data()); + if (cpuOutputs_[i]->isSparseArg()) { + continue; /// sparse matrix already init + } - autotest::TensorCheckErr(cpuVector, gpuVector); - } - } + if (cpuOutputs_[i]->isSequenceArg()) { + initArg(dynamic_cast(*cpuOutputs_[i])); + } else { + initArg(*cpuOutputs_[i]); + } - // only init cpu argument, gpu argument copy from cpu argument. 
- void initArg(BufferArg& arg) { - CpuVector vector(arg.shape().getElements(), (real*)arg.data()); - vector.uniform(0.001, 1); + // TODO: Need a BufferCopy used to copy from one BufferArg to another. + CpuVector cpuVector(cpuOutputs_[i]->shape().getElements(), + (real*)cpuOutputs_[i]->data()); + GpuVector gpuVector(gpuOutputs_[i]->shape().getElements(), + (real*)gpuOutputs_[i]->data()); + + gpuVector.copyFrom(cpuVector); + } } - void initArg(SequenceIdArg& arg, size_t batchSize) { - size_t numSeqs = arg.numSeqs(); - int* buf = reinterpret_cast(arg.data()); - int pos = 0; - size_t maxLen = 2 * batchSize / numSeqs; - for (int i = 0; i < (int)numSeqs; ++i) { - int len = uniformRandom( - std::min(maxLen, batchSize - pos - numSeqs + i)) + - 1; - buf[i] = pos; - pos += len; - VLOG(1) << " len=" << len; + void compareOutputs() { + for (size_t i = 0; i < cpuOutputs_.size(); i++) { + // TODO, Need a BufferCheck used to compare the two buffers. + const auto cpu = cpuOutputs_[i]; + const auto gpu = gpuOutputs_[i]; + CHECK_EQ(cpu->numElements(), gpu->numElements()); + CpuVector cpuVector(cpu->numElements(), (real*)cpu->data()); + GpuVector gpuVector(gpu->numElements(), (real*)gpu->data()); + autotest::TensorCheckErr(cpuVector, gpuVector); } - buf[numSeqs] = batchSize; } protected: @@ -195,6 +339,10 @@ protected: std::vector cpuOutputs_; std::vector gpuInputs_; std::vector gpuOutputs_; + std::shared_ptr cpuSparse_; + std::shared_ptr gpuSparse_; + std::shared_ptr cpuSeq_; + std::shared_ptr gpuSeq_; }; } // namespace paddle diff --git a/paddle/function/MulOp.cpp b/paddle/function/MulOp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..91b4b8ed91b6055babcfbab8f7adb2c55e2747d0 --- /dev/null +++ b/paddle/function/MulOp.cpp @@ -0,0 +1,354 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MulOp.h" +/// todo(tianbing), delete it +#include +#include "paddle/math/MathFunctions.h" +#include "paddle/math/SIMDFunctions.h" +#include "paddle/utils/ThreadLocal.h" + +#ifndef PADDLE_TYPE_DOUBLE +#define GEMM paddle::gemm +#else +#define GEMM paddle::gemm +#endif + +namespace { +inline void vecAddTo(real* a, const real* b, real scaleB, size_t len) { + for (unsigned int i = 0; i < len; ++i) { + a[i] += (1.0 == scaleB) ? b[i] : scaleB * b[i]; + } +} + +inline void colVecAddTo( + real* a, real* b, real c, size_t len, size_t aWidth, size_t bWidth) { + for (unsigned int i = 0; i < len; ++i) { + a[i * aWidth] += (1.0 == c) ? 
b[i * bWidth] : b[i * bWidth] * c; + } +} +} // namespace + +namespace paddle { +/// sparse matrix (+)= dense matrix * dense matrix +template <> +void MulOp(CpuSparseMatrix& out, + const CpuMatrix& a, + const CpuMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans) { + CHECK_EQ(out.getValueType(), FLOAT_VALUE); + if (scaleT == 0) { + out.zeroMem(); + } + const real* A = a.getData(); + const real* B = b.getData(); + real* C = out.getValue(); + int* rows = out.getRows(); + int* cols = out.getCols(); + size_t width = out.getWidth(); + size_t height = out.getHeight(); + + /// SPARSE_CSC, {a any, b not trans} + if (out.getFormat() == SPARSE_CSC) { + /// b not trans and a any + CHECK(!bTrans); + size_t m = !aTrans ? a.getWidth() : a.getHeight(); + for (size_t i = 0; i < width; i++) { + size_t start = out.getColStartIdx(i); + size_t end = out.getColStartIdx(i + 1); + for (size_t j = start; j < end; j++) { + real sum = 0; + size_t rowIdx = rows[j]; + for (size_t k = 0; k < m; k++) { + sum += (!aTrans ? A[rowIdx * m + k] : A[k * height + rowIdx]) * + B[k * width + i]; + } + C[j] = scaleAB * sum + scaleT * C[j]; + } + } + return; + } + + /// SPARSE_CSR, {a any, b not trans} or {a not trans, b trans} + if (out.getFormat() == SPARSE_CSR) { + /// a and b can not both transpose + CHECK(!(aTrans && bTrans)); + size_t m = a.getWidth(); + for (size_t i = 0; i < height; i++) { + size_t start = out.getRowStartIdx(i); + size_t end = out.getRowStartIdx(i + 1); + for (size_t j = start; j < end; j++) { + real sum = 0; + size_t colIdx = cols[j]; + for (size_t k = 0; k < m; k++) { + sum += (!aTrans ? A[i * m + k] : A[k * height + i]) * + (!bTrans ? B[k * width + colIdx] : B[colIdx * m + k]); + } + C[j] = scaleAB * sum + scaleT * C[j]; + } + } + return; + } +} + +/// dense matrix (+)= dense matrix * dense matrix +template <> +void MulOp(CpuMatrix& out, + const CpuMatrix& a, + const CpuMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans) { + GEMM(aTrans ? CblasTrans : CblasNoTrans, + bTrans ? CblasTrans : CblasNoTrans, + out.getHeight(), + out.getWidth(), + !aTrans ? a.getWidth() : a.getHeight(), + scaleAB, + a.getData(), + a.getStride(), + b.getData(), + b.getStride(), + scaleT, + out.getData(), + out.getStride()); +} + +/// dense matrix (+)= sparse matrix * dense matrix +template <> +void MulOp(CpuMatrix& out, + const CpuSparseMatrix& a, + const CpuMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans) { + if (scaleT == 0) { + out.zeroMem(); + } + const real* B = b.getData(); + real* C = out.getData(); + if (out.getWidth() % 32 == 0) { + CHECK_EQ((size_t)B % 32, 0UL); + CHECK_EQ((size_t)C % 32, 0UL); + } + + int* cols = a.getCols(); + real* values = a.getValue(); + for (size_t i = 0; i < a.getHeight(); ++i) { + const int start = a.getRowStartIdx(i); + const int end = a.getRowStartIdx(i + 1); + for (int j = start; j < end; ++j) { + vecAddTo(!aTrans ? out.getRow(i) : out.getRow(cols[j]), + !aTrans ? const_cast(b).getRow(cols[j]) + : const_cast(b).getRow(i), + (a.getValueType() == FLOAT_VALUE) ? 
values[j] : (real)1.0, + out.getWidth()); + } + } +} + +/// dense matrix (+)= dense matrix * sparse matrix +template <> +void MulOp(CpuMatrix& out, + const CpuMatrix& a, + const CpuSparseMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans) { + if (scaleT == 0) { + out.zeroMem(); + } + real* A = const_cast(a.getData()); + real* B = const_cast(b.getValue()); + real* C = out.getData(); + int* rows = b.getRows(); + int* cols = b.getCols(); + + /// SPARSE_CSC format + if (b.getFormat() == SPARSE_CSC) { + for (size_t j = 0; j < b.getWidth(); ++j) { + int start = b.getColStartIdx(j); + int end = b.getColStartIdx(j + 1); + for (int i = start; i < end; ++i) { + colVecAddTo(!bTrans ? C + j : C + rows[i], + !bTrans ? A + rows[i] : A + j, + (b.getValueType() == NO_VALUE) ? (real)1.0 : B[i], + out.getHeight(), + out.getWidth(), + a.getWidth()); + } + } + return; + } + + /// SPARSE_CSR format + if (b.getFormat() == SPARSE_CSR) { + for (size_t j = 0; j < b.getHeight(); ++j) { + int start = b.getRowStartIdx(j); + int end = b.getRowStartIdx(j + 1); + for (int i = start; i < end; ++i) { + colVecAddTo(!bTrans ? C + cols[i] : C + j, + !bTrans ? A + j : A + cols[i], + (b.getValueType() == NO_VALUE) ? (real)1.0 : B[i], + out.getHeight(), + out.getWidth(), + a.getWidth()); + } + } + return; + } +} + +/** + * mul operator + * out = scaleT * out + scaleAB * (A * B) + * here, scaleT in {0, 1}, scaleAB == 1, + * out = A * B, ASSIGN_TO + * out += A * B, ADD_TO + * + * + * \param outputs[0] output matrix (out), M * N, + * could be either Sparse or Dense Matrix + * M is num of rows, N is num of columns + * \param inputs[0] first input matrix (A), M * K (if non-trans) + * could be either Sparse or Dense Matrix + * M is num of rows, K is num of columns + * \param inputs[1] second input matrix (B), K * N (if non-trans) + * could be either Sparse or Dense Matrix + * K is num of rows, N is num of columns + * + * Support eight Mul operators, with both GPU and CPU devices + * For each device, four Mul operators are supported: + * 1. dense (out) = dense (A) * dense (B) + * 2. dense (out) = sparse (A) * dense (B) + * sparse matrix only support SPARSE_CSR format + * 3. dense (out) = dense (A) * sparse (B) + * sparse matrix support SPARSE_CSC and SPARSE_CSR formats + * 4. sparse (out) = dense (A) * dense (B) + * sparse matrix support SPARSE_CSC and SPARSE_CSR formats + * + */ +template +class MulFunc : public FunctionBase { +public: + void init(const FuncConfig& config) override { + aTrans_ = config.get("aTrans"); + bTrans_ = config.get("bTrans"); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK(!aTrans_ || !bTrans_) + << "Not support both a and b are transpose matrices"; + + CHECK_EQ((size_t)2, inputs.size()); + CHECK_EQ((size_t)1, outputs.size()); + CHECK(inputs[0].data() && inputs[1].data() && outputs[0].data()); + CHECK_EQ(inputs[0].shape().ndims(), (size_t)2); + CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); + CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); + + size_t aRow = !aTrans_ ? inputs[0].shape()[0] : inputs[0].shape()[1]; + size_t aCol = !aTrans_ ? inputs[0].shape()[1] : inputs[0].shape()[0]; + size_t bRow = !bTrans_ ? inputs[1].shape()[0] : inputs[1].shape()[1]; + size_t bCol = !bTrans_ ? 
inputs[1].shape()[1] : inputs[1].shape()[0]; + /// C = A * B, or C += A * B, for matrix format + CHECK_EQ(aCol, bRow); + CHECK_EQ(aRow, outputs[0].shape()[0]); + CHECK_EQ(bCol, outputs[0].shape()[1]); + + /// only support C = A * B (ASSIGN_TO) or C += A * B (ADD_TO) + real scaleT = (outputs[0].getArgType() == ADD_TO) ? 1.0 : 0.0; + + /// support dense = not both sparse * sparse + /// or sparse = dense * dense + CHECK((!outputs[0].isSparseArg() && + !(inputs[0].isSparseArg() && inputs[1].isSparseArg())) || + (outputs[0].isSparseArg() && !inputs[0].isSparseArg() && + !inputs[1].isSparseArg())); + + auto outMat = outputs[0].matrix(); + /// dense matrix = dense matrix * dense matrix + if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() && + !outputs[0].isSparseArg()) { + MulOp(outMat, + inputs[0].matrix(), + inputs[1].matrix(), + 1.0, // scaleAB + scaleT, + aTrans_, + bTrans_); + return; + } + + /// dense matrix = dense matrix * sparse matrix + if (!inputs[0].isSparseArg() && inputs[1].isSparseArg() && + !outputs[0].isSparseArg()) { + CHECK(!aTrans_) << "Not supported a transpose"; + MulOp(outMat, + inputs[0].matrix(), + inputs[1].sparse().SparseMatrix(), + 1.0, // scaleAB + scaleT, + aTrans_, + bTrans_); + return; + } + + /// dense matrix = sparse matrix * dense matrix + if (inputs[0].isSparseArg() && !inputs[1].isSparseArg() && + !outputs[0].isSparseArg()) { + CHECK(!bTrans_) << "Not supported b transpose"; + CHECK_EQ(inputs[0].sparse().dataFormat(), T_SPARSE_CSR) + << "Only supported SPARSE_CSR format for sparse matrix a"; + MulOp(outMat, + inputs[0].sparse().SparseMatrix(), + inputs[1].matrix(), + 1.0, // scaleAB + scaleT, + aTrans_, + bTrans_); + return; + } + + /// sparse matrix = dense matrix * dense matrix + auto outSparseMat = outputs[0].sparse().SparseMatrix(); + if (!inputs[0].isSparseArg() && !inputs[1].isSparseArg() && + outputs[0].isSparseArg()) { + MulOp(outSparseMat, + inputs[0].matrix(), + inputs[1].matrix(), + 1.0, // scaleAB + scaleT, + aTrans_, + bTrans_); + return; + } + } + +private: + bool aTrans_; + bool bTrans_; +}; + +REGISTER_TYPED_FUNC(MulOp, CPU, MulFunc); +#ifndef PADDLE_ONLY_CPU +REGISTER_TYPED_FUNC(MulOp, GPU, MulFunc); +#endif +} // namespace paddle diff --git a/paddle/function/MulOp.h b/paddle/function/MulOp.h new file mode 100644 index 0000000000000000000000000000000000000000..b6016a6ab6e9d6549b359573ecc2b33900a58365 --- /dev/null +++ b/paddle/function/MulOp.h @@ -0,0 +1,102 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "Function.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/SparseMatrix.h" + +namespace paddle { +/// CPU, dense matrix (+)= dense matrix * dense matrix +template +void MulOp(CpuMatrix& out, + const CpuMatrix& a, + const CpuMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans); + +/// CPU, dense matrix (+)= sparse matrix * dense matrix +template +void MulOp(CpuMatrix& out, + const CpuSparseMatrix& a, + const CpuMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans); + +/// CPU, dense matrix (+)= dense matrix * sparse matrix +template +void MulOp(CpuMatrix& out, + const CpuMatrix& a, + const CpuSparseMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans); + +/// CPU, sparse matrix (+)= dense matrix * dense matrix +template +void MulOp(CpuSparseMatrix& out, + const CpuMatrix& a, + const CpuMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans); + +/// GPU, dense matrix (+)= dense matrix * dense matrix +template +void MulOp(GpuMatrix& out, + const GpuMatrix& a, + const GpuMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans); + +/// GPU, dense matrix (+)= sparse matrix * dense matrix +template +void MulOp(GpuMatrix& out, + const GpuSparseMatrix& a, + const GpuMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans); + +/// GPU, dense matrix (+)= dense matrix * sparse matrix +template +void MulOp(GpuMatrix& out, + const GpuMatrix& a, + const GpuSparseMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans); + +/// GPU, sparse matrix (+)= dense matrix * dense matrix +template +void MulOp(GpuSparseMatrix& out, + const GpuMatrix& a, + const GpuMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans); + +} // namespace paddle diff --git a/paddle/function/MulOpGpu.cu b/paddle/function/MulOpGpu.cu new file mode 100644 index 0000000000000000000000000000000000000000..dcfcb2325d7dae22e0e0e78fc0bddf061fc0940c --- /dev/null +++ b/paddle/function/MulOpGpu.cu @@ -0,0 +1,130 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "hl_base.h" +#include "MulOp.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/SparseMatrix.h" + +namespace paddle { +/// dense matrix (+)= dense matrix * dense matrix +template <> +void MulOp(GpuMatrix& out, + const GpuMatrix& a, + const GpuMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans) { + CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match"; + hl_matrix_mul(const_cast(a.getData()), + !aTrans ? HPPL_OP_N : HPPL_OP_T, + const_cast(b.getData()), + !bTrans ? HPPL_OP_N : HPPL_OP_T, + const_cast(out.getData()), + out.getHeight(), + out.getWidth(), + !aTrans ? 
a.getWidth() : a.getHeight(), + scaleAB, + scaleT, + a.getStride(), + b.getStride(), + out.getStride()); +} + +/// dense matrix (+)= sparse matrix * dense matrix +template <> +void MulOp(GpuMatrix& out, + const GpuSparseMatrix& a, + const GpuMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans) { + CHECK(out.isContiguous()); + CHECK(b.isContiguous()); + CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match"; + hl_matrix_csr_mul_dense(a.sMatrix_.get(), + aTrans ? HPPL_OP_T : HPPL_OP_N, + const_cast(b.getData()), + HPPL_OP_N, + const_cast(out.getData()), + out.getHeight(), + out.getWidth(), + b.getHeight(), + scaleAB, + scaleT); +} + +/// dense matrix (+)= dense matrix * sparse matrix +template <> +void MulOp(GpuMatrix& out, + const GpuMatrix& a, + const GpuSparseMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans) { + CHECK(out.isContiguous()); + CHECK(a.isContiguous()); + CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match"; + + if (b.format_ == SPARSE_CSC) { + hl_matrix_dense_mul_csc(const_cast(a.getData()), + HPPL_OP_N, + b.sMatrix_.get(), + bTrans ? HPPL_OP_T : HPPL_OP_N, + const_cast(out.getData()), + out.getHeight(), + out.getWidth(), + a.getWidth(), + scaleAB, + scaleT); + } else { + hl_matrix_dense_mul_csr(const_cast(a.getData()), + HPPL_OP_N, + b.sMatrix_.get(), + bTrans ? HPPL_OP_T : HPPL_OP_N, + const_cast(out.getData()), + out.getHeight(), + out.getWidth(), + a.getWidth(), + scaleAB, + scaleT); + } +} + +/// sparse matrix (+)= dense matrix * dense matrix +template <> +void MulOp(GpuSparseMatrix& out, + const GpuMatrix& a, + const GpuMatrix& b, + real scaleAB, + real scaleT, + bool aTrans, + bool bTrans) { + CHECK(a.useGpu_ && b.useGpu_) << "matrix device type not match"; + hl_sparse_matrix_mul(const_cast(a.getData()), + aTrans ? HPPL_OP_T : HPPL_OP_N, + const_cast(b.getData()), + bTrans ? HPPL_OP_T : HPPL_OP_N, + out.sMatrix_.get(), + out.getHeight(), + out.getWidth(), + !bTrans ? b.getHeight() : b.getWidth(), + scaleAB, + scaleT); +} + +} // namespace paddle diff --git a/paddle/function/MulOpTest.cpp b/paddle/function/MulOpTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8748eb0d79fa0fcb0935eac5bb37b44274128aa0 --- /dev/null +++ b/paddle/function/MulOpTest.cpp @@ -0,0 +1,212 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "FunctionTest.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/SparseMatrix.h" +#include "paddle/math/tests/test_matrixUtil.h" +#include "paddle/testing/TestUtil.h" + +using namespace paddle; // NOLINT + +/** + * C += A * B, A, B, C dense matrix + * dense = dense * dense + */ +void testFuncDDDMatrix( + bool transa, bool transb, size_t dimM, size_t dimN, size_t dimK) { + real scaleT = 1.0; + size_t heightA = (transa == false) ? dimM : dimK; + size_t widthA = (transa == false) ? dimK : dimM; + size_t heightB = (transb == false) ? dimK : dimN; + size_t widthB = (transb == false) ? 
dimN : dimK; + size_t heightC = dimM; + size_t widthC = dimN; + // init Test object + FunctionCompare test( + "MulOp", FuncConfig().set("aTrans", transa).set("bTrans", transb)); + // prepare input arguments + /// matrix A : HA * WA + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{heightA, widthA})); + /// matrix B: HB * WB + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{heightB, widthB})); + + /// output matrix C: HC * WC + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{heightC, widthC}), + scaleT == 1.0 ? ADD_TO : ASSIGN_TO); + // run Function + test.run(); +} + +TEST(MulOp, DDDMatrixMul) { + LOG(INFO) << "function test for dense = dense * dense matrix"; + for (const auto transa : {false, true}) { + for (const auto transb : {false, true}) { + for (const auto dimM : {1, 10, 100}) { + for (const auto dimN : {1, 10}) { + for (const auto dimK : {8}) { + if (transa && transb) { + continue; + } + VLOG(3) << std::setiosflags(std::ios::left) << std::setfill(' ') + << " transa=" << transa << " transb=" << transb + << " dimM=" << std::setw(5) << dimM + << " dimN=" << std::setw(5) << dimN + << " dimK=" << std::setw(5) << dimK; + testFuncDDDMatrix(transa, transb, dimM, dimN, dimK); + } + } + } + } + } +} + +/** + * C += A * B, B, C dense, A sparse + * dense = sparse * dense + */ +void testFuncDSparseDMatrix( + size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { + real scaleT = 1.0; + // init Test object + FunctionCompare test("MulOp", + FuncConfig().set("aTrans", false).set("bTrans", false)); + // prepare input arguments + /// sparse matrix A : M * K + test.addInputs(SparseMatrixArg( + VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}, nnz, FORMAT, FLOAT_VALUE)); + /// matrix B: K * N + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimK, dimN})); + + /// output matrix C: M * N + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}), + scaleT == 1.0 ? ADD_TO : ASSIGN_TO); + // run Function + test.run(); +} + +TEST(MuLOp, DSparseDMul) { + LOG(INFO) << "function test for dense = sparse * dense matrix"; + for (const auto dimM : {10, 100, 1000}) { + for (const auto dimN : {10, 100}) { + for (const auto dimK : {3, 10}) { + for (const auto nnz : {3, 10}) { + for (const auto FORMAT : {SPARSE_CSR}) { + VLOG(3) << std::setiosflags(std::ios::left) << std::setfill(' ') + << " dimM=" << std::setw(5) << dimM + << " dimN=" << std::setw(5) << dimN + << " dimK=" << std::setw(5) << dimK + << " nnz=" << std::setw(5) << nnz + << " format=" << std::setw(5) << FORMAT; + testFuncDSparseDMatrix(dimM, dimN, dimK, nnz, FORMAT); + } + } + } + } + } +} + +/** + * C += A * B, A, C dense, B sparse + * dense = dense * sparse + */ +void testFuncDDSparseMatrix( + size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { + real scaleT = 1.0; + // init Test object + FunctionCompare test("MulOp", + FuncConfig().set("aTrans", false).set("bTrans", false)); + // prepare input arguments + /// matrix A : M * K + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK})); + + /// matrix B: K * N + test.addInputs(SparseMatrixArg( + VALUE_TYPE_FLOAT, TensorShape{dimK, dimN}, nnz, FORMAT, FLOAT_VALUE)); + + /// output matrix C: M * N + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}), + scaleT == 1.0 ? 
ADD_TO : ASSIGN_TO); + // run Function + test.run(); +} + +TEST(MulOp, DDSparseMul) { + LOG(INFO) << "function test for dense = dense * sparse matrix"; + for (const auto dimM : {10, 100, 1000}) { + for (const auto dimN : {10, 100}) { + for (const auto dimK : {3, 10}) { + for (const auto nnz : {3, 10}) { + for (const auto FORMAT : {SPARSE_CSR, SPARSE_CSC}) { + VLOG(3) << std::setiosflags(std::ios::left) << std::setfill(' ') + << " dimM=" << std::setw(5) << dimM + << " dimN=" << std::setw(5) << dimN + << " dimK=" << std::setw(5) << dimK + << " nnz=" << std::setw(5) << nnz + << " format=" << std::setw(5) << FORMAT; + testFuncDDSparseMatrix(dimM, dimN, dimK, nnz, FORMAT); + } + } + } + } + } +} + +/** + * C += A * B, A sparse, B, C dense + * sparse = dense * dense + */ +void testFuncSparseDDMatrix( + size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) { + real scaleT = 1.0; + // init Test object + FunctionCompare test("MulOp", + FuncConfig().set("aTrans", false).set("bTrans", false)); + // prepare input arguments + /// matrix A : M * K + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK})); + + /// matrix B: K * N + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimK, dimN})); + + /// output sparse matrix C: M * N + test.addOutputs( + SparseMatrixArg( + VALUE_TYPE_FLOAT, TensorShape{dimM, dimN}, nnz, FORMAT, FLOAT_VALUE), + scaleT == 1.0 ? ADD_TO : ASSIGN_TO); + // run Function + test.run(); +} + +TEST(MulOp, SparseDDMul) { + LOG(INFO) << "function test for sparse = dense * dense matrix"; + for (const auto dimM : {10, 100, 1000}) { + for (const auto dimN : {10, 100}) { + for (const auto dimK : {3, 10}) { + for (const auto nnz : {3, 10}) { + for (const auto FORMAT : {SPARSE_CSC, SPARSE_CSR}) { + VLOG(3) << std::setiosflags(std::ios::left) << std::setfill(' ') + << " dimM=" << std::setw(5) << dimM + << " dimN=" << std::setw(5) << dimN + << " dimK=" << std::setw(5) << dimK + << " nnz=" << std::setw(5) << nnz + << " format=" << std::setw(5) << FORMAT; + testFuncSparseDDMatrix(dimM, dimN, dimK, nnz, FORMAT); + } + } + } + } + } +} diff --git a/paddle/function/PadOp.cpp b/paddle/function/PadOp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f1a0d2a1a96f24ddff8cd120681a8bc8cddaf40a --- /dev/null +++ b/paddle/function/PadOp.cpp @@ -0,0 +1,223 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "PadOp.h" +#include "paddle/math/Vector.h" + +namespace paddle { + +template <> +void Pad(real* outputs, + const real* inputs, + const int num, + const int inC, + const int inH, + const int inW, + const PadConf& pad) { + int cstart = pad.channelStart, cend = pad.channelEnd; + int hstart = pad.heightStart, hend = pad.heightEnd; + int wstart = pad.widthStart, wend = pad.widthEnd; + int outC = inC + cstart + cend; + int outH = inH + hstart + hend; + int outW = inW + wstart + wend; + for (int i = 0; i < num; i++) { + for (int c = 0; c < inC; c++) { + for (int h = 0; h < inH; h++) { + int inoff = ((i * inC + c) * inH + h) * inW; + int outoff = + ((i * outC + c + cstart) * outH + h + hstart) * outW + wstart; + memcpy(outputs + outoff, inputs + inoff, inW * sizeof(real)); + } + } + } +} + +template <> +void PadGrad(real* inGrad, + const real* outGrad, + const int num, + const int inC, + const int inH, + const int inW, + const PadConf& pad) { + int cstart = pad.channelStart, cend = pad.channelEnd; + int hstart = pad.heightStart, hend = pad.heightEnd; + int wstart = pad.widthStart, wend = pad.widthEnd; + int outC = inC + cstart + cend; + int outH = inH + hstart + hend; + int outW = inW + wstart + wend; + for (int i = 0; i < num; i++) { + for (int c = 0; c < inC; c++) { + for (int h = 0; h < inH; h++) { + int inoff = ((i * inC + c) * inH + h) * inW; + int outoff = + ((i * outC + c + cstart) * outH + h + hstart) * outW + wstart; + CpuVector inG = CpuVector(inW, inGrad + inoff); + CpuVector outG = CpuVector(inW, const_cast(outGrad + outoff)); + inG += outG; + } + } + } +} + +/** + * \brief Padding zeros to input according to the specify dimension. + * The struct pad_ contains the padding size in each dimension. + * The input and output is a 4D tensor. In PadFunc, we only + * pad zeros to the 2nd to 4th dimension. + * + * Argument in this Function: + * \param pad_ A struct object contains the padding size in each dimension. + * It has six integers. The channelStart and channelEnd indicate + * how many zeros to add before and after the input in channel + * dimension. And the heightStart and heightEnd indicate padding + * in height dimension. The widthStart and widthEnd indicate the + * padding in width dimension. + * \param inputs A 4D tensor, only one input. + * \param outputs A 4D tensor, the output value after padding. + * + * For example, + * Input(2,2,2,3) = [ + * [ [[1,2,3], [3,4,5]], + * [[2,3,5], [1,6,7]] ], + * [ [[4,3,1], [1,8,7]], + * [[3,8,9], [2,3,5]] ] + * ] # the shape is (1,2,2,3) + * + * pad_: if channelStart = channelEnd = 1, others are 0. + * Output(2,4,2,3) = [ + * [ [[0,0,0], [0,0,0]], + * [[1,2,3], [3,4,5]], + * [[2,3,5], [1,6,7]], + * [[0,0,0], [0,0,0]] ], + * [ [[0,0,0], [0,0,0]], + * [[4,3,1], [1,8,7]], + * [[3,8,9], [2,3,5]], + * [[0,0,0], [0,0,0]] ] + * ] # the shape is (2,4,2,3) + * + * pad_: if widthStart = 1, widthEnd = 2, others are 0. + * Output(2,2,2,6) = [ + * [ [[0,1,2,3,0,0], [0,3,4,5,0,0]], + * [[0,2,3,5,0,0], [0,1,6,7,0,0]] ], + * [ [[0,4,3,1,0,0], [0,1,8,7,0,0]], + * [[0,3,8,9,0,0], [0,2,3,5,0,0]] ], + * ] # the shape is (2,2,2,6) + * + * pad_: if heightStart = 1, heightEnd = 1, others are 0. 
+
+/**
+ * \brief Pads the input with zeros along the specified dimensions.
+ *        The struct pad_ contains the padding size for each dimension.
+ *        The input and output are 4D tensors, and PadFunc only pads
+ *        zeros in the 2nd to 4th dimensions.
+ *
+ * Arguments in this Function:
+ * \param pad_    A struct object that contains the padding size for each
+ *                dimension. It has six integers: channelStart and
+ *                channelEnd indicate how many zeros to add before and
+ *                after the input in the channel dimension; heightStart
+ *                and heightEnd indicate the padding in the height
+ *                dimension; widthStart and widthEnd indicate the padding
+ *                in the width dimension.
+ * \param inputs  A 4D tensor, only one input.
+ * \param outputs A 4D tensor, the output value after padding.
+ *
+ * For example,
+ * Input(2,2,2,3) = [
+ *   [ [[1,2,3], [3,4,5]],
+ *     [[2,3,5], [1,6,7]] ],
+ *   [ [[4,3,1], [1,8,7]],
+ *     [[3,8,9], [2,3,5]] ]
+ * ]  # the shape is (2,2,2,3)
+ *
+ * pad_: if channelStart = channelEnd = 1, others are 0.
+ * Output(2,4,2,3) = [
+ *   [ [[0,0,0], [0,0,0]],
+ *     [[1,2,3], [3,4,5]],
+ *     [[2,3,5], [1,6,7]],
+ *     [[0,0,0], [0,0,0]] ],
+ *   [ [[0,0,0], [0,0,0]],
+ *     [[4,3,1], [1,8,7]],
+ *     [[3,8,9], [2,3,5]],
+ *     [[0,0,0], [0,0,0]] ]
+ * ]  # the shape is (2,4,2,3)
+ *
+ * pad_: if widthStart = 1, widthEnd = 2, others are 0.
+ * Output(2,2,2,6) = [
+ *   [ [[0,1,2,3,0,0], [0,3,4,5,0,0]],
+ *     [[0,2,3,5,0,0], [0,1,6,7,0,0]] ],
+ *   [ [[0,4,3,1,0,0], [0,1,8,7,0,0]],
+ *     [[0,3,8,9,0,0], [0,2,3,5,0,0]] ],
+ * ]  # the shape is (2,2,2,6)
+ *
+ * pad_: if heightStart = 1, heightEnd = 1, others are 0.
+ * Output(2,2,4,3) = [
+ *   [ [[0,0,0], [1,2,3], [3,4,5], [0,0,0]],
+ *     [[0,0,0], [2,3,5], [1,6,7], [0,0,0]] ],
+ *   [ [[0,0,0], [4,3,1], [1,8,7], [0,0,0]],
+ *     [[0,0,0], [3,8,9], [2,3,5], [0,0,0]] ],
+ * ]  # the shape is (2,2,4,3)
+ */
+
+template <DeviceType Device>
+class PadFunc : public FunctionBase {
+public:
+  void init(const FuncConfig& config) override {
+    pad_.channelStart = config.get<int>("cstart");
+    pad_.channelEnd = config.get<int>("cend");
+    pad_.heightStart = config.get<int>("hstart");
+    pad_.heightEnd = config.get<int>("hend");
+    pad_.widthStart = config.get<int>("wstart");
+    pad_.widthEnd = config.get<int>("wend");
+  }
+
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    CHECK_EQ(1UL, inputs.size());
+    CHECK_EQ(1UL, outputs.size());
+    CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO);
+
+    size_t num = inputs[0].shape()[0];
+    size_t inC = inputs[0].shape()[1];
+    size_t inH = inputs[0].shape()[2];
+    size_t inW = inputs[0].shape()[3];
+    typename Tensor<real, Device>::Vector vec(
+        outputs[0].shape().getElements(), outputs[0].data<real>());
+    vec.zero();
+
+    Pad<Device>(outputs[0].data<real>(),
+                inputs[0].data<real>(),
+                num,
+                inC,
+                inH,
+                inW,
+                pad_);
+  }
+
+private:
+  PadConf pad_;
+};
+
+/**
+ * \brief The backward propagation of the padding Function. It removes the
+ *        elements at the padding positions introduced in the forward pass.
+ *
+ * Arguments in this Function:
+ * \param pad_    The same meaning as in PadFunc.
+ * \param inputs  The gradient with respect to the output value of PadFunc.
+ * \param outputs The gradient with respect to the input value of PadFunc.
+ */
+
+template <DeviceType Device>
+class PadGradFunc : public FunctionBase {
+public:
+  void init(const FuncConfig& config) override {
+    pad_.channelStart = config.get<int>("cstart");
+    pad_.channelEnd = config.get<int>("cend");
+    pad_.heightStart = config.get<int>("hstart");
+    pad_.heightEnd = config.get<int>("hend");
+    pad_.widthStart = config.get<int>("wstart");
+    pad_.widthEnd = config.get<int>("wend");
+  }
+
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
+    CHECK_EQ(1UL, inputs.size());
+    CHECK_EQ(1UL, outputs.size());
+
+    size_t num = outputs[0].shape()[0];
+    size_t inC = outputs[0].shape()[1];
+    size_t inH = outputs[0].shape()[2];
+    size_t inW = outputs[0].shape()[3];
+
+    if (outputs[0].getArgType() != ADD_TO) {
+      // for unit test
+      typename Tensor<real, Device>::Vector tmp(
+          outputs[0].shape().getElements(), outputs[0].data<real>());
+      tmp.zero();
+    }
+
+    PadGrad<Device>(outputs[0].data<real>(),
+                    inputs[0].data<real>(),
+                    num,
+                    inC,
+                    inH,
+                    inW,
+                    pad_);
+  }
+
+private:
+  PadConf pad_;
+};
+
+REGISTER_TYPED_FUNC(Pad, CPU, PadFunc);
+REGISTER_TYPED_FUNC(PadGrad, CPU, PadGradFunc);
+#ifndef PADDLE_ONLY_CPU
+REGISTER_TYPED_FUNC(Pad, GPU, PadFunc);
+REGISTER_TYPED_FUNC(PadGrad, GPU, PadGradFunc);
+#endif
+
+} // namespace paddle
diff --git a/paddle/function/PadOp.h b/paddle/function/PadOp.h
new file mode 100644
index 0000000000000000000000000000000000000000..7b5c730a6a0fa57833e63beba085cb17054ae2f5
--- /dev/null
+++ b/paddle/function/PadOp.h
@@ -0,0 +1,79 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Function.h"
+
+namespace paddle {
+
+struct PadConf {
+  /// how many values to add before the data along the channel dimension.
+  int channelStart;
+  /// how many values to add after the data along the channel dimension.
+  int channelEnd;
+  /// how many values to add before the data along the height dimension.
+  int heightStart;
+  /// how many values to add after the data along the height dimension.
+  int heightEnd;
+  /// how many values to add before the data along the width dimension.
+  int widthStart;
+  /// how many values to add after the data along the width dimension.
+  int widthEnd;
+};
+
+/**
+ * \brief This function pads the input with zeros along the specified
+ *        dimensions. The input and output are 4D tensors; zeros are padded
+ *        from the 2nd to the 4th dimension according to the pad argument.
+ *
+ * \param[out] outputs save results.
+ * \param[in]  inputs  input data.
+ * \param[in]  num     batch size of the input data.
+ * \param[in]  inC     channel number of the input data.
+ * \param[in]  inH     height of the input data.
+ * \param[in]  inW     width of the input data.
+ * \param[in]  pad     the padding config; contains the padding size along
+ *                     each dimension.
+ */
+template <DeviceType Device>
+void Pad(real* outputs,
+         const real* inputs,
+         const int num,
+         const int inC,
+         const int inH,
+         const int inW,
+         const PadConf& pad);
+
+/**
+ * \brief Padding operation backward.
+ *
+ * \param[out] inGrad  gradients of the previous layer.
+ * \param[in]  outGrad output gradients.
+ * \param[in]  num     batch size of the input data.
+ * \param[in]  inC     channel number of the input data.
+ * \param[in]  inH     height of the input data.
+ * \param[in]  inW     width of the input data.
+ * \param[in]  pad     the padding config; contains the padding size along
+ *                     each dimension.
+ */
+template <DeviceType Device>
+void PadGrad(real* inGrad,
+             const real* outGrad,
+             const int num,
+             const int inC,
+             const int inH,
+             const int inW,
+             const PadConf& pad);
+} // namespace paddle
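The same offset arithmetic drives both the CPU loops in PadOp.cpp and the GPU kernels in the file that follows: input element (n, c, h, w) lands at `((n * outC + c + cstart) * outH + h + hstart) * outW + wstart + w` in the padded output. A standalone sanity check, with illustrative values:

```cpp
#include <cassert>

// NCHW offset mapping shared by Pad (scatter) and PadGrad (gather).
int padOffset(int n, int c, int h, int w,
              int outC, int outH, int outW,
              int cstart, int hstart, int wstart) {
  return ((n * outC + c + cstart) * outH + h + hstart) * outW + wstart + w;
}

int main() {
  // inC=1, inH=2, inW=3 padded with wstart=1, wend=2 gives outC=1,
  // outH=2, outW=6; input element (0,0,1,0) maps to padded offset 7.
  assert(padOffset(0, 0, 1, 0, /*outC=*/1, /*outH=*/2, /*outW=*/6,
                   /*cstart=*/0, /*hstart=*/0, /*wstart=*/1) == 7);
  return 0;
}
```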
*/ + +#include "hl_base.h" +#include "PadOp.h" + +namespace paddle { + +__global__ void KePad(real* outputs, const real* inputs, + int inC, int inH, int inW, + int padc, int padh, int padw, + int outC, int outH, int outW, int nthreads) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < nthreads) { + const int w = idx % inW; + const int h = (idx / inW) % inH; + const int c = (idx / inW / inH) % inC; + const int n = idx / inW / inH / inC; + + const int off = ((n * outC + c + padc) * outH + h + padh) * outW + padw + w; + outputs[off] = inputs[idx]; + } +} + +template <> +void Pad(real* outputs, + const real* inputs, + const int num, + const int inC, + const int inH, + const int inW, + const PadConf& pad) { + size_t nth = num * inC * inH * inW; + int blockSize = 1024; + int gridSize = (nth + 1024 - 1) / 1024; + int cstart = pad.channelStart, cend = pad.channelEnd; + int hstart = pad.heightStart, hend = pad.heightEnd; + int wstart = pad.widthStart, wend = pad.widthEnd; + int outC = inC + cstart + cend; + int outH = inH + hstart + hend; + int outW = inW + wstart + wend; + KePad<<>> + (outputs, inputs, inC, inH, inW, cstart, hstart, wstart, + outC, outH, outW, nth); + CHECK_SYNC("Pad"); +} + +__global__ void KePadDiff(real* inGrad, const real* outGrad, + int inC, int inH, int inW, + int padc, int padh, int padw, + int outC, int outH, int outW, int nthreads) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < nthreads) { + const int w = idx % inW; + const int h = (idx / inW) % inH; + const int c = (idx / inW / inH) % inC; + const int n = idx / inW / inH / inC; + + const int off = ((n * outC + c + padc) * outH + h + padh) * outW + padw + w; + inGrad[idx] += outGrad[off]; + } +} + +template <> +void PadGrad(real* inGrad, + const real* outGrad, + const int num, + const int inC, + const int inH, + const int inW, + const PadConf& pad) { + int nth = num * inC * inH * inW; + int blockSize = 1024; + int gridSize = (nth + 1024 - 1) / 1024; + int cstart = pad.channelStart, cend = pad.channelEnd; + int hstart = pad.heightStart, hend = pad.heightEnd; + int wstart = pad.widthStart, wend = pad.widthEnd; + int outC = inC + cstart + cend; + int outH = inH + hstart + hend; + int outW = inW + wstart + wend; + KePadDiff <<>> + (inGrad, outGrad, inC, inH, inW, cstart, hstart, wstart, + outC, outH, outW, nth); + CHECK_SYNC("PadGrad"); +} + +} // namespace paddle diff --git a/paddle/function/PadOpTest.cpp b/paddle/function/PadOpTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cd22d9113567912f7694e05e5d631e49d940e3ac --- /dev/null +++ b/paddle/function/PadOpTest.cpp @@ -0,0 +1,75 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include "FunctionTest.h" + +namespace paddle { + +TEST(Pad, real) { + for (size_t numSamples : {5, 32}) { + for (size_t channels : {1, 5, 32}) { + for (size_t imgSizeH : {5, 33, 100}) { + for (size_t imgSizeW : {5, 32, 96}) { + VLOG(3) << " numSamples=" << numSamples << " channels=" << channels + << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; + + FunctionCompare compare("Pad", + FuncConfig() + .set("cstart", 2) + .set("cend", 3) + .set("hstart", 1) + .set("hend", 2) + .set("wstart", 3) + .set("wend", 2)); + TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW}; + TensorShape outDims{ + numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5}; + compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, inDims)); + compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, outDims, ASSIGN_TO)); + compare.run(); + } + } + } + } +} + +TEST(PadGrad, real) { + for (size_t numSamples : {5, 32}) { + for (size_t channels : {1, 5, 32}) { + for (size_t imgSizeH : {5, 33, 100}) { + for (size_t imgSizeW : {5, 32, 96}) { + VLOG(3) << " numSamples=" << numSamples << " channels=" << channels + << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW; + FunctionCompare compare("PadGrad", + FuncConfig() + .set("cstart", 2) + .set("cend", 3) + .set("hstart", 1) + .set("hend", 2) + .set("wstart", 3) + .set("wend", 2)); + TensorShape inDims{numSamples, channels, imgSizeH, imgSizeW}; + TensorShape outDims{ + numSamples, channels + 5, imgSizeH + 3, imgSizeW + 5}; + compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, outDims)); + compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, inDims, ASSIGN_TO)); + compare.run(); + } + } + } + } +} + +} // namespace paddle diff --git a/paddle/function/TensorShape.h b/paddle/function/TensorShape.h index e491e3f1d6b26e14a5273b3b5a38aec941f5a9e5..cda58f19dfa4a8b80efc97570c83ca38fd7adf27 100644 --- a/paddle/function/TensorShape.h +++ b/paddle/function/TensorShape.h @@ -55,6 +55,15 @@ public: numElements(); } + void reshape(std::initializer_list dims) { + ndims_ = dims.size(); + if (ndims_ > kMinDims) { + dims_.resize(ndims_); + } + dims_.assign(dims); + numElements(); + } + // number of dimensions of the tensor size_t ndims() const { return ndims_; } @@ -82,7 +91,7 @@ private: // init dims_ void initDims(size_t ndims) { - size_t count = ndims < 4 ? 4 : ndims; + size_t count = ndims < kMinDims ? 
kMinDims : ndims; dims_.assign(count, 1); } @@ -92,6 +101,7 @@ private: // number of elements size_t nelements_; std::vector dims_; + static const size_t kMinDims = 4; }; } // namespace paddle diff --git a/paddle/function/TensorType.h b/paddle/function/TensorType.h index 98942cff9e2ea44e78727d66a059ab8cf5f0ef7c..8308bbd8ad4fe1b97b35b779f27d2bf4534f0fa6 100644 --- a/paddle/function/TensorType.h +++ b/paddle/function/TensorType.h @@ -31,6 +31,10 @@ enum DeviceType { DEVICE_TYPE_GPU = 2 }; +enum SparseDataType { T_NO_VALUE = 0, T_FLOAT_VALUE = 1 }; + +enum SparseDataFormat { T_SPARSE_CSR = 0, T_SPARSE_CSC = 1 }; + inline int sizeOfValuType(ValueType valueType) { if (valueType == VALUE_TYPE_INT32) { return 4; @@ -87,6 +91,29 @@ struct MatrixT { using type = void; // Not implemented }; +template +struct SparseMatrixT; + +template <> +struct SparseMatrixT { + using type = CpuSparseMatrix; +}; + +template <> +struct SparseMatrixT { + using type = GpuSparseMatrix; +}; + +template <> +struct SparseMatrixT { + using type = void; // Not implemented +}; + +template <> +struct SparseMatrixT { + using type = void; // Not implemented +}; + template struct VectorT; @@ -114,8 +141,9 @@ struct VectorT { template struct Tensor { - typedef typename detail::MatrixT::type Matrix; typedef typename detail::VectorT::type Vector; + typedef typename detail::MatrixT::type Matrix; + typedef typename detail::SparseMatrixT::type SparseMatrix; }; } // namespace paddle diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp index f8c4bcac2f8eb41400659dc24ba81768e7ae3640..c541b72e104bf2b81e2ac222d4af13ea2f90d289 100644 --- a/paddle/gserver/activations/ActivationFunction.cpp +++ b/paddle/gserver/activations/ActivationFunction.cpp @@ -69,8 +69,14 @@ static ClassRegistrar gActivationRegistrar; class IdentityActivation : public ActivationFunction { public: static const std::string name; - void forward(Argument& act) { (void)act; } - void backward(Argument& act) { (void)act; } + Error __must_check forward(Argument& act) { + (void)act; + return Error(); + } + Error __must_check backward(Argument& act) { + (void)act; + return Error(); + } const std::string& getName() const { return name; } }; const std::string IdentityActivation::name = ""; @@ -86,8 +92,14 @@ static InitFunction __reg_activation__identity([] { * \f] */ BEGIN_DEFINE_ACTIVATION(sigmoid) -void forward(Argument& act) { act.value->sigmoid(*act.value); } -void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); } +Error __must_check forward(Argument& act) { + act.value->sigmoid(*act.value); + return Error(); +} +Error __must_check backward(Argument& act) { + act.grad->sigmoidDerivative(*act.value); + return Error(); +} END_DEFINE_ACTIVATION(sigmoid) /** @@ -103,9 +115,12 @@ MatrixPtr sftMaxDot_; MatrixPtr one_; public: -void forward(Argument& act) { act.value->softmax(*act.value); } +Error __must_check forward(Argument& act) { + act.value->softmax(*act.value); + return Error(); +} -void backward(Argument& act) { +Error __must_check backward(Argument& act) { MatrixPtr outputV = act.value; MatrixPtr outputG = act.grad; @@ -137,6 +152,7 @@ void backward(Argument& act) { act.grad->softmaxDerivative(*act.value, *sftMaxSum_); } + return Error(); } END_DEFINE_ACTIVATION(softmax) @@ -151,8 +167,11 @@ ACTIVATION_CLASS_NAME(softmax) softmax_; Argument argument_; public: -void forward(Argument& act) { - CHECK_EQ(act.value->getWidth(), 1UL); +Error __must_check forward(Argument& act) { + if 
(act.value->getWidth() != 1UL) { + return Error( + "Input width for each timestep of sequence softmax should be 1"); + } if (!argument_.value) { argument_.value = Matrix::create(nullptr, @@ -169,10 +188,14 @@ void forward(Argument& act) { auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId)); act.value->sequenceSoftmax(*act.value, *starts); + return Error(); } -void backward(Argument& act) { - CHECK_EQ(act.grad->getWidth(), 1UL); +Error __must_check backward(Argument& act) { + if (act.value->getWidth() != 1UL) { + return Error( + "Input width for each timestep of sequence softmax should be 1"); + } size_t numSequences = act.getNumSequences(); const int* starts = act.sequenceStartPositions->getData(false); @@ -184,8 +207,10 @@ void backward(Argument& act) { argument_.value->setData(act.value->getData() + offset, 1UL, size); argument_.grad->setData(act.grad->getData() + offset, 1UL, size); - softmax_.backward(argument_); + Error status = softmax_.backward(argument_); + if (!status) return status; } + return Error(); } END_DEFINE_ACTIVATION(sequence_softmax) @@ -200,9 +225,15 @@ END_DEFINE_ACTIVATION(sequence_softmax) * 0 otherwise. */ BEGIN_DEFINE_ACTIVATION(relu) -void forward(Argument& act) { act.value->relu(*act.value); } +Error __must_check forward(Argument& act) { + act.value->relu(*act.value); + return Error(); +} -void backward(Argument& act) { act.grad->reluDerivative(*act.value); } +Error __must_check backward(Argument& act) { + act.grad->reluDerivative(*act.value); + return Error(); +} END_DEFINE_ACTIVATION(relu) /** @@ -219,9 +250,15 @@ END_DEFINE_ACTIVATION(relu) * TODO(yuyang18): Remove magic number 24 or make it configuable. */ BEGIN_DEFINE_ACTIVATION(brelu) -void forward(Argument& act) { act.value->brelu(*act.value); } +Error __must_check forward(Argument& act) { + act.value->brelu(*act.value); + return Error(); +} -void backward(Argument& act) { act.grad->breluDerivative(*act.value); } +Error __must_check backward(Argument& act) { + act.grad->breluDerivative(*act.value); + return Error(); +} END_DEFINE_ACTIVATION(brelu) /** @@ -231,9 +268,15 @@ END_DEFINE_ACTIVATION(brelu) * \f] */ BEGIN_DEFINE_ACTIVATION(tanh) -void forward(Argument& act) { act.value->tanh(*act.value); } +Error __must_check forward(Argument& act) { + act.value->tanh(*act.value); + return Error(); +} -void backward(Argument& act) { act.grad->tanhDerivative(*act.value); } +Error __must_check backward(Argument& act) { + act.grad->tanhDerivative(*act.value); + return Error(); +} END_DEFINE_ACTIVATION(tanh) /** @@ -248,10 +291,14 @@ real a, b; public: ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) 
{} -void forward(Argument& act) { act.value->scaledTanh(*act.value, a, b); } +Error __must_check forward(Argument& act) { + act.value->scaledTanh(*act.value, a, b); + return Error(); +} -void backward(Argument& act) { +Error __must_check backward(Argument& act) { act.grad->scaledTanhDerivative(*act.value, a, b); + return Error(); } END_DEFINE_ACTIVATION(stanh) @@ -262,9 +309,15 @@ END_DEFINE_ACTIVATION(stanh) * \f] */ BEGIN_DEFINE_ACTIVATION(softrelu) -void forward(Argument& act) { act.value->softrelu(*act.value); } +Error __must_check forward(Argument& act) { + act.value->softrelu(*act.value); + return Error(); +} -void backward(Argument& act) { act.grad->softreluDerivative(*act.value); } +Error __must_check backward(Argument& act) { + act.grad->softreluDerivative(*act.value); + return Error(); +} END_DEFINE_ACTIVATION(softrelu) /** @@ -280,7 +333,7 @@ END_DEFINE_ACTIVATION(softrelu) * 0 if z=0 */ BEGIN_DEFINE_ACTIVATION(abs) -void forward(Argument& act) { +Error __must_check forward(Argument& act) { SetDevice device(act.deviceId); Matrix::resizeOrCreate(act.in, act.value->getHeight(), @@ -290,9 +343,13 @@ void forward(Argument& act) { act.in->copyFrom(*act.value); act.value->abs2(*act.value); + return Error(); } -void backward(Argument& act) { act.grad->absDerivative(*act.in); } +Error __must_check backward(Argument& act) { + act.grad->absDerivative(*act.in); + return Error(); +} END_DEFINE_ACTIVATION(abs) /** @@ -302,7 +359,7 @@ END_DEFINE_ACTIVATION(abs) * \f] */ BEGIN_DEFINE_ACTIVATION(square) -void forward(Argument& act) { +Error __must_check forward(Argument& act) { SetDevice device(act.deviceId); Matrix::resizeOrCreate(act.in, act.value->getHeight(), @@ -312,9 +369,13 @@ void forward(Argument& act) { act.in->copyFrom(*act.value); act.value->square2(*act.value); + return Error(); } -void backward(Argument& act) { act.grad->squareDerivative(*act.in); } +Error __must_check backward(Argument& act) { + act.grad->squareDerivative(*act.in); + return Error(); +} END_DEFINE_ACTIVATION(square) /** @@ -324,9 +385,15 @@ END_DEFINE_ACTIVATION(square) * \f] */ BEGIN_DEFINE_ACTIVATION(exponential) -void forward(Argument& act) { act.value->exp2(*act.value); } +Error __must_check forward(Argument& act) { + act.value->exp2(*act.value); + return Error(); +} -void backward(Argument& act) { act.grad->expDerivative(*act.value); } +Error __must_check backward(Argument& act) { + act.grad->expDerivative(*act.value); + return Error(); +} END_DEFINE_ACTIVATION(exponential) /** @@ -336,7 +403,7 @@ END_DEFINE_ACTIVATION(exponential) * \f] */ BEGIN_DEFINE_ACTIVATION(log) -void forward(Argument& act) { +Error __must_check forward(Argument& act) { SetDevice device(act.deviceId); Matrix::resizeOrCreate(act.in, act.value->getHeight(), @@ -346,9 +413,13 @@ void forward(Argument& act) { act.in->copyFrom(*act.value); act.value->log2(*act.value); + return Error(); } -void backward(Argument& act) { act.grad->dotDiv(*act.grad, *act.in); } +Error __must_check backward(Argument& act) { + act.grad->dotDiv(*act.grad, *act.in); + return Error(); +} END_DEFINE_ACTIVATION(log) ActivationFunction* ActivationFunction::create(const std::string& type) { diff --git a/paddle/gserver/activations/ActivationFunction.h b/paddle/gserver/activations/ActivationFunction.h index 601e3b6c0cd401ec007e8cf51e44416f82832e58..f208224e304a79125679c6f3a5c0be09552465ef 100644 --- a/paddle/gserver/activations/ActivationFunction.h +++ b/paddle/gserver/activations/ActivationFunction.h @@ -15,6 +15,7 @@ limitations under the License. 
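A minimal caller sketch for the Error-returning activation API introduced above; the helper name `tryActivation` and the bare `ActivationFunction*` parameter are illustrative, not part of the patch:

    #include "paddle/utils/Error.h"

    // Propagate activation failures to the caller instead of CHECK-aborting
    // inside the activation itself; __must_check makes silently dropping the
    // returned status a compiler warning.
    paddle::Error tryActivation(paddle::ActivationFunction* act,
                                paddle::Argument& output) {
      paddle::Error err = act->forward(output);
      if (!err.isOK()) return err;  // e.g. bad input width in sequence_softmax
      return paddle::Error();       // default-constructed Error means success
    }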
*/ #pragma once #include #include +#include "paddle/utils/Error.h" namespace paddle { @@ -48,7 +49,7 @@ public: * * Usually, act is Layer::output_ */ - virtual void forward(Argument& act) = 0; + virtual Error __must_check forward(Argument& act) = 0; /** * @brief Backward propagaion @@ -57,7 +58,7 @@ public: * - Before calling backward(), act.grad = dE / dy, where E is the error/cost * - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx) */ - virtual void backward(Argument& act) = 0; + virtual Error __must_check backward(Argument& act) = 0; virtual const std::string& getName() const = 0; }; diff --git a/paddle/gserver/dataproviders/PyDataProvider2.cpp b/paddle/gserver/dataproviders/PyDataProvider2.cpp index c26e242534f2afcff396762adb085bf99303e2b5..b8079dc0796d0e300e65ac6b6b8d3bc826b1e504 100644 --- a/paddle/gserver/dataproviders/PyDataProvider2.cpp +++ b/paddle/gserver/dataproviders/PyDataProvider2.cpp @@ -647,7 +647,7 @@ public: DataBatch& gpuBatch = *batch; std::vector& gpuArguments = gpuBatch.getStreams(); gpuArguments.resize(cpuArguments.size()); - gpuBatch.setSize(size); + gpuBatch.setSize(bsize); for (size_t i = 0; i < headers_.size(); ++i) { gpuArguments[i].resizeAndCopyFrom( cpuArguments[i], useGpu_, HPPL_STREAM_1); diff --git a/paddle/gserver/evaluators/CTCErrorEvaluator.cpp b/paddle/gserver/evaluators/CTCErrorEvaluator.cpp index 05aa6c012ae2bc0afcbaf23f8ff78b3c782d050c..132119015f967c6e8d055792de8afe8450df5ec6 100644 --- a/paddle/gserver/evaluators/CTCErrorEvaluator.cpp +++ b/paddle/gserver/evaluators/CTCErrorEvaluator.cpp @@ -20,7 +20,7 @@ namespace paddle { /** * calculate sequence-to-sequence edit distance */ -class CTCErrorEvaluator : public Evaluator { +class CTCErrorEvaluator : public NotGetableEvaluator { private: MatrixPtr outActivations_; int numTimes_, numClasses_, numSequences_, blank_; diff --git a/paddle/gserver/evaluators/Evaluator.cpp b/paddle/gserver/evaluators/Evaluator.cpp index ae7508e2bb117a60492e0c28230f2fbb4b14915e..9db6d252d97bfeee3fe376bcda431fe94c65a678 100644 --- a/paddle/gserver/evaluators/Evaluator.cpp +++ b/paddle/gserver/evaluators/Evaluator.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/gserver/evaluators/Evaluator.h" -#include "paddle/utils/Stat.h" - #include "paddle/gserver/gradientmachines/NeuralNetwork.h" +#include "paddle/utils/Stat.h" +#include "paddle/utils/StringUtil.h" DECLARE_int32(trainer_id); @@ -39,6 +39,14 @@ void Evaluator::eval(const NeuralNetwork& nn) { */ class ClassificationErrorEvaluator : public Evaluator { public: + /* + ClassificationErrorEvaluator() : totalScore2_(0) {} + + virtual void start() { + Evaluator::start(); + totalScore2_ = 0; + } */ + virtual void updateSamplesNum(const std::vector& arguments) { if (3 == arguments.size()) { numSamples_ += arguments[2].value->getSum(); @@ -76,9 +84,11 @@ public: 1, /* trans= */ false, useGpu(arguments[0].deviceId)); + errorMat->zeroMem(); + if (label != nullptr) { - errorMat->classificationError(*output, *label); + errorMat->classificationError(*output, *label, config_.top_k()); } else if (dynamic_cast(multiBinaryLabel.get()) || dynamic_cast(multiBinaryLabel.get())) { errorMat->classificationErrorMulti( @@ -94,6 +104,16 @@ public: return errorMat; } + void printStats(std::ostream& os) const { + if (config_.top_k() == 1) { + os << config_.name() << "=" + << (numSamples_ ? 
totalScore_ / numSamples_ : 0); + } else { + os << " top_" << config_.top_k() + << "_error=" << (numSamples_ ? totalScore_ / numSamples_ : 0); + } + } + virtual real evalImp(std::vector& arguments) { MatrixPtr errorMat = calcError(arguments); return errorMat->getSum(); @@ -102,6 +122,10 @@ public: virtual void distributeEval(ParameterClient2* client) { mergeResultsOfAllClients(client); } + + // Evaluator interface +protected: + std::string getTypeImpl() const { return "classification_error"; } }; /** @@ -140,6 +164,10 @@ public: virtual void distributeEval(ParameterClient2* client) { mergeResultsOfAllClients(client); } + + // Evaluator interface +protected: + std::string getTypeImpl() const { return "seq_classification_error"; } }; REGISTER_EVALUATOR(seq_classification_error, SequenceClassificationErrorEvaluator); @@ -230,6 +258,10 @@ public: private: IVectorPtr cpuLabel_; MatrixPtr cpuWeight_; + + // Evaluator interface +protected: + std::string getTypeImpl() const { return "sum"; } }; /** * @brief column sum Evaluator @@ -337,10 +369,18 @@ public: } private: - ColumnSumEvaluator() {} int32_t colIdx_; size_t colNum_; MatrixPtr sum_; /* cpu matrix */ + + // Evaluator interface +protected: + std::string getTypeImpl() const { + if (colIdx_ == -1) + return "last-column-sum"; + else + return "column-sum"; + } }; void AucEvaluator::start() { @@ -449,6 +489,16 @@ double AucEvaluator::calcAuc() const { } } +real AucEvaluator::getValueImpl() const { return calcAuc(); } + +std::string AucEvaluator::getTypeImpl() const { + if (colIdx_ == -1) { + return "last-column-auc"; + } else { + return "auc"; + } +} + // class RankAucEvaluator REGISTER_EVALUATOR(rankauc, RankAucEvaluator); @@ -528,12 +578,15 @@ double RankAucEvaluator::calcRankAuc(real* outputData, : aucTmp / (clickSum * noClickSum); } +std::string RankAucEvaluator::getTypeImpl() const { return "rankauc"; } + // class PrecisionRecallEvaluator REGISTER_EVALUATOR(precision_recall, PrecisionRecallEvaluator); void PrecisionRecallEvaluator::start() { Evaluator::start(); statsInfo_.clear(); + values_.clear(); } real PrecisionRecallEvaluator::evalImp(std::vector& arguments) { @@ -594,52 +647,23 @@ real PrecisionRecallEvaluator::evalImp(std::vector& arguments) { } void PrecisionRecallEvaluator::printStats(std::ostream& os) const { - int label = config_.positive_label(); - if (label != -1) { - CHECK(label >= 0 && label < (int)statsInfo_.size()) - << "positive_label [" << label << "] should be in range [0, " - << statsInfo_.size() << ")"; - double precision = - calcPrecision(statsInfo_[label].TP, statsInfo_[label].FP); - double recall = calcRecall(statsInfo_[label].TP, statsInfo_[label].FN); - os << "positive_label=" << label << " precision=" << precision - << " recall=" << recall - << " F1-score=" << calcF1Score(precision, recall); - return; - } - - // micro average method: precision = (TP1+TP2)/(TP1+FP1+TP2+FP2) - // macro average method: precision = (precision1+precision2)/2 - double microTotalTP = 0; - double microTotalFP = 0; - double microTotalFN = 0; - double macroAvgPrecision = 0; - double macroAvgRecall = 0; - size_t numLabels = statsInfo_.size(); - for (size_t i = 0; i < numLabels; ++i) { - microTotalTP += statsInfo_[i].TP; - microTotalFP += statsInfo_[i].FP; - microTotalFN += statsInfo_[i].FN; - macroAvgPrecision += calcPrecision(statsInfo_[i].TP, statsInfo_[i].FP); - macroAvgRecall += calcRecall(statsInfo_[i].TP, statsInfo_[i].FN); - } - macroAvgPrecision /= numLabels; - macroAvgRecall /= numLabels; - double macroAvgF1Score = 
calcF1Score(macroAvgPrecision, macroAvgRecall); - os << "macro-average-precision=" << macroAvgPrecision - << " macro-average-recall=" << macroAvgRecall - << " macro-average-F1-score=" << macroAvgF1Score; - - double microAvgPrecision = calcPrecision(microTotalTP, microTotalFP); - double microAvgRecall = calcPrecision(microTotalTP, microTotalFN); - double microAvgF1Score = calcF1Score(microAvgPrecision, microAvgRecall); - if (!isMultiBinaryLabel_) { - // precision and recall are equal in this case - os << " micro-average-precision=" << microAvgPrecision; - } else { - os << " micro-average-precision=" << microAvgPrecision - << " micro-average-recall=" << microAvgRecall - << " micro-average-F1-score=" << microAvgF1Score; + PrintStatsInfo info; + bool containMacroMicroInfo = getStatsInfo(&info); + os << "positive_label=" << config_.positive_label() + << " precision=" << info.precision << " recall=" << info.recall + << " F1-score=" << info.f1; + if (containMacroMicroInfo) { + os << "macro-average-precision=" << info.macroAvgPrecision + << " macro-average-recall=" << info.macroAvgRecall + << " macro-average-F1-score=" << info.macroAvgF1Score; + if (!isMultiBinaryLabel_) { + // precision and recall are equal in this case + os << " micro-average-precision=" << info.microAvgPrecision; + } else { + os << " micro-average-precision=" << info.microAvgPrecision + << " micro-average-recall=" << info.microAvgRecall + << " micro-average-F1-score=" << info.microAvgF1Score; + } } } @@ -721,6 +745,60 @@ void PrecisionRecallEvaluator::calcStatsInfoMulti(const MatrixPtr& output, } } +void PrecisionRecallEvaluator::storeLocalValues() const { + if (this->values_.size() == 0) { + PrintStatsInfo info; + bool containMacroMicroInfo = getStatsInfo(&info); + values_["precision"] = info.precision; + values_["recall"] = info.recall; + values_["F1-score"] = info.f1; + if (containMacroMicroInfo) { + values_["macro-average-precision"] = info.macroAvgPrecision; + values_["macro-average-recall"] = info.macroAvgRecall; + values_["macro-average-F1-score"] = info.macroAvgF1Score; + if (!isMultiBinaryLabel_) { + // precision and recall are equal in this case + values_["micro-average-precision"] = info.microAvgPrecision; + } else { + values_["micro-average-precision"] = info.microAvgPrecision; + values_["micro-average-recall"] = info.microAvgRecall; + values_["micro-average-F1-score"] = info.microAvgF1Score; + } + } + } +} + +void PrecisionRecallEvaluator::getNames(std::vector<std::string>* names) { + this->storeLocalValues(); + names->reserve(this->values_.size()); + for (auto it = this->values_.begin(); it != this->values_.end(); ++it) { + names->push_back(this->config_.name() + "."
+ it->first); + } +} + +real PrecisionRecallEvaluator::getValue(const std::string& name, + Error* err) const { + this->storeLocalValues(); + std::vector buffers; + paddle::str::split(name, '.', &buffers); + auto it = this->values_.find(buffers[buffers.size() - 1]); + if (it == this->values_.end()) { // not found + *err = Error("No such key %s", name.c_str()); + return .0f; + } + + return it->second; +} + +std::string PrecisionRecallEvaluator::getType(const std::string& name, + Error* err) const { + this->getValue(name, err); + if (!err->isOK()) { + return ""; + } + return "precision_recall"; +} + void PrecisionRecallEvaluator::distributeEval(ParameterClient2* client) { size_t size = 4 * statsInfo_.size(); double* buf = new double[size]; @@ -740,6 +818,47 @@ void PrecisionRecallEvaluator::distributeEval(ParameterClient2* client) { delete[] buf; } +bool PrecisionRecallEvaluator::getStatsInfo( + PrecisionRecallEvaluator::PrintStatsInfo* info) const { + int label = config_.positive_label(); + if (label != -1) { + CHECK(label >= 0 && label < (int)statsInfo_.size()) + << "positive_label [" << label << "] should be in range [0, " + << statsInfo_.size() << ")"; + info->precision = calcPrecision(statsInfo_[label].TP, statsInfo_[label].FP); + info->recall = calcRecall(statsInfo_[label].TP, statsInfo_[label].FN); + info->f1 = calcF1Score(info->precision, info->recall); + return false; + } + + // micro average method: precision = (TP1+TP2)/(TP1+FP1+TP2+FP2) + // macro average method: precision = (precision1+precision2)/2 + double microTotalTP = 0; + double microTotalFP = 0; + double microTotalFN = 0; + info->macroAvgPrecision = 0; + info->macroAvgRecall = 0; + size_t numLabels = statsInfo_.size(); + for (size_t i = 0; i < numLabels; ++i) { + microTotalTP += statsInfo_[i].TP; + microTotalFP += statsInfo_[i].FP; + microTotalFN += statsInfo_[i].FN; + info->macroAvgPrecision += + calcPrecision(statsInfo_[i].TP, statsInfo_[i].FP); + info->macroAvgRecall += calcRecall(statsInfo_[i].TP, statsInfo_[i].FN); + } + info->macroAvgPrecision /= numLabels; + info->macroAvgRecall /= numLabels; + info->macroAvgF1Score = + calcF1Score(info->macroAvgPrecision, info->macroAvgRecall); + + info->microAvgPrecision = calcPrecision(microTotalTP, microTotalFP); + info->microAvgRecall = calcPrecision(microTotalTP, microTotalFN); + info->microAvgF1Score = + calcF1Score(info->microAvgPrecision, info->microAvgRecall); + return true; +} + REGISTER_EVALUATOR(pnpair, PnpairEvaluator); void PnpairEvaluator::start() { Evaluator::start(); @@ -864,56 +983,35 @@ void PnpairEvaluator::calc(std::vector& predictArray) { << " calc total special pair: " << special; } +std::string PnpairEvaluator::getTypeImpl() const { return "pnpair"; } + ClassRegistrar Evaluator::registrar_; Evaluator* Evaluator::create(const EvaluatorConfig& config) { - Evaluator* evaluator = nullptr; - if (config.type() == "classification_error") { - evaluator = new ClassificationErrorEvaluator(); - } else if (config.type() == "sum") { - evaluator = new SumEvaluator(); - } else if (config.type() == "last-column-sum") { - evaluator = new ColumnSumEvaluator(-1); - } else if (config.type() == "last-column-auc") { - evaluator = new AucEvaluator(-1); - } else { - evaluator = registrar_.createByType(config.type()); - } + Evaluator* evaluator = registrar_.createByType(config.type()); evaluator->init(config); return evaluator; } + +REGISTER_EVALUATOR(classification_error, ClassificationErrorEvaluator); +REGISTER_EVALUATOR(sum, SumEvaluator); +static InitFunction 
__reg_type_auc_sum__([]() { + Evaluator::registrar_.registerClass( + "last-column-sum", [] { return new ColumnSumEvaluator(-1); }); + Evaluator::registrar_.registerClass("last-column-auc", + [] { return new AucEvaluator(-1); }); +}); + /** * @brief print value of each layer. * * The config file api is value_printer_evaluator. */ -class ValuePrinter : public Evaluator { +class ValuePrinter : public NotGetableEvaluator { public: - ValuePrinter() {} - virtual void eval(const NeuralNetwork& nn) { for (const std::string& name : config_.input_layers()) { - const Argument& argu = nn.getLayer(name)->getOutput(); - if (argu.value) { - std::ostringstream os; - argu.value->print(os); - LOG(INFO) << "layer=" << name << " value matrix:\n" << os.str(); - } - if (argu.ids) { - std::ostringstream os; - argu.ids->print(os, argu.ids->getSize()); - LOG(INFO) << "layer=" << name << " ids vector:\n" << os.str(); - } - if (auto startPos = argu.sequenceStartPositions) { - std::ostringstream os; - startPos->getVector(false)->print(os, startPos->getSize()); - LOG(INFO) << "layer=" << name << " sequence pos vector:\n" << os.str(); - } - if (auto subStartPos = argu.subSequenceStartPositions) { - std::ostringstream os; - subStartPos->getVector(false)->print(os, subStartPos->getSize()); - LOG(INFO) << "layer=" << name << " sub-sequence pos vector:\n" - << os.str(); - } + nn.getLayer(name)->getOutput().printValueString(LOG(INFO), + "layer=" + name + " "); } } @@ -922,15 +1020,14 @@ public: virtual real evalImp(std::vector& arguments) { return 0; } }; REGISTER_EVALUATOR(value_printer, ValuePrinter); + /** * @brief print gradient of each layer. * * The config file api is gradient_printer_evaluator. */ -class GradientPrinter : public Evaluator { +class GradientPrinter : public NotGetableEvaluator { public: - GradientPrinter() {} - virtual void eval(const NeuralNetwork& nn) { for (const std::string& name : config_.input_layers()) { const Argument& argu = nn.getLayer(name)->getOutput(); @@ -939,11 +1036,6 @@ public: argu.grad->print(os); LOG(INFO) << "layer=" << name << " grad matrix:\n" << os.str(); } - if (auto startPos = argu.sequenceStartPositions) { - std::ostringstream os; - startPos->getVector(false)->print(os, startPos->getSize()); - LOG(INFO) << "layer=" << name << " sequence pos vector:\n" << os.str(); - } } } @@ -957,7 +1049,7 @@ REGISTER_EVALUATOR(gradient_printer, GradientPrinter); * * The config file api is maxid_printer_evaluator. */ -class MaxIdPrinter : public Evaluator { +class MaxIdPrinter : public NotGetableEvaluator { private: IVectorPtr maxIds_; MatrixPtr maxValues_; @@ -999,7 +1091,7 @@ REGISTER_EVALUATOR(max_id_printer, MaxIdPrinter); * * The config file api is maxframe_printer_evaluator. */ -class MaxFramePrinter : public Evaluator { +class MaxFramePrinter : public NotGetableEvaluator { private: IVectorPtr maxIds_; MatrixPtr maxValues_; @@ -1086,7 +1178,7 @@ REGISTER_EVALUATOR(max_frame_printer, MaxFramePrinter); * The config file api is seqtext_printer_evaluator. 
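The create() rewrite above moves every evaluator behind the ClassRegistrar, so adding one no longer means touching a hand-written factory switch. A sketch of what registering a new evaluator would look like under this scheme (MyEvaluator and the "my_eval" / "my-first-column" names are hypothetical):

    // Default-constructible evaluators go through the macro:
    class MyEvaluator : public Evaluator {
    public:
      virtual real evalImp(std::vector<Argument>& arguments) { return 0; }
      virtual void printStats(std::ostream& os) const {}
    };
    REGISTER_EVALUATOR(my_eval, MyEvaluator);

    // Variants that need constructor arguments register a lambda instead,
    // exactly as last-column-sum / last-column-auc do above:
    static InitFunction __reg_my_eval__([]() {
      Evaluator::registrar_.registerClass(
          "my-first-column", [] { return new ColumnSumEvaluator(0); });
    });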
* */ -class SequenceTextPrinter : public Evaluator { +class SequenceTextPrinter : public NotGetableEvaluator { private: /// dict_file, which contains a list of tokens std::vector dict_; @@ -1253,4 +1345,6 @@ public: }; REGISTER_EVALUATOR(classification_error_printer, ClassificationErrorPrinter); +std::string DummyEvaluator::getTypeImpl() const { return "dummy"; } + } // namespace paddle diff --git a/paddle/gserver/evaluators/Evaluator.h b/paddle/gserver/evaluators/Evaluator.h index 5770847309670ef1856cfb9255fa847c24513b56..b114500e2b7c1e460a02c78b99b5f1a8fb63b8c3 100644 --- a/paddle/gserver/evaluators/Evaluator.h +++ b/paddle/gserver/evaluators/Evaluator.h @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/parameter/Argument.h" #include "paddle/pserver/ParameterClient2.h" #include "paddle/utils/ClassRegistrar.h" +#include "paddle/utils/Error.h" namespace paddle { @@ -117,12 +118,105 @@ public: static ClassRegistrar registrar_; + /** + * @brief getNames will return all field names of the current evaluator. + * + * The format of a name is `evaluator_name.evaluator_fields`. If the evaluator + * has multiple fields, the name could be `evaluator_name.field1`. For example, + * the PrecisionRecallEvaluator contains `precision`, `recall` fields. Then getNames + * will return `precision_recall_evaluator.precision`, + * `precision_recall_evaluator.recall`, etc. + * + * Also, if the current evaluator is a combined evaluator, getNames will return + * the names of all evaluators inside the combined evaluator. + * + * @param names [out]: the field names of the current evaluator. + * @note Never clear the names parameter inside getNames. + */ + virtual void getNames(std::vector<std::string>* names) { + names->push_back(config_.name()); + } + + /** + * @brief getValue will return the current evaluated value of one field. + * + * @param name: The field name of the current evaluator. + * @param err [out]: The error state. + * + * @return The evaluated value (metric). + */ + virtual real getValue(const std::string& name, Error* err) const { + if (name != config_.name()) { + *err = Error("no such name of evaluator %s", name.c_str()); + return .0f; + } + return this->getValueImpl(); + } + + /** + * @brief getType will return the evaluator type by field name. + * + * The evaluator type is returned as a string, such as 'auc' or + * 'precision_recall'. In a combined evaluator, different names may return + * different types, because each field can be computed by a different + * evaluator inside. + * + * @param name: The field name of the current evaluator. + * @param err: The error state. nullptr means don't care. + * @return the evaluator type string. + */ + virtual std::string getType(const std::string& name, Error* err) const { + if (name != config_.name()) { + *err = Error("no such name of evaluator %s", name.c_str()); + return std::string(); + } + return this->getTypeImpl(); + } + +protected: + /** + * @brief getValueImpl is the simplest way to define the getValue result. If this + * evaluator does not contain multiple fields and does not raise any error, + * just implement this method to return the evaluated result (metric). + * @return The evaluated result (metric). + */ + virtual real getValueImpl() const { + return numSamples_ != .0 ? totalScore_ / numSamples_ : .0; + } + + /** + * @brief getTypeImpl is the simplest way to define the getType result. If this + * evaluator does not combine other evaluators, it should simply return its + * own type. + * @return Evaluator type.
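A usage sketch of the query API documented above, assuming `eval` is an Evaluator* obtained from NeuralNetwork::makeEvaluator() and already updated over a pass:

    std::vector<std::string> names;
    eval->getNames(&names);  // e.g. "precision_recall_evaluator.precision"
    for (const std::string& n : names) {
      paddle::Error err;
      real value = eval->getValue(n, &err);
      if (err.isOK()) {
        LOG(INFO) << n << " [" << eval->getType(n, &err) << "] = " << value;
      }
    }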
+ */ + virtual std::string getTypeImpl() const { return "base"; } + protected: EvaluatorConfig config_; double numSamples_; double totalScore_; }; +/** + * @brief The NotGetableEvaluator class is the base class for evaluators whose + * values cannot be retrieved at runtime. Most NotGetableEvaluators are printer + * evaluators, which are only used to debug a network configuration. + */ +class NotGetableEvaluator : public Evaluator { + // Evaluator interface +public: + void getNames(std::vector<std::string>* names) {} + + real getValue(const std::string& name, Error* err) const { + *err = Error("Not implemented"); + return .0f; + } + std::string getType(const std::string& name, Error* err) const { + *err = Error("Not implemented"); + return ""; + } +}; + class DummyEvaluator : public Evaluator { public: DummyEvaluator() {} @@ -135,6 +229,10 @@ public: } virtual void finish() {} virtual void printStats(std::ostream&) const {} + + // Evaluator interface +protected: + std::string getTypeImpl() const; }; /** * @brief evaluate AUC using colIdx-th column as prediction. @@ -191,6 +289,11 @@ private: } double calcAuc() const; + + // Evaluator interface +protected: + real getValueImpl() const; + std::string getTypeImpl() const; }; /** @@ -223,6 +326,10 @@ private: real* clickData, real* pvData, size_t size); + + // Evaluator interface +protected: + std::string getTypeImpl() const; }; /** * @brief precision, recall and f1 score Evaluator @@ -272,6 +379,20 @@ private: IVectorPtr cpuLabel_; MatrixPtr cpuWeight_; + struct PrintStatsInfo { + double precision; + double recall; + double f1; + double macroAvgPrecision; + double macroAvgRecall; + double macroAvgF1Score; + double microAvgPrecision; + double microAvgRecall; + double microAvgF1Score; + }; + + bool getStatsInfo(PrintStatsInfo* info) const; + void calcStatsInfo(const MatrixPtr& output, const IVectorPtr& label, const MatrixPtr& weight); @@ -303,6 +424,15 @@ private: return 0; } } + + mutable std::unordered_map<std::string, real> values_; + + void storeLocalValues() const; + // Evaluator interface +public: + void getNames(std::vector<std::string>* names); + real getValue(const std::string& name, Error* err) const; + std::string getType(const std::string& name, Error* err) const; }; /* @@ -349,8 +479,7 @@ public: virtual void finish() { calc(predictArray_); } virtual void printStats(std::ostream& os) const { - os << " pos/neg" - << "=" << pairArray_[0] / ((pairArray_[1] <= 0) ? 1.0 : pairArray_[1]); + os << " pos/neg=" << this->getValueImpl(); } virtual void distributeEval(ParameterClient2* client) { @@ -366,6 +495,13 @@ private: IVectorPtr cpuLabel_; IVectorPtr cpuInfo_; MatrixPtr cpuWeight_; + + // Evaluator interface +protected: + real getValueImpl() const { + return pairArray_[0] / ((pairArray_[1] <= 0) ?
1.0 : pairArray_[1]); + } + std::string getTypeImpl() const; }; } // namespace paddle diff --git a/paddle/gserver/gradientmachines/GradientMachine.cpp b/paddle/gserver/gradientmachines/GradientMachine.cpp index 36ca05b919b136c162105cf4f1fb7705ae7ca7f3..3eb87d9b85c8207a23046fdb4bda06ba8185e2a3 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.cpp +++ b/paddle/gserver/gradientmachines/GradientMachine.cpp @@ -60,55 +60,6 @@ GradientMachine* GradientMachine::create( return nullptr; } -GradientMachine* GradientMachine::create(const std::string& modelFile, - DataConfig* dataConfig) { - std::ifstream is(modelFile); - CHECK(is) << "Fail to open " << modelFile; - return create(is, dataConfig); -} - -GradientMachine* GradientMachine::create(std::istream& is, - DataConfig* dataConfig) { - TrainerConfig trainerConfig; - GradientMachine* ret = create(is, &trainerConfig); - if (dataConfig && trainerConfig.has_data_config()) { - *dataConfig = trainerConfig.data_config(); - } - return ret; -} - -GradientMachine* GradientMachine::create(const std::string& modelFile, - TrainerConfig* trainerConfig) { - std::ifstream is(modelFile); - CHECK(is) << "Fail to open " << modelFile; - return create(is, trainerConfig); -} - -GradientMachine* GradientMachine::create(std::istream& is, - TrainerConfig* trainerConfig) { - TrainerConfig trainerConfigTemp; - int64_t size; - CHECK(is.read((char*)&size, sizeof(size))) << "Fail to read "; - std::string buf; - buf.resize(size); - CHECK(is.read(&buf[0], size)) << "Fail to read "; - CHECK(trainerConfigTemp.ParseFromString(buf)) << "Fail to parse config"; - std::unique_ptr machine( - create(trainerConfigTemp.model_config())); - std::vector& parameters = machine->getParameters(); - for (auto& para : parameters) { - para->load(is); - } - - machine->onLoadParameter(); - - if (trainerConfig) { - *trainerConfig = trainerConfigTemp; - } - - return machine.release(); -} - void GradientMachine::saveParameters(const std::string& dir) const { LOG(INFO) << "Saving parameters to " << dir; diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h index 1e35c7e2b8d185e45f33f6287ad4e32ccad2d5a6..bc2f2f8563526aa045ea89f15152ee2d639b5774 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.h +++ b/paddle/gserver/gradientmachines/GradientMachine.h @@ -89,39 +89,6 @@ public: std::vector{ PARAMETER_VALUE, PARAMETER_GRADIENT, PARAMETER_MOMENTUM}); - /** - * Create a gradient machine from the merged model file. - * The merged model file can be generated using tools/merge_model - * If dataConfig is not null, it will be filled with the DataConfig - * from the TrainerConfig - */ - static GradientMachine* create(const std::string& modelFile, - DataConfig* dataConfig); - - /** - * Create a gradient machine from a stream which contains the merged - * model file. The merged model file can be generated using tools/merge_model - * If dataConfig is not null, it will be filled with the DataConfig - * from the TrainerConfig - */ - static GradientMachine* create(std::istream& is, DataConfig* dataConfig); - - /** - * Create a gradient machine from the merged model file. - * The merged model file can be generated using tools/merge_model - * If trainerConfig is not null, it will be filled with the TrainerConfig - */ - static GradientMachine* create(const std::string& modelFile, - TrainerConfig* trainerConfig); - - /** - * Create a gradient machine from a stream which contains the merged - * model file. 
The merged model file can be generated using tools/merge_model - * If trainerConfig is not null, it will be filled with the TrainerConfig - */ - static GradientMachine* create(std::istream& is, - TrainerConfig* trainerConfig); - virtual ~GradientMachine() {} /** @@ -167,6 +134,10 @@ public: backward(callback); } + virtual Argument getLayerOutput(const std::string& layerName) { + return *((Argument*)nullptr); + } + // see comment in Layer.h for the function with the same name virtual void resetState() {} diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp index 80f223824d8dccfb0e9386f4c076b28f9332a958..123273f916f5d33e2543d9f5f28573c3b5761e28 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.cpp @@ -282,6 +282,18 @@ void MultiGradientMachine::forwardBackward(const std::vector& inArgs, backwardImp(callback); } +Argument MultiGradientMachine::getLayerOutput(const std::string& layerName) { + std::vector args; + args.reserve(threads_.size()); + + for (auto& thread : threads_) { + args.push_back(thread->getGradientMachine()->getLayerOutput(layerName)); + } + outLayerArgs_.concat(args, false /* use_gpu */, outArgStream_, passType_); + + return outLayerArgs_; +} + void MultiGradientMachine::backwardImp(const UpdateCallback& callback) { for (size_t i = 0; i < parameters_.size(); i++) { if (!parameters_[i]->useGpu() || parameters_[i]->isStatic()) continue; diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/gserver/gradientmachines/MultiGradientMachine.h index 9be15ef4bcf34f26b7eceb9047252e537f20a4a8..838a52b5153af63adbce5788824b9f541f22517c 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.h +++ b/paddle/gserver/gradientmachines/MultiGradientMachine.h @@ -189,6 +189,8 @@ public: PassType passType, const UpdateCallback& callback); + virtual Argument getLayerOutput(const std::string& layerName); + virtual void onPassEnd(); virtual void finish(); @@ -314,6 +316,8 @@ protected: std::vector outArgs_; hl_stream_t outArgStream_; + Argument outLayerArgs_; + /// ParameterType which needs to be merged from each GPU std::vector mergeTypes_; int numDevices_; /* number of gpu devices */ diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index 22051e07ee0026bc3c44a8767e265a56b415b8e4..4512aacc81f86bf87fc9ea30adcf081327663f16 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -293,11 +293,10 @@ void NeuralNetwork::backward(const UpdateCallback& callback) { } } -MatrixPtr NeuralNetwork::getLayerOutput(const std::string& layerName) { - auto it = layerMap_.find(layerName); - CHECK(it != layerMap_.end()) << "Cannot find layer: " << layerName; - return it->second->getOutputValue(); +Argument NeuralNetwork::getLayerOutput(const std::string& layerName) { + return getLayer(layerName)->getOutput(); } + void NeuralNetwork::onPassEnd() { for (auto& layer : layers_) { layer->onPassEnd(); @@ -306,7 +305,6 @@ void NeuralNetwork::onPassEnd() { class CombinedEvaluator : public Evaluator { public: - CombinedEvaluator() {} void addEvaluator(std::unique_ptr&& evaluator) { evaluators_.emplace_back(std::move(evaluator)); } @@ -346,6 +344,55 @@ public: protected: std::vector> evaluators_; + + // Evaluator interface +public: + /** + * @brief getNames will return all inside evaluators' names. 
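A brief usage sketch for the getLayerOutput change above (the machine pointer and layer name are illustrative): callers now receive a full Argument rather than a bare MatrixPtr, and MultiGradientMachine merges the per-thread slices into outLayerArgs_ in the implementation above before returning:

    machine->forward(inArgs, &outArgs, PASS_TEST);
    paddle::Argument out = machine->getLayerOutput("__fc_layer_0__");
    // out.value holds the rows of every worker thread, concatenated on CPU
    // (concat(args, false /* use_gpu */, ...) in getLayerOutput above).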
+ * @param names [out]: return names. + */ + void getNames(std::vector* names) { + for (auto& eval : evaluators_) { + eval->getNames(names); + } + } + + /** + * @brief getValue could get all inside evaluators' value. + */ + real getValue(const std::string& name, Error* err) const { + return this->getMethodHelper( + name, err, [&name, err](const std::unique_ptr& eval) { + return eval->getValue(name, err); + }); + } + + /** + * @brief getType could get all inside evaluators' type. + */ + std::string getType(const std::string& name, Error* err) const { + return this->getMethodHelper( + name, err, [&name, err](const std::unique_ptr& eval) { + return eval->getType(name, err); + }); + } + +private: + template + T getMethodHelper(const std::string& name, + Error* err, + const std::function&)>& + callback) const { + for (auto& eval : evaluators_) { + std::vector names; + eval->getNames(&names); + if (std::find(names.begin(), names.end(), name) != names.end()) { + return callback(eval); + } + } + *err = Error("No such key %s", name.c_str()); + return T(); + } }; Evaluator* NeuralNetwork::makeEvaluator() const { diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h index 25af4abcf81700e200feea806fa3daed19df1275..e7b6c438407e7eab6eab1f6ed496f35caa9f2177 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.h +++ b/paddle/gserver/gradientmachines/NeuralNetwork.h @@ -87,7 +87,8 @@ public: virtual void backward(const UpdateCallback& callback = nullptr); - MatrixPtr getLayerOutput(const std::string& layerName); + virtual Argument getLayerOutput(const std::string& layerName); + const LayerPtr& getLayer(const std::string& layerName) const { auto it = layerMap_.find(layerName); CHECK(it != layerMap_.end()) << "Unknown layer " << layerName; diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index a9a9f4f903e305bfe0ee3dd089a85ba524022faa..2ab964b8fc2e080282aa03db4ee6836540e666d7 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -155,7 +155,8 @@ protected: public: explicit BootBiasLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { if (!Layer::init(layerMap, parameterMap)) return false; if (biasParameter_) { @@ -174,7 +175,7 @@ public: } } - virtual void forward(PassType passType) { + void forward(PassType passType) override { if (biases_) { MatrixPtr outV = getOutputValue(); outV->addBias(*(biases_->getW()), 1); @@ -182,7 +183,7 @@ public: } } - virtual void backward(const UpdateCallback& callback) { + void backward(const UpdateCallback& callback) override { if (biases_) { backwardActivation(); biases_->getWGrad()->collectBias(*getOutputGrad(), 1); diff --git a/paddle/gserver/layers/AddtoLayer.h b/paddle/gserver/layers/AddtoLayer.h index 53d3f99cdd3439a1ba85f54526ca65005986c634..4e98c174b462763d3c2714770f66951981afa9f8 100644 --- a/paddle/gserver/layers/AddtoLayer.h +++ b/paddle/gserver/layers/AddtoLayer.h @@ -44,19 +44,20 @@ public: /** * Intialization of AddtoLayer. */ - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; /** * Forward propagation. 
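One note on the blanket signature modernization running through the headers below: marking each overriding member with `override` turns a silent mismatch into a compile error. A contrived sketch (Base/Bad/Good are illustrative, not Paddle classes):

    struct Base {
      virtual void forward(PassType passType) {}
    };
    struct Bad : Base {
      // Stray const in the signature: this declares a brand-new function,
      // compiles anyway, and the base version silently keeps being called.
      virtual void forward(PassType passType) const {}
    };
    struct Good : Base {
      // With override, the same mistake no longer compiles.
      void forward(PassType passType) override {}
    };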
* @note There is no weight matrix for each input, * because it just a simple add operation. */ - void forward(PassType passType); + void forward(PassType passType) override; /** * Backward propagation. */ - void backward(const UpdateCallback& callback = nullptr); + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/AgentLayer.h b/paddle/gserver/layers/AgentLayer.h index 41683ad6712d5df710737cf71c600790fcc8786f..b6dac7ae6fec2d61c60c9548d466233efe9febd5 100644 --- a/paddle/gserver/layers/AgentLayer.h +++ b/paddle/gserver/layers/AgentLayer.h @@ -35,7 +35,8 @@ public: ~AgentLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; // if *numSamples* set, // real layer output will only use first *numSamples* rows @@ -44,8 +45,8 @@ public: numSamples_ = numSamples; } - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr) {} + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override {} }; /** @@ -56,8 +57,8 @@ public: explicit SequenceAgentLayer(const LayerConfig& config) : AgentLayer(config) {} ~SequenceAgentLayer() {} - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr) {} + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override {} }; /** @@ -78,7 +79,8 @@ public: virtual ~GatherAgentLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; // call before addRealLayer void copyIdAndSequenceInfo(const Argument& input, @@ -88,8 +90,8 @@ public: // add one real layer, can call many times void addRealLayer(LayerPtr layer) { realLayers_.push_back(layer); } - void forward(PassType passType); - void backward(const UpdateCallback& callback); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; }; /** @@ -133,7 +135,8 @@ public: virtual ~ScatterAgentLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; /** * @brief set real layer in generation @@ -182,8 +185,8 @@ public: numSequences_ = numSequences; } - void forward(PassType passType); - void backward(const UpdateCallback& callback); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; }; /** diff --git a/paddle/gserver/layers/AverageLayer.h b/paddle/gserver/layers/AverageLayer.h index b3c4ecec8bc6f56b4563ee9f1ada91e4d8f2cbb5..621e1d7bb12ec5b8c7a6173bd601835d9406e814 100644 --- a/paddle/gserver/layers/AverageLayer.h +++ b/paddle/gserver/layers/AverageLayer.h @@ -38,12 +38,11 @@ public: explicit AverageLayer(const LayerConfig& config) : SequencePoolLayer(config) {} - ~AverageLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: MatrixPtr outMtx_; diff --git a/paddle/gserver/layers/BatchNormBaseLayer.h b/paddle/gserver/layers/BatchNormBaseLayer.h 
index 75bda95de1472b08538b48072ddf9ea607b83299..230bafc31d96bbd49481a7ed135be6888688627e 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.h +++ b/paddle/gserver/layers/BatchNormBaseLayer.h @@ -52,7 +52,8 @@ public: */ static Layer* create(const LayerConfig& config); - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; /** * @brief Calculate feature map size. Some input uses frameHeight and diff --git a/paddle/gserver/layers/BatchNormalizationLayer.h b/paddle/gserver/layers/BatchNormalizationLayer.h index 195acbbfc58db8368f6db1c1595dd6b04801ee26..f6115801fc6b341c0718f8851617de43bdeeec09 100644 --- a/paddle/gserver/layers/BatchNormalizationLayer.h +++ b/paddle/gserver/layers/BatchNormalizationLayer.h @@ -33,9 +33,10 @@ public: ~BatchNormalizationLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: /// Epsilon value used in the batch normalization formula. @@ -58,7 +59,7 @@ protected: /// to batch, channels* imagePixels. void shrinkMat(const MatrixPtr& in, MatrixPtr& out); - void onPassEnd() { firstTest_ = true; } + void onPassEnd() override { firstTest_ = true; } MatrixPtr tmpMat_, tmpGrad_; MatrixPtr expandedIn_, expandedOut_; diff --git a/paddle/gserver/layers/BilinearInterpLayer.h b/paddle/gserver/layers/BilinearInterpLayer.h index 4ff4b0ea793dc901d099bf73d55aa15463e62094..27c269f2781c99e4f166ef1052cbf03a773ad57e 100644 --- a/paddle/gserver/layers/BilinearInterpLayer.h +++ b/paddle/gserver/layers/BilinearInterpLayer.h @@ -38,9 +38,10 @@ public: virtual ~BilinearInterpLayer() {} size_t getSize(); - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/BlockExpandLayer.h b/paddle/gserver/layers/BlockExpandLayer.h index cc96fdd03fcac6925a16f0fb91045f065f74e803..8f347400e60ec84fc1b5fdbc1c911a8768b306d0 100644 --- a/paddle/gserver/layers/BlockExpandLayer.h +++ b/paddle/gserver/layers/BlockExpandLayer.h @@ -58,10 +58,11 @@ public: ~BlockExpandLayer() {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/CRFDecodingLayer.h b/paddle/gserver/layers/CRFDecodingLayer.h index 1fd444ad10e71df2bb6d8bdb839e6f02b33d647f..3cbcac6cf62decd43844cc442fc5e4f973d0acfc 100644 --- a/paddle/gserver/layers/CRFDecodingLayer.h +++ b/paddle/gserver/layers/CRFDecodingLayer.h @@ -32,9 +32,10 @@ namespace paddle { class CRFDecodingLayer : public CRFLayer { public: explicit CRFDecodingLayer(const LayerConfig& config) : 
CRFLayer(config) {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; protected: std::unique_ptr crf_; diff --git a/paddle/gserver/layers/CRFLayer.h b/paddle/gserver/layers/CRFLayer.h index d21b32b68c1a40c814af3aa2c285612a5f938d79..de36a85083b6b293fd2d8522ec279a38cc4f8be3 100644 --- a/paddle/gserver/layers/CRFLayer.h +++ b/paddle/gserver/layers/CRFLayer.h @@ -29,9 +29,10 @@ namespace paddle { class CRFLayer : public Layer { public: explicit CRFLayer(const LayerConfig& config) : Layer(config) {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; protected: size_t numClasses_; diff --git a/paddle/gserver/layers/CTCLayer.h b/paddle/gserver/layers/CTCLayer.h index 70d429bad656ade3c05256472d799ae72e128be5..f7a515f312d075c54b4aab2557175c70fdbd9875 100644 --- a/paddle/gserver/layers/CTCLayer.h +++ b/paddle/gserver/layers/CTCLayer.h @@ -22,10 +22,11 @@ namespace paddle { class CTCLayer : public Layer { public: explicit CTCLayer(const LayerConfig& config) : Layer(config) {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - virtual void forward(PassType passType); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; void forwardImp(const Argument& softmaxSeqs, const Argument& labelSeqs); - virtual void backward(const UpdateCallback& callback); + void backward(const UpdateCallback& callback) override; void backwardImp(const UpdateCallback& callback, const Argument& softmaxSeqs, const Argument& labelSeqs); diff --git a/paddle/gserver/layers/ConcatenateLayer.cpp b/paddle/gserver/layers/ConcatenateLayer.cpp index d19adace7d58af16736fc2b6e536f5fd69a19863..c5fc4cf4f81a55a4c57e92dce64c06acd404badd 100644 --- a/paddle/gserver/layers/ConcatenateLayer.cpp +++ b/paddle/gserver/layers/ConcatenateLayer.cpp @@ -28,10 +28,11 @@ public: ~ConcatenateLayer() {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(concat, ConcatenateLayer); @@ -101,10 +102,11 @@ public: ~ConcatenateLayer2() {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: std::vector> projections_; diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index 
aedf4100e32fa1294c361b6163c14eab7869b803..e9d15d94f806a5d2e6f11cbbfc29e291dfe8538f 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -80,7 +80,8 @@ protected: public: explicit ConvBaseLayer(const LayerConfig& config) : Layer(config) {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; /** * imgSizeH_ and imgSizeW_ will be set according to the previous input layers diff --git a/paddle/gserver/layers/ConvShiftLayer.cpp b/paddle/gserver/layers/ConvShiftLayer.cpp index 9bfb1ab7a47b11a6793159aefcb4f9fa12b81a6b..002be415691f0b3df93835915dcbc9d455231422 100644 --- a/paddle/gserver/layers/ConvShiftLayer.cpp +++ b/paddle/gserver/layers/ConvShiftLayer.cpp @@ -47,10 +47,11 @@ public: ~ConvShiftLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(conv_shift, ConvShiftLayer); diff --git a/paddle/gserver/layers/ConvexCombinationLayer.cpp b/paddle/gserver/layers/ConvexCombinationLayer.cpp index ed57f2af3c6455fb89fd05b37bb205e8da0bf7e1..32eb3bf604acaa8f2060882b545efeeb40f8218d 100644 --- a/paddle/gserver/layers/ConvexCombinationLayer.cpp +++ b/paddle/gserver/layers/ConvexCombinationLayer.cpp @@ -49,10 +49,11 @@ public: ~ConvexCombinationLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(convex_comb, ConvexCombinationLayer); diff --git a/paddle/gserver/layers/CosSimLayer.cpp b/paddle/gserver/layers/CosSimLayer.cpp index 254120443dc3d41bf2422be2e88cb376d70c93d4..57ba124e40cbd098fa8b0012ff31d6935b16862a 100644 --- a/paddle/gserver/layers/CosSimLayer.cpp +++ b/paddle/gserver/layers/CosSimLayer.cpp @@ -26,15 +26,23 @@ bool CosSimLayer::init(const LayerMap& layerMap, Layer::init(layerMap, parameterMap); CHECK_EQ(inputLayers_.size(), 2LU); + + createFunction(forward_, + "CosSimForward", + FuncConfig().set("scale", (real)config_.cos_scale())); + createFunction(backward_, + "CosSimBackward", + FuncConfig().set("scale", (real)config_.cos_scale())); + return true; } void CosSimLayer::forward(PassType passType) { Layer::forward(passType); - /* malloc memory for the output_ if necessary */ int batchSize = getInputValue(0)->getHeight(); int size = getSize(); + CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed"; { REGISTER_TIMER_INFO("CosFwResetTimer", getName().c_str()); @@ -42,26 +50,43 @@ void CosSimLayer::forward(PassType passType) { } MatrixPtr outV = getOutputValue(); - /* activation */ { REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str()); MatrixPtr prevOut1 = getInputValue(0); MatrixPtr prevOut2 = getInputValue(1); - outV->cosSim(*prevOut1, *prevOut2, config_.cos_scale()); + + CHECK(outV && prevOut1 && prevOut2); + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*prevOut1); + inputs.addArg(*prevOut2); + outputs.addArg(*outV, ASSIGN_TO); + 
forward_[0]->calc(inputs, outputs); } } void CosSimLayer::backward(const UpdateCallback& callback) { /* activation */ { REGISTER_TIMER_INFO("CosBpAtvTimer", getName().c_str()); - MatrixPtr outG = this->getOutputGrad(); - - outG->cosSimDerivative(*this->getOutputValue(), - *getInputValue(0), - *getInputValue(1), - *getInputGrad(0), - *getInputGrad(1), - config_.cos_scale()); + CHECK_EQ(backward_.size(), 1UL) << "Only one backward function needed"; + + const auto outG = this->getOutputGrad(); + const auto outV = this->getOutputValue(); + const auto inV1 = this->getInputValue(0); + const auto inV2 = this->getInputValue(1); + auto inG1 = this->getInputGrad(0); + auto inG2 = this->getInputGrad(1); + CHECK(outG && outV && inV1 && inV2 && inG1 && inG2); + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*outG); + inputs.addArg(*outV); + inputs.addArg(*inV1); + inputs.addArg(*inV2); + outputs.addArg(*inG1, ADD_TO); + outputs.addArg(*inG2, ADD_TO); + + backward_[0]->calc(inputs, outputs); } } diff --git a/paddle/gserver/layers/CosSimLayer.h b/paddle/gserver/layers/CosSimLayer.h index 5dcc5d8a5b4dc76cb6cea023a874049731a26516..8afaee62c2dcacba006846df0111fcbe8f7575e4 100644 --- a/paddle/gserver/layers/CosSimLayer.h +++ b/paddle/gserver/layers/CosSimLayer.h @@ -28,7 +28,7 @@ namespace paddle { * * - Input1: A vector (batchSize * dataDim) * * - Input2: A vector (batchSize * dataDim) or (1 * dataDim) * - * - Output: A vector (dataDim * 1) + * - Output: A vector (batchSize * 1) * * The config file api is cos_sim. */ @@ -38,10 +38,11 @@ public: ~CosSimLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/CosSimVecMatLayer.cpp b/paddle/gserver/layers/CosSimVecMatLayer.cpp index ad490b0b8c4656c1eabf519233f2386b4b6e9417..0f887d8adfa053e8fe88ac4fa4e2a9ba08ac07b5 100644 --- a/paddle/gserver/layers/CosSimVecMatLayer.cpp +++ b/paddle/gserver/layers/CosSimVecMatLayer.cpp @@ -18,7 +18,6 @@ limitations under the License. 
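The CosSim layers above now route their math through the paddle Function mechanism instead of ad-hoc Matrix member calls. Condensed, the pattern is: resolve a named kernel once in init() via createFunction, then hand it BufferArgs at each step (in1/in2/out are placeholders for the layer's real buffers):

    // init(): bind the kernel and its config once
    createFunction(forward_, "CosSimForward",
                   FuncConfig().set("scale", (real)config_.cos_scale()));

    // forward(): describe buffers instead of calling outV->cosSim(...)
    BufferArgs inputs, outputs;
    inputs.addArg(*in1);              // read-only inputs, in kernel order
    inputs.addArg(*in2);
    outputs.addArg(*out, ASSIGN_TO);  // ASSIGN_TO overwrites; ADD_TO accumulates
    forward_[0]->calc(inputs, outputs);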
*/ #include "paddle/utils/Stat.h" namespace paddle { - /** * @brief A layer for computing cosine similarity between a vector * and each row of a matrix @@ -46,10 +45,11 @@ public: ~CosSimVecMatLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(cos_vm, CosSimVecMatLayer); @@ -97,11 +97,22 @@ bool CosSimVecMatLayer::init(const LayerMap& layerMap, dataDim, /* trans= */ false, useGpu_); + + CHECK(tmpRow0 && tmpRow1 && tmpRow2 && tmpRow3 && tmpMtx0 && tmpMtx1); + + createFunction(forward_, + "CosSimForward", + FuncConfig().set("scale", (real)config_.cos_scale())); + createFunction(backward_, + "CosSimBackward", + FuncConfig().set("scale", (real)config_.cos_scale())); + return true; } void CosSimVecMatLayer::forward(PassType passType) { Layer::forward(passType); + CHECK_EQ(forward_.size(), 1UL) << "Only one forward function needed"; MatrixPtr inV0 = getInputValue(0); MatrixPtr inV1 = getInputValue(1); @@ -117,17 +128,25 @@ void CosSimVecMatLayer::forward(PassType passType) { } MatrixPtr outV = getOutputValue(); - + CHECK(outV && inV0 && inV1); REGISTER_TIMER_INFO("FwCosVMTimer", getName().c_str()); for (size_t i = 0; i < batchSize; i++) { tmpRow0->setData(inV0->rowBuf(i)); tmpMtx0->setData(inV1->rowBuf(i)); tmpRow2->setData(outV->rowBuf(i)); - tmpRow2->cosSim(*(tmpMtx0), *(tmpRow0), config_.cos_scale()); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*tmpMtx0); + inputs.addArg(*tmpRow0); + outputs.addArg(*tmpRow2, ASSIGN_TO); + forward_[0]->calc(inputs, outputs); } } void CosSimVecMatLayer::backward(const UpdateCallback& callback) { + CHECK_EQ(backward_.size(), 1UL) << "Only one forward function needed"; + MatrixPtr inV0 = getInputValue(0); MatrixPtr inV1 = getInputValue(1); MatrixPtr inG0 = getInputGrad(0); @@ -136,27 +155,27 @@ void CosSimVecMatLayer::backward(const UpdateCallback& callback) { MatrixPtr outG = getOutputGrad(); size_t batchSize = inV0->getHeight(); - + CHECK(inV0 && inV1 && inG0 && inG1 && outV && outG); REGISTER_TIMER_INFO("BwCosVMTimer", getName().c_str()); - if (inG0 && inG1) { - for (size_t i = 0; i < batchSize; i++) { - tmpRow0->setData(inV0->rowBuf(i)); - tmpRow1->setData(inG0->rowBuf(i)); - tmpMtx0->setData(inV1->rowBuf(i)); - tmpMtx1->setData(inG1->rowBuf(i)); - tmpRow2->setData(outV->rowBuf(i)); - tmpRow3->setData(outG->rowBuf(i)); - - tmpRow3->cosSimDerivative(*(tmpRow2), - *(tmpMtx0), - *(tmpRow0), - *(tmpMtx1), - *(tmpRow1), - config_.cos_scale()); - } - } else { - CHECK(!inG0 || !inG1) << "Not supported"; + for (size_t i = 0; i < batchSize; i++) { + tmpRow0->setData(inV0->rowBuf(i)); + tmpRow1->setData(inG0->rowBuf(i)); + tmpMtx0->setData(inV1->rowBuf(i)); + tmpMtx1->setData(inG1->rowBuf(i)); + tmpRow2->setData(outV->rowBuf(i)); + tmpRow3->setData(outG->rowBuf(i)); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*tmpRow3); + inputs.addArg(*tmpRow2); + inputs.addArg(*tmpMtx0); + inputs.addArg(*tmpRow0); + outputs.addArg(*tmpMtx1, ADD_TO); + outputs.addArg(*tmpRow1, ADD_TO); + + backward_[0]->calc(inputs, outputs); } } diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 7e9519f6b3af50bf47b660b285c3593087f80271..998b8d7d3034cb18fbab242c66656092bfc50fcb 100644 --- 
a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -367,8 +367,6 @@ void LambdaCost::backward(const UpdateCallback& callback) { getInputGrad(0)->add(*marginGrad_); } -void LambdaCost::onPassEnd() {} - void LambdaCost::calcGrad(const real* outputScore, const real* score, real* gradData, @@ -611,14 +609,15 @@ class SumCostLayer : public Layer { public: explicit SumCostLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { bool ret = Layer::init(layerMap, parameterMap); if (!ret) return ret; CHECK_EQ(inputLayers_.size(), 1UL); return true; } - virtual void forward(PassType passType) { + void forward(PassType passType) override { Layer::forward(passType); const MatrixPtr& input = getInputValue(0); @@ -629,7 +628,7 @@ public: output_.value->sumRows(*input, /* scaleSum= */ 1, /* scaleDest= */ 0); } - virtual void backward(const UpdateCallback& callback = nullptr) { + void backward(const UpdateCallback& callback = nullptr) override { getInputGrad(0)->add((real)1); } }; diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index 7f73bdb3f7d63ef1c8d76deb64f40d19d20f87c7..b3045e0b31308abf2caa90cbd21f105e685ef341 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -32,15 +32,16 @@ class CostLayer : public Layer { public: explicit CostLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; LayerPtr getOutputLayer() { return inputLayers_[0]; } LayerPtr getLabelLayer() { return inputLayers_[1]; } - virtual void forward(PassType passType); + void forward(PassType passType) override; - virtual void backward(const UpdateCallback& callback = nullptr); + void backward(const UpdateCallback& callback = nullptr) override; virtual void forwardImp(Matrix& outputValue, Argument& label, @@ -68,11 +69,14 @@ public: explicit MultiClassCrossEntropy(const LayerConfig& config) : CostLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forwardImp(Matrix& output, Argument& label, Matrix& cost); + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad); + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; }; /** @@ -95,11 +99,14 @@ public: explicit MultiClassCrossEntropyWithSelfNorm(const LayerConfig& config) : CostLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forwardImp(Matrix& output, Argument& label, Matrix& cost); + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad); + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; protected: MatrixPtr sftMaxSum_; @@ -117,11 +124,14 @@ public: explicit SoftBinaryClassCrossEntropy(const LayerConfig& config) : CostLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& 
parameterMap) override; - void forwardImp(Matrix& output, Argument& label, Matrix& cost); + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad); + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; protected: MatrixPtr targetPerDim_; @@ -139,11 +149,14 @@ public: explicit SumOfSquaresCostLayer(const LayerConfig& config) : CostLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forwardImp(Matrix& output, Argument& label, Matrix& cost); + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad); + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; }; /** @@ -162,17 +175,18 @@ class RankingCost : public Layer { public: explicit RankingCost(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; LayerPtr getOutputLayer(size_t i) { return inputLayers_[i]; } LayerPtr getLabelLayer() { return inputLayers_[2]; } - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr); + void backward(const UpdateCallback& callback = nullptr) override; - void onPassEnd(); + void onPassEnd() override; void forwardImp(Matrix& output, Argument& label, Matrix& cost) { (void)output; @@ -214,17 +228,16 @@ class LambdaCost : public Layer { public: explicit LambdaCost(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; LayerPtr getOutputLayer() { return inputLayers_[0]; } LayerPtr getScoreLayer() { return inputLayers_[1]; } - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr); - - void onPassEnd(); + void backward(const UpdateCallback& callback = nullptr) override; real calcNDCG(const real* outputScore, const real* score, int size); void calcGrad(const real* outputScore, @@ -256,11 +269,14 @@ public: explicit MultiBinaryLabelCrossEntropy(const LayerConfig& config) : CostLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forwardImp(Matrix& output, Argument& label, Matrix& cost); + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad); + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; }; /** @@ -282,13 +298,16 @@ class HuberTwoClass : public CostLayer { public: explicit HuberTwoClass(const LayerConfig& config) : CostLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forwardImp(Matrix& output, Argument& label, Matrix& cost); + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; void forwardImpIn(Matrix& output, Argument& label, Matrix& cost); - void backwardImp(Matrix& 
outputValue, Argument& label, Matrix& outputGrad); + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; void backwardImpIn(Matrix& outputValue, Argument& label, Matrix& outputGrad); }; diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.h b/paddle/gserver/layers/CudnnBatchNormLayer.h index b1e7d2082f1443313bfc858a17adfd737ecff98f..413efd4d3ecd734b343efbcf8328ac0592daddda 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.h +++ b/paddle/gserver/layers/CudnnBatchNormLayer.h @@ -35,14 +35,15 @@ public: ~CudnnBatchNormLayer(); - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; /** * reshape tensor of ioDesc_. */ void reshape(int batchSize); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: /** diff --git a/paddle/gserver/layers/CudnnConvLayer.h b/paddle/gserver/layers/CudnnConvLayer.h index b869c695bd753076c6501a1253fcad22139ccadf..919b1efc4e453219a6c2ab1a11c61ccb99404084 100644 --- a/paddle/gserver/layers/CudnnConvLayer.h +++ b/paddle/gserver/layers/CudnnConvLayer.h @@ -45,9 +45,10 @@ public: ~CudnnConvLayer(); - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - void forward(PassType passType); - void backward(const UpdateCallback& callback); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; void addBiases(); void bpropBiases(); }; diff --git a/paddle/gserver/layers/CudnnPoolLayer.h b/paddle/gserver/layers/CudnnPoolLayer.h index 072b2f9513f4ef8aed03ecfa7a9014667bb2ce9e..f0aa22fe3af90c9233330c15fc56c3696a624446 100644 --- a/paddle/gserver/layers/CudnnPoolLayer.h +++ b/paddle/gserver/layers/CudnnPoolLayer.h @@ -45,7 +45,8 @@ public: hl_pooling_mode_t* mode = nullptr); explicit CudnnPoolLayer(const LayerConfig& config); ~CudnnPoolLayer(); - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; /** * Reshape input and output tensor descriptor. @@ -53,8 +54,8 @@ public: * So reshaping is needed. */ void reshape(int batchSize); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/DataLayer.h b/paddle/gserver/layers/DataLayer.h index d3bc97bb6cd0b8faf8ae108a0147d77854596e25..a9cf1f943c260a934564a19aecda28c24ccff43c 100644 --- a/paddle/gserver/layers/DataLayer.h +++ b/paddle/gserver/layers/DataLayer.h @@ -33,13 +33,13 @@ public: /** * Prefetch sparse matrix/ids only. */ - void prefetch() { output_ = data_; } + void prefetch() override { output_ = data_; } /** * Forward propagation. Copy data_ (value, in, grad, ids, cpuSequenceDims, * sequenceStartPositions, subSequenceStartPositions, strs) to output_. */ - virtual void forward(PassType passType) { + void forward(PassType passType) override { Layer::forward(passType); copyDataToOutput(output_); if (FLAGS_show_layer_stat) { @@ -50,9 +50,9 @@ public: /** * Data layer's backward propagation do nothing. 
*/ - virtual void backward(const UpdateCallback& callback) { (void)callback; } + void backward(const UpdateCallback& callback) override { (void)callback; } - virtual void copyOutputToOtherDevice() { + void copyOutputToOtherDevice() override { for (size_t i = 0; i != outputOtherDevice_.size(); i++) { copyDataToOutput(outputOtherDevice_[i]); } diff --git a/paddle/gserver/layers/DataNormLayer.h b/paddle/gserver/layers/DataNormLayer.h index b3043cffd210feaf9ddaed096de762aa7e2a6139..f0fd044e5b83430a4028a227c7d5a31b6fa86f20 100644 --- a/paddle/gserver/layers/DataNormLayer.h +++ b/paddle/gserver/layers/DataNormLayer.h @@ -44,10 +44,11 @@ public: ~DataNormLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: int mode_; diff --git a/paddle/gserver/layers/EosIdCheckLayer.cpp b/paddle/gserver/layers/EosIdCheckLayer.cpp index fa53e2e4cfc8a220eeb2a637d7fe759f1744f9d5..686f1fa0543cb3629ac223316e595e642a9e7d76 100644 --- a/paddle/gserver/layers/EosIdCheckLayer.cpp +++ b/paddle/gserver/layers/EosIdCheckLayer.cpp @@ -27,14 +27,14 @@ class EosIdCheckLayer : public Layer { public: explicit EosIdCheckLayer(const LayerConfig& config) : Layer(config) {} - virtual bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { bool ret = Layer::init(layerMap, parameterMap); CHECK_EQ(1UL, inputLayers_.size()); return ret; } - virtual void forward(PassType passType) { + void forward(PassType passType) override { Layer::forward(passType); const Argument& input = getInput(0); @@ -42,7 +42,7 @@ public: output_.ids->isEqualTo(*input.ids, config_.eos_id()); } - virtual void backward(const UpdateCallback& callback) {} + void backward(const UpdateCallback& callback) override {} }; REGISTER_LAYER(eos_id, EosIdCheckLayer); diff --git a/paddle/gserver/layers/ExpandConvBaseLayer.h b/paddle/gserver/layers/ExpandConvBaseLayer.h index 8445642217cf3e83441ddd9beec80f99faf946bc..aabcdfc392d3e242df84c820c336d8b32c7cb04f 100644 --- a/paddle/gserver/layers/ExpandConvBaseLayer.h +++ b/paddle/gserver/layers/ExpandConvBaseLayer.h @@ -48,7 +48,8 @@ public: ~ExpandConvBaseLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; size_t getOutputSize(); /** diff --git a/paddle/gserver/layers/ExpandConvLayer.h b/paddle/gserver/layers/ExpandConvLayer.h index de81a017e1bac38a5717e8c83a028f5408c0e084..60681690e5dd55b2e9aa4e1f25758db6033665a6 100644 --- a/paddle/gserver/layers/ExpandConvLayer.h +++ b/paddle/gserver/layers/ExpandConvLayer.h @@ -35,10 +35,11 @@ public: ~ExpandConvLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvTransLayer.h b/paddle/gserver/layers/ExpandConvTransLayer.h index 
4a527d67995e255c65fea1f310551f8de5630030..00b8f241889fdd3f423d75dedd9068aa3674f190 100644 --- a/paddle/gserver/layers/ExpandConvTransLayer.h +++ b/paddle/gserver/layers/ExpandConvTransLayer.h @@ -34,10 +34,11 @@ public: ~ExpandConvTransLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/ExpandLayer.h b/paddle/gserver/layers/ExpandLayer.h index 5c636144235cdb3800aa921464985616f8ee9203..c21b3350e2bc4b136eaf50f96799f479a13df6bd 100644 --- a/paddle/gserver/layers/ExpandLayer.h +++ b/paddle/gserver/layers/ExpandLayer.h @@ -53,10 +53,11 @@ public: ~ExpandLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/FeatureMapExpandLayer.cpp b/paddle/gserver/layers/FeatureMapExpandLayer.cpp index d023074c52167554358d0d4df7ec40cfba9da2a6..b3850f543af74abbddaac5bb0a32851f2d3297d0 100644 --- a/paddle/gserver/layers/FeatureMapExpandLayer.cpp +++ b/paddle/gserver/layers/FeatureMapExpandLayer.cpp @@ -46,10 +46,11 @@ public: ~FeatureMapExpandLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(featmap_expand, FeatureMapExpandLayer); @@ -95,6 +96,9 @@ void FeatureMapExpandLayer::forward(PassType passType) { void FeatureMapExpandLayer::backward(const UpdateCallback& callback) { MatrixPtr inGrad = getInputGrad(0); + if (NULL == inGrad) { + return; + } MatrixPtr outGrad = getOutputGrad(); size_t batchSize = getInput(0).getBatchSize(); int imgSize = inGrad->getWidth(); diff --git a/paddle/gserver/layers/FullyConnectedLayer.h b/paddle/gserver/layers/FullyConnectedLayer.h index ccd584585c97cb679332cbd10d6f3a1306ca5a54..64e7a050125aa92b414e58c7678bf87efd01103f 100644 --- a/paddle/gserver/layers/FullyConnectedLayer.h +++ b/paddle/gserver/layers/FullyConnectedLayer.h @@ -36,13 +36,14 @@ public: explicit FullyConnectedLayer(const LayerConfig& config) : Layer(config) {} ~FullyConnectedLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; Weight& getWeight(int idx) { return *weights_[idx]; } - void prefetch(); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void prefetch() override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/GatedRecurrentLayer.cpp b/paddle/gserver/layers/GatedRecurrentLayer.cpp index 
930d9a056164e7c677adb53b7b67901364da1309..d3aeea921801da301b2829736059130aec14cef6 100644 --- a/paddle/gserver/layers/GatedRecurrentLayer.cpp +++ b/paddle/gserver/layers/GatedRecurrentLayer.cpp @@ -314,13 +314,13 @@ void GatedRecurrentLayer::forwardBatch(int batchSize, batchValue_->resizeOrCreate(*output_.value); batchValue_->copy(*inputValue, *gate_.value, /* seq2batch */ true); - if (bias_ && bias_->getWGrad()) { + if (bias_) { gate_.value->addBias(*(bias_->getW()), 1); } { int numBatch = batchValue_->getNumBatch(); - int batchSize = 0; + int curBatchSize = 0; AsyncGpuBlock asyncGpuBlock; for (int n = 0; n < numBatch; n++) { MatrixPtr outputValueTmp = batchValue_->getBatchValue(n); @@ -330,16 +330,17 @@ void GatedRecurrentLayer::forwardBatch(int batchSize, gruValue.resetOutputValue = (batchValue_->getBatchValue(*resetOutput_.value, n))->getData(); - batchSize = outputValueTmp->getHeight(); + curBatchSize = outputValueTmp->getHeight(); gruValue.prevOutValue = - (n == 0 ? nullptr - : (batchValue_->getBatchValue(n - 1, batchSize))->getData()); + (n == 0 + ? nullptr + : (batchValue_->getBatchValue(n - 1, curBatchSize))->getData()); { if (useGpu_) { - GruCompute::forward<1>(gruValue, getSize(), batchSize); + GruCompute::forward<1>(gruValue, getSize(), curBatchSize); } else { - GruCompute::forward<0>(gruValue, getSize(), batchSize); + GruCompute::forward<0>(gruValue, getSize(), curBatchSize); } } } diff --git a/paddle/gserver/layers/GatedRecurrentLayer.h b/paddle/gserver/layers/GatedRecurrentLayer.h index 25770ce57fbaa4d16c9454d824800f2f0c7f957d..58dd760eb870e9570f8a406f098f69c5fdf6477a 100644 --- a/paddle/gserver/layers/GatedRecurrentLayer.h +++ b/paddle/gserver/layers/GatedRecurrentLayer.h @@ -50,17 +50,18 @@ class GatedRecurrentLayer : public Layer, public GruCompute { public: explicit GatedRecurrentLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback& callback); + void backward(const UpdateCallback& callback) override; - void resetState(); + void resetState() override; - void setState(LayerStatePtr state); + void setState(LayerStatePtr state) override; - LayerStatePtr getState(); + LayerStatePtr getState() override; protected: void forwardSequence(int batchSize, diff --git a/paddle/gserver/layers/GetOutputLayer.cpp b/paddle/gserver/layers/GetOutputLayer.cpp index b77fdbb30e11b72b0c7de765df173204aa0b6851..4e29efd4612b18e655ba7674a3fd7890ce3f0e79 100644 --- a/paddle/gserver/layers/GetOutputLayer.cpp +++ b/paddle/gserver/layers/GetOutputLayer.cpp @@ -22,17 +22,18 @@ public: ~GetOutputLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { if (!Layer::init(layerMap, parameterMap)) return false; CHECK_EQ(1U, inputLayers_.size()); CHECK_NE(inputArgument_[0], ""); return true; } - void forward(PassType passType) { + void forward(PassType passType) override { output_ = getPrev(0)->getOutput(inputArgument_[0]); } - void backward(const UpdateCallback& callback = nullptr) {} + void backward(const UpdateCallback& callback = nullptr) override {} }; REGISTER_LAYER(get_output, GetOutputLayer); diff --git a/paddle/gserver/layers/GruStepLayer.cpp b/paddle/gserver/layers/GruStepLayer.cpp index 
4a1006aa941f396c233a0cecfc38228f1f9fafe1..5b5cb25f9269a30f79d602b342411d0e6bfa429b 100644 --- a/paddle/gserver/layers/GruStepLayer.cpp +++ b/paddle/gserver/layers/GruStepLayer.cpp @@ -55,10 +55,11 @@ public: ~GruStepLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(gru_step, GruStepLayer); diff --git a/paddle/gserver/layers/HierarchicalSigmoidLayer.h b/paddle/gserver/layers/HierarchicalSigmoidLayer.h index 70da3ac126e147387b20c5a97d0116a5a679e044..3f6875fb9f007c0938bfcd7cad99c73b4ba1511b 100644 --- a/paddle/gserver/layers/HierarchicalSigmoidLayer.h +++ b/paddle/gserver/layers/HierarchicalSigmoidLayer.h @@ -61,9 +61,10 @@ class HierarchicalSigmoidLayer : public Layer { public: explicit HierarchicalSigmoidLayer(const LayerConfig& config) : Layer(config) {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; protected: /** diff --git a/paddle/gserver/layers/InterpolationLayer.cpp b/paddle/gserver/layers/InterpolationLayer.cpp index 44fe1fb1fea4203a4a1cac67c581b13adda65966..eac7428571980baf6b2ddb8b2cc85b9c98afa5d6 100644 --- a/paddle/gserver/layers/InterpolationLayer.cpp +++ b/paddle/gserver/layers/InterpolationLayer.cpp @@ -43,10 +43,11 @@ public: ~InterpolationLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(interpolation, InterpolationLayer); diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index c47943f81c01589eada4b825d54be5c69314b6fa..f76d41ad3e8a3b1730f9d50c0773ee4f61ddb541 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -15,6 +15,7 @@ limitations under the License. 
*/ #include "paddle/utils/Util.h" #include "paddle/math/SparseMatrix.h" +#include "paddle/utils/Error.h" #include "paddle/utils/Logging.h" #include "AddtoLayer.h" @@ -334,7 +335,8 @@ void Layer::showOutputStats() { void Layer::forwardActivation() { /* activation */ - activation_->forward(output_); + auto status = activation_->forward(output_); + status.check(); /* dropout */ if (config_.drop_rate() > 0) { @@ -372,7 +374,8 @@ void Layer::backwardActivation() { oGrad->dotMul(*oGrad, *dropOutMask_); } - activation_->backward(output_); + auto status = activation_->backward(output_); + status.check(); } void Layer::forwardDropOut() { diff --git a/paddle/gserver/layers/Layer.h b/paddle/gserver/layers/Layer.h index 6dfd48fb96618102b71e9f6de79a348dc7f62647..7c4bea072157aac17787afab184b51c09ff656f2 100644 --- a/paddle/gserver/layers/Layer.h +++ b/paddle/gserver/layers/Layer.h @@ -311,6 +311,7 @@ public: return *output->second; } else { LOG(FATAL) << "No specific output " << str; + return *((Argument*)nullptr); } } } diff --git a/paddle/gserver/layers/LstmLayer.h b/paddle/gserver/layers/LstmLayer.h index f49df2c412f05f74da455d41cdf7c9bd4b9ec2e2..c45a52d2e9aaf41a8e02495cc2deae60ab13650a 100644 --- a/paddle/gserver/layers/LstmLayer.h +++ b/paddle/gserver/layers/LstmLayer.h @@ -74,17 +74,18 @@ class LstmLayer : public Layer, public LstmCompute { public: explicit LstmLayer(const LayerConfig &config) : Layer(config) {} - bool init(const LayerMap &layerMap, const ParameterMap ¶meterMap); + bool init(const LayerMap &layerMap, + const ParameterMap ¶meterMap) override; - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback &callback); + void backward(const UpdateCallback &callback) override; - void resetState(); + void resetState() override; - void setState(LayerStatePtr state); + void setState(LayerStatePtr state) override; - LayerStatePtr getState(); + LayerStatePtr getState() override; protected: /** diff --git a/paddle/gserver/layers/LstmStepLayer.cpp b/paddle/gserver/layers/LstmStepLayer.cpp index 5fc6474b8653f4c7dac284e11d88f803405169a3..568277a90c62c73a811dcbf66782a4bdc4021b81 100644 --- a/paddle/gserver/layers/LstmStepLayer.cpp +++ b/paddle/gserver/layers/LstmStepLayer.cpp @@ -35,10 +35,11 @@ public: ~LstmStepLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(lstm_step, LstmStepLayer); diff --git a/paddle/gserver/layers/MDLstmLayer.cpp b/paddle/gserver/layers/MDLstmLayer.cpp index fb41af563195496a57eafcc52b49eadac697fa0a..be0f2a07d4aae253b7b18dbe406c4b94bf96bc8e 100644 --- a/paddle/gserver/layers/MDLstmLayer.cpp +++ b/paddle/gserver/layers/MDLstmLayer.cpp @@ -181,11 +181,12 @@ class MDLstmLayer : public LstmLayer { public: explicit MDLstmLayer(const LayerConfig& config) : LstmLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback& callback); + void backward(const UpdateCallback& callback) override; protected: void forwardOneSequence(int start, CoordIterator& coordIter); @@ 
-506,9 +507,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start, *frameState_[start + preOffsetV[i]].value, *checkFgOneDim, 1.0, 1.0); } } - activationGate_->forward(frameInputGate_[idxCurr]); - activationGate_->forward(frameForgetGate_[idxCurr]); - activation_->forward(frameInputNode_[idxCurr]); + auto status = activationGate_->forward(frameInputGate_[idxCurr]); + status.check(); + status = activationGate_->forward(frameForgetGate_[idxCurr]); + status.check(); + status = activation_->forward(frameInputNode_[idxCurr]); + status.check(); frameState_[idxCurr].value->zeroMem(); for (int i = 0; i < numDims_; i++) { @@ -530,10 +534,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start, frameOutputGate_[idxCurr].value->addDotMul( *frameState_[idxCurr].value, *checkOg_, 1.0, 1.0); - activationGate_->forward(frameOutputGate_[idxCurr]); + status = activationGate_->forward(frameOutputGate_[idxCurr]); + status.check(); framePreOutput_[idxCurr].value->copyFrom(*(frameState_[idxCurr].value)); - activationState_->forward(framePreOutput_[idxCurr]); + status = activationState_->forward(framePreOutput_[idxCurr]); + status.check(); frameOutput_[idxCurr].value->dotMul(*framePreOutput_[idxCurr].value, *frameOutputGate_[idxCurr].value); @@ -640,12 +646,12 @@ void MDLstmLayer::backwardGate2OutputSequence(int start, framePreOutput_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad, *frameOutputGate_[idxCurr].value); - activationState_->backward(framePreOutput_[idxCurr]); + activationState_->backward(framePreOutput_[idxCurr]).check(); frameState_[idxCurr].grad->copyFrom(*(framePreOutput_[idxCurr].grad)); frameOutputGate_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad, *framePreOutput_[idxCurr].value); - activationGate_->backward(frameOutputGate_[idxCurr]); + activationGate_->backward(frameOutputGate_[idxCurr]).check(); frameState_[idxCurr].grad->addDotMul( *frameOutputGate_[idxCurr].grad, *checkOg_, 1.0, 1.0); @@ -702,9 +708,9 @@ void MDLstmLayer::backwardGate2OutputSequence(int start, } } - activationGate_->backward(frameInputGate_[idxCurr]); - activationGate_->backward(frameForgetGate_[idxCurr]); - activation_->backward(frameInputNode_[idxCurr]); + activationGate_->backward(frameInputGate_[idxCurr]).check(); + activationGate_->backward(frameForgetGate_[idxCurr]).check(); + activation_->backward(frameInputNode_[idxCurr]).check(); if (bias_->getWGrad()) { for (int i = 0; i < numDims_; i++) { diff --git a/paddle/gserver/layers/MaxIdLayer.cpp b/paddle/gserver/layers/MaxIdLayer.cpp index 80555f3f7b324100c059c3356a4a2e462bc6face..9e72b167cd963ae4928bf85503214dd7cee31148 100644 --- a/paddle/gserver/layers/MaxIdLayer.cpp +++ b/paddle/gserver/layers/MaxIdLayer.cpp @@ -30,8 +30,8 @@ private: public: explicit MaxIdLayer(const LayerConfig& config) : Layer(config) {} - virtual bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { bool ret = Layer::init(layerMap, parameterMap); CHECK_EQ(1UL, inputLayers_.size()); @@ -40,7 +40,7 @@ public: return ret; } - virtual void forward(PassType passType) { + void forward(PassType passType) override { Layer::forward(passType); const Argument& input = getInput(0); size_t batchSize = input.getBatchSize(); @@ -54,7 +54,7 @@ public: input.value->rowMax(*output_.ids, *output_.in); } - virtual void backward(const UpdateCallback& callback) {} + void backward(const UpdateCallback& callback) override {} }; REGISTER_LAYER(maxid, MaxIdLayer); diff --git 
a/paddle/gserver/layers/MaxLayer.h b/paddle/gserver/layers/MaxLayer.h index 472ee0ccca196250f4b81fc1e921aaee5f352b7e..baa58ca2d7a6970f0d2f3ef6f8609404c82efa30 100644 --- a/paddle/gserver/layers/MaxLayer.h +++ b/paddle/gserver/layers/MaxLayer.h @@ -42,14 +42,13 @@ protected: public: explicit MaxLayer(const LayerConfig& config) : SequencePoolLayer(config) {} - ~MaxLayer() {} - - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { return SequencePoolLayer::init(layerMap, parameterMap); } - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/MaxOutLayer.h b/paddle/gserver/layers/MaxOutLayer.h index 59c2245e0d6490d4f8e1b77b1c88267747aaa63a..73fd8536be56b2c620fbfdea1937f3acd593bf05 100644 --- a/paddle/gserver/layers/MaxOutLayer.h +++ b/paddle/gserver/layers/MaxOutLayer.h @@ -45,10 +45,11 @@ public: explicit MaxOutLayer(const LayerConfig& config) : Layer(config) {} virtual ~MaxOutLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/MixedLayer.h b/paddle/gserver/layers/MixedLayer.h index 9655a152c7bc96fb3941fcbd9db4ff71a59e4ebe..755c9deb8b1be34b6f44a7b30b107f99102a3853 100644 --- a/paddle/gserver/layers/MixedLayer.h +++ b/paddle/gserver/layers/MixedLayer.h @@ -35,21 +35,22 @@ public: ~MixedLayer() {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - virtual void prefetch(); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); - virtual void resetState(); + void prefetch() override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + void resetState() override; /** * setState() should be called after getState(). * Argument state consists of all projections states. */ - virtual void setState(LayerStatePtr state); + void setState(LayerStatePtr state) override; /** * Return state which consists of all projections states. 
*/ - virtual LayerStatePtr getState(); + LayerStatePtr getState() override; protected: std::vector<std::unique_ptr<Projection>> projections_; diff --git a/paddle/gserver/layers/MultiplexLayer.cpp b/paddle/gserver/layers/MultiplexLayer.cpp index d09720c5255747df11d4d7367f67a245e63e6846..297972b3cd9e4dfba94e2597053ab7c7c560c9dd 100644 --- a/paddle/gserver/layers/MultiplexLayer.cpp +++ b/paddle/gserver/layers/MultiplexLayer.cpp @@ -69,10 +69,11 @@ public: ~MultiplexLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; private: /** diff --git a/paddle/gserver/layers/NCELayer.cpp b/paddle/gserver/layers/NCELayer.cpp index 5ab765247f63dfe6e6651ca4d27dc7183a9f33e1..0bc2ef11829337d9b765ef00066289494eb984b3 100644 --- a/paddle/gserver/layers/NCELayer.cpp +++ b/paddle/gserver/layers/NCELayer.cpp @@ -61,7 +61,8 @@ public: rand_(0, config.num_classes() - 1), prepared_(false) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); @@ -146,7 +147,7 @@ public: prepared_ = true; } - void prefetch() { + void prefetch() override { prepareSamples(); IVector::resizeOrCreate(labelIds_, samples_.size(), useGpu_); int* ids = labelIds_->getData(); @@ -163,7 +164,7 @@ public: } } - void forward(PassType passType) { + void forward(PassType passType) override { Layer::forward(passType); CHECK(!useGpu_) << "GPU is not supported"; @@ -193,12 +194,13 @@ public: forwardOneInput(l); } - activation_->forward(sampleOut_); + auto status = activation_->forward(sampleOut_); + status.check(); forwardCost(); } - void backward(const UpdateCallback& callback) { + void backward(const UpdateCallback& callback) override { Matrix::resizeOrCreate(sampleOut_.grad, 1, samples_.size(), @@ -207,7 +209,8 @@ public: backwardCost(); - activation_->backward(sampleOut_); + auto status = activation_->backward(sampleOut_); + status.check(); if (biases_->getWGrad()) { backwardBias(callback); diff --git a/paddle/gserver/layers/NormLayer.h b/paddle/gserver/layers/NormLayer.h index 011bab8fdedab00b336290a245b82de07496b554..e77faaa322570933b3ea2de877b7859857306432 100644 --- a/paddle/gserver/layers/NormLayer.h +++ b/paddle/gserver/layers/NormLayer.h @@ -30,7 +30,8 @@ class NormLayer : public Layer { public: explicit NormLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { Layer::init(layerMap, parameterMap); return true; } @@ -56,9 +57,10 @@ protected: public: explicit ResponseNormLayer(const LayerConfig& config) : NormLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - void forward(PassType passType) { LOG(FATAL) << "Not implemented"; } - void backward(const UpdateCallback& callback = nullptr) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override { LOG(FATAL) << "Not implemented"; } + void backward(const UpdateCallback& callback = nullptr) override { LOG(FATAL) << "Not implemented"; } }; diff --git 
a/paddle/gserver/layers/NormProjectionLayer.h b/paddle/gserver/layers/NormProjectionLayer.h index 2c0d8a3a718c484508b2bf6d4e7861d54a1682bb..2997ae8848c438fa13037ccf03c1faca9ad73224 100644 --- a/paddle/gserver/layers/NormProjectionLayer.h +++ b/paddle/gserver/layers/NormProjectionLayer.h @@ -36,9 +36,10 @@ public: size_t getSize(); - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: TensorShape shape_; diff --git a/paddle/gserver/layers/OuterProdLayer.cpp b/paddle/gserver/layers/OuterProdLayer.cpp index b606e4436567eb2a8df9fd501a2af8c8aa1d2fdf..283fdb003a2bb9474eac7a379ceb2c02027cfc5f 100644 --- a/paddle/gserver/layers/OuterProdLayer.cpp +++ b/paddle/gserver/layers/OuterProdLayer.cpp @@ -38,10 +38,11 @@ public: ~OuterProdLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(out_prod, OuterProdLayer); diff --git a/paddle/gserver/layers/PadLayer.cpp b/paddle/gserver/layers/PadLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bb618c09f9777785d93995fa7140dd4a5383cd1b --- /dev/null +++ b/paddle/gserver/layers/PadLayer.cpp @@ -0,0 +1,115 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "PadLayer.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +REGISTER_LAYER(pad, PadLayer); + +bool PadLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + + auto& pad_conf = config_.inputs(0).pad_conf(); + auto& img_conf = pad_conf.image_conf(); + CHECK_EQ(config_.inputs_size(), 1); + inDims_ = TensorShape( + {0, + img_conf.channels(), + img_conf.has_img_size_y() ? 
img_conf.img_size_y() : img_conf.img_size(), + img_conf.img_size()}); + + CHECK_EQ(2, pad_conf.pad_c_size()); + CHECK_EQ(2, pad_conf.pad_h_size()); + CHECK_EQ(2, pad_conf.pad_w_size()); + padc_.push_back(pad_conf.pad_c(0)); + padc_.push_back(pad_conf.pad_c(1)); + padh_.push_back(pad_conf.pad_h(0)); + padh_.push_back(pad_conf.pad_h(1)); + padw_.push_back(pad_conf.pad_w(0)); + padw_.push_back(pad_conf.pad_w(1)); + + outDims_ = TensorShape(4); + setOutDims(0); + + createFunction(forward_, + "Pad", + FuncConfig() + .set("cstart", padc_[0]) + .set("cend", padc_[1]) + .set("hstart", padh_[0]) + .set("hend", padh_[1]) + .set("wstart", padw_[0]) + .set("wend", padw_[1])); + createFunction(backward_, + "PadGrad", + FuncConfig() + .set("cstart", padc_[0]) + .set("cend", padc_[1]) + .set("hstart", padh_[0]) + .set("hend", padh_[1]) + .set("wstart", padw_[0]) + .set("wend", padw_[1])); + + return true; +} + +void PadLayer::setOutDims(const size_t batchSize) { + outDims_.reshape({batchSize, + inDims_[1] + padc_[0] + padc_[1], + inDims_[2] + padh_[0] + padh_[1], + inDims_[3] + padw_[0] + padw_[1]}); +} + +void PadLayer::setTensorDim(const size_t batchSize) { + CHECK_EQ(static_cast<int>(inputLayers_.size()), 1); + inDims_.setDim(0, batchSize); + int h = inputLayers_[0]->getOutput().getFrameHeight(); + if (h != 0) inDims_.setDim(2, h); + int w = inputLayers_[0]->getOutput().getFrameWidth(); + if (w != 0) inDims_.setDim(3, w); + setOutDims(batchSize); +} + +void PadLayer::forward(PassType passType) { + Layer::forward(passType); + MatrixPtr input = inputLayers_[0]->getOutputValue(); + size_t batchSize = input->getHeight(); + setTensorDim(batchSize); + int size = outDims_[1] * outDims_[2] * outDims_[3]; + resetOutput(batchSize, size); + MatrixPtr outV = getOutputValue(); + REGISTER_TIMER_INFO("PadForward", getName().c_str()); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*getInputValue(0), inDims_); + outputs.addArg(*getOutputValue(), outDims_, ASSIGN_TO); + forward_[0]->calc(inputs, outputs); +} + +void PadLayer::backward(const UpdateCallback& callback) { + (void)callback; + REGISTER_TIMER_INFO("PadBackward", getName().c_str()); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*getOutputGrad(), outDims_); + outputs.addArg(*getInputGrad(0), inDims_, ADD_TO); + backward_[0]->calc(inputs, outputs); +} +} // namespace paddle diff --git a/paddle/gserver/layers/PadLayer.h b/paddle/gserver/layers/PadLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..b2bbf28082e630aeb429ee997a1d43ce7ba05d1c --- /dev/null +++ b/paddle/gserver/layers/PadLayer.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Layer.h" + +namespace paddle { + +/** + * \brief This layer pads zeros to inputs according to the specified dimensions. + * The input and output are 4D tensors. Padding zeros from the 2nd to + * the 4th dimension according to padc_, padh_ and padw_.
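+ * A small worked example (sizes are hypothetical, not taken from this diff): + * for an input of shape {N, C, H, W} = {2, 3, 4, 5} with padc_ = {1, 1}, + * padh_ = {0, 0} and padw_ = {2, 2}, setOutDims() above yields an output + * shape of {2, 3 + 1 + 1, 4 + 0 + 0, 5 + 2 + 2} = {2, 5, 4, 9}.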
+ */ +class PadLayer : public Layer { +public: + explicit PadLayer(const LayerConfig& config) : Layer(config) {} + + ~PadLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +protected: + void setOutDims(const size_t batchSize); + void setTensorDim(const size_t batchSize); + + std::vector<int> padc_; + std::vector<int> padh_; + std::vector<int> padw_; + TensorShape inDims_; + TensorShape outDims_; +}; +} // namespace paddle diff --git a/paddle/gserver/layers/ParameterReluLayer.h b/paddle/gserver/layers/ParameterReluLayer.h index a82497fc01ca1f63719a905c7545911a7e05289b..9a11b81ebf1f5c06355fc107b00aa69b65148ed5 100644 --- a/paddle/gserver/layers/ParameterReluLayer.h +++ b/paddle/gserver/layers/ParameterReluLayer.h @@ -56,9 +56,10 @@ public: ~ParameterReluLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/PoolLayer.h b/paddle/gserver/layers/PoolLayer.h index 318b89d7c2bce896d183eba8c48c230d962918a5..d43292ad2d4bbe1229ca59ca21bee92c9ec006a3 100644 --- a/paddle/gserver/layers/PoolLayer.h +++ b/paddle/gserver/layers/PoolLayer.h @@ -46,7 +46,8 @@ public: */ static Layer* create(const LayerConfig& config); - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/PoolProjectionLayer.h b/paddle/gserver/layers/PoolProjectionLayer.h index 3dc6af2f0e9fb1a12eca7bc0c531a2e7b151fb8a..e31116de8ccb1f6b847c9fff47961bedfad1a79c 100644 --- a/paddle/gserver/layers/PoolProjectionLayer.h +++ b/paddle/gserver/layers/PoolProjectionLayer.h @@ -40,7 +40,7 @@ public: size_t getSize(); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/PowerLayer.cpp b/paddle/gserver/layers/PowerLayer.cpp index 64fecab5b08354ceea8b290b78eede72d24a98a2..31c34b43e2995a2bf7f4d16629a8172a7e76c8e1 100644 --- a/paddle/gserver/layers/PowerLayer.cpp +++ b/paddle/gserver/layers/PowerLayer.cpp @@ -40,10 +40,11 @@ public: ~PowerLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(power, PowerLayer); diff --git a/paddle/gserver/layers/PrintLayer.cpp b/paddle/gserver/layers/PrintLayer.cpp index ac7f658864fee6812ea89d1dbd84ad4db94e3035..de198af111be4200dd1b240f6de9464e3f43b06d 100644 --- a/paddle/gserver/layers/PrintLayer.cpp +++ b/paddle/gserver/layers/PrintLayer.cpp @@ -19,38 +19,17 @@ namespace paddle { class PrintLayer : public Layer { public: explicit PrintLayer(const LayerConfig& config) : Layer(config) 
{} - void forward(PassType passType); - void backward(const UpdateCallback& callback) {} -}; -void PrintLayer::forward(PassType passType) { - Layer::forward(passType); - for (size_t i = 0; i != inputLayers_.size(); ++i) { - const auto& argu = getInput(i); - const std::string& name = inputLayers_[i]->getName(); - if (argu.value) { - std::ostringstream os; - argu.value->print(os); - LOG(INFO) << "layer=" << name << " value matrix:\n" << os.str(); - } - if (argu.ids) { - std::ostringstream os; - argu.ids->print(os, argu.ids->getSize()); - LOG(INFO) << "layer=" << name << " ids vector:\n" << os.str(); - } - if (auto startPos = argu.sequenceStartPositions) { - std::ostringstream os; - startPos->getVector(false)->print(os, startPos->getSize()); - LOG(INFO) << "layer=" << name << " sequence pos vector:\n" << os.str(); - } - if (auto subStartPos = argu.subSequenceStartPositions) { - std::ostringstream os; - subStartPos->getVector(false)->print(os, subStartPos->getSize()); - LOG(INFO) << "layer=" << name << " sub-sequence pos vector:\n" - << os.str(); + void forward(PassType passType) override { + Layer::forward(passType); + for (size_t i = 0; i != inputLayers_.size(); ++i) { + getInput(i).printValueString(LOG(INFO), + "layer=" + inputLayers_[i]->getName() + " "); } } -} + + void backward(const UpdateCallback& callback) override {} +}; REGISTER_LAYER(print, PrintLayer); diff --git a/paddle/gserver/layers/PriorBox.cpp b/paddle/gserver/layers/PriorBox.cpp index 36ace7597cd66cc2d83353ec999a75c79dd1e33e..bcf5e912a50fef2cec8ebdf1e0dad9efa43fba2f 100644 --- a/paddle/gserver/layers/PriorBox.cpp +++ b/paddle/gserver/layers/PriorBox.cpp @@ -30,10 +30,11 @@ namespace paddle { class PriorBoxLayer : public Layer { public: explicit PriorBoxLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback) {} + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override {} protected: int numPriors_; diff --git a/paddle/gserver/layers/RecurrentLayer.cpp b/paddle/gserver/layers/RecurrentLayer.cpp index 55e0fdfb9048c02b2dcd474c6887eee180328260..e4c2b483d2fa4032735858dab17647592791a9c7 100644 --- a/paddle/gserver/layers/RecurrentLayer.cpp +++ b/paddle/gserver/layers/RecurrentLayer.cpp @@ -45,17 +45,18 @@ class RecurrentLayer : public Layer { public: explicit RecurrentLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback& callback); + void backward(const UpdateCallback& callback) override; - void resetState(); + void resetState() override; - void setState(LayerStatePtr state); + void setState(LayerStatePtr state) override; - LayerStatePtr getState(); + LayerStatePtr getState() override; protected: /** @@ -217,21 +218,22 @@ void RecurrentLayer::forwardOneSequence(int start, int length) { if (prevOutput_) { frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1); } - activation_->forward(frameOutput_[start]); + activation_->forward(frameOutput_[start]).check(); + for (int i = 1; i < length; ++i) { frameOutput_[start + i].value->mul( *frameOutput_[start + i - 1].value, 
*weight_->getW(), 1, 1); - activation_->forward(frameOutput_[start + i]); + activation_->forward(frameOutput_[start + i]).check(); } if (prevOutput_) { prevOutput_->assign(*frameOutput_[start + length - 1].value); } } else { - activation_->forward(frameOutput_[start + length - 1]); + activation_->forward(frameOutput_[start + length - 1]).check(); for (int i = length - 2; i >= 0; --i) { frameOutput_[start + i].value->mul( *frameOutput_[start + i + 1].value, *weight_->getW(), 1, 1); - activation_->forward(frameOutput_[start + i]); + activation_->forward(frameOutput_[start + i]).check(); } } } @@ -280,11 +282,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) { MatrixPtr weightT = weight_->getW()->getTranspose(); if (!reversed_) { for (int i = length - 1; i > 0; --i) { - activation_->backward(frameOutput_[start + i]); + activation_->backward(frameOutput_[start + i]).check(); frameOutput_[start + i - 1].grad->mul( *frameOutput_[start + i].grad, *weightT, 1, 1); } - activation_->backward(frameOutput_[start]); + activation_->backward(frameOutput_[start]).check(); if (weight_->getWGrad()) { weight_->getWGrad()->mul( *output_.value->subMatrix(start, length - 1)->getTranspose(), @@ -294,11 +296,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) { } } else { for (int i = 0; i < length - 1; ++i) { - activation_->backward(frameOutput_[start + i]); + activation_->backward(frameOutput_[start + i]).check(); frameOutput_[start + i + 1].grad->mul( *frameOutput_[start + i].grad, *weightT, 1, 1); } - activation_->backward(frameOutput_[start + length - 1]); + activation_->backward(frameOutput_[start + length - 1]).check(); if (weight_->getWGrad()) { weight_->getWGrad()->mul( *output_.value->subMatrix(start + 1, length - 1)->getTranspose(), @@ -333,7 +335,7 @@ void RecurrentLayer::forwardBatch(int batchSize, } Argument arg; arg.value = batch2; - activation_->forward(arg); + activation_->forward(arg).check(); } } batchValue_->copyBackSeq(*output_.value); @@ -363,7 +365,7 @@ void RecurrentLayer::backwardBatch(int batchSize, Argument arg; arg.value = batch1; arg.grad = batch2; - activation_->backward(arg); + activation_->backward(arg).check(); if (n != 0) { batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight()); diff --git a/paddle/gserver/layers/RecurrentLayerGroup.cpp b/paddle/gserver/layers/RecurrentLayerGroup.cpp index af8dd61d84e2e53ca26dc054d0516e62ab7aa216..78a74ff19a38cd205f3a46900bf716e2e1b1e4d5 100644 --- a/paddle/gserver/layers/RecurrentLayerGroup.cpp +++ b/paddle/gserver/layers/RecurrentLayerGroup.cpp @@ -33,15 +33,15 @@ public: void initSubNetwork(NeuralNetwork* rootNetwork, const ModelConfig& config, const std::vector<ParameterType>& parameterTypes, - bool useGpu); + bool useGpu) override; - void forward(PassType passType) { + void forward(PassType passType) override { REGISTER_TIMER_INFO("RecurrentGroupFwTime", getName().c_str()); const std::vector<Argument> inArgs; std::vector<Argument*> outArgs; network_->forward(inArgs, &outArgs, passType); } - void backward(const UpdateCallback& callback) { + void backward(const UpdateCallback& callback) override { REGISTER_TIMER_INFO("RecurrentGroupBwTime", getName().c_str()); network_->backward(nullptr); @@ -53,7 +53,8 @@ public: /** * @see Layer.accessSubNetwork */ - void accessSubNetwork(const std::function<void(NeuralNetwork&)>& callback) { + void accessSubNetwork( + const std::function<void(NeuralNetwork&)>& callback) override { callback(*network_); } diff --git a/paddle/gserver/layers/ResizeLayer.cpp b/paddle/gserver/layers/ResizeLayer.cpp index 
7fcb3adea01b9d16394ee90b751b10902dc3a190..eb3b63c106901f89dd75cc2a495477b240d40e3c 100644 --- a/paddle/gserver/layers/ResizeLayer.cpp +++ b/paddle/gserver/layers/ResizeLayer.cpp @@ -20,18 +20,19 @@ namespace paddle { /** * @brief A layer for resizing a minibatch matrix h*w to h'*w' * @note - * origin matrix height * witdth) + * origin matrix height * width) * resize matrix: (height * width / size) * size */ class ResizeLayer : public Layer { public: explicit ResizeLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback& callback); + void backward(const UpdateCallback& callback) override; }; REGISTER_LAYER(resize, ResizeLayer); diff --git a/paddle/gserver/layers/RotateLayer.cpp b/paddle/gserver/layers/RotateLayer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7c71088d786ab218bf0f71b577985c023dd1436f --- /dev/null +++ b/paddle/gserver/layers/RotateLayer.cpp @@ -0,0 +1,102 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "RotateLayer.h" + +namespace paddle { + +REGISTER_LAYER(rotate, RotateLayer); + +bool RotateLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + CHECK_EQ(inputLayers_.size(), 1UL); + height_ = config_.height(); + width_ = config_.width(); + CHECK_GT(height_, 0); + CHECK_GT(width_, 0); + return true; +} + +void RotateLayer::forward(PassType passType) { + Layer::forward(passType); + + MatrixPtr input = getInputValue(0); + batchSize_ = input->getHeight(); + size_ = input->getWidth(); + CHECK_GE(size_, height_ * width_); + CHECK_EQ(size_ % (height_ * width_), 0) + << "total size_ is not dividable by (height_ * width_), i.e., " + << "channel number should be an integer"; + channels_ = size_ / (height_ * width_); + + resizeOutput(batchSize_, size_); + + MatrixPtr outV = getOutputValue(); + for (int b = 0; b < batchSize_; b++) { // for each input feat map + for (int c = 0; c < channels_; c++) { // for each feat channel + MatrixPtr inputSample = + Matrix::create(input->getData() + b * size_ + c * height_ * width_, + height_, + width_, + false, + useGpu_); + MatrixPtr outputSample = + Matrix::create(outV->getData() + b * size_ + c * height_ * width_, + width_, + height_, + false, + useGpu_); + inputSample->rotate(outputSample, false, true /* clock-wise */); + } + } + + if (getInputGrad(0)) { + zeroGrad(); + } +} + +void RotateLayer::backward(const UpdateCallback& callback) { + (void)callback; + + MatrixPtr outputGrad = getOutputGrad(); + if (outputGrad == NULL) { + return; + } + // the grad should be rotated in the reverse direction + MatrixPtr preGrad = getInputGrad(0); + + for (int b = 0; b < batchSize_; b++) { // for each input feat map + for (int c = 0; c < channels_; c++) { // 
for each feat channel + MatrixPtr inputSampleGrad = + Matrix::create(preGrad->getData() + b * size_ + c * height_ * width_, + height_, + width_, + false, + useGpu_); + MatrixPtr outputSampleGrad = Matrix::create( + outputGrad->getData() + b * size_ + c * height_ * width_, + width_, + height_, + false, + useGpu_); + MatrixPtr tmpGrad = nullptr; + outputSampleGrad->rotate(tmpGrad, true, false /* anti clock-wise */); + inputSampleGrad->add(*tmpGrad); + } + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/RotateLayer.h b/paddle/gserver/layers/RotateLayer.h new file mode 100644 index 0000000000000000000000000000000000000000..1a64d4d5a51d9c04df07861f02f1bb91eaec088e --- /dev/null +++ b/paddle/gserver/layers/RotateLayer.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Layer.h" +#include "paddle/math/Matrix.h" + +namespace paddle { +/** + * A layer for rotating a multi-channel feature map (M x N x C) in the spatial + * domain. + * The rotation is 90 degrees clock-wise for each channel: + * \f[ + * y(j,i,:) = x(M-i-1,j,:) + * \f] + * where \f$x\f$ is (M x N x C) input, and \f$y\f$ is (N x M x C) output. + * + * The config file api is rotate_layer + * +*/ + +class RotateLayer : public Layer { +public: + explicit RotateLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + void forward(PassType passType); + void backward(const UpdateCallback& callback = nullptr); + +private: + int batchSize_; + int size_; + int height_; + int width_; + int channels_; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/SamplingIdLayer.cpp b/paddle/gserver/layers/SamplingIdLayer.cpp index 59ff5d41b529099277375cd5e1b498f3331c3b0a..2538d99bb71fa1ce6546730b817a49347fe3c5d8 100644 --- a/paddle/gserver/layers/SamplingIdLayer.cpp +++ b/paddle/gserver/layers/SamplingIdLayer.cpp @@ -35,8 +35,8 @@ public: explicit SamplingIdLayer(const LayerConfig& config) : Layer(config), rand1_(0, 1) {} - virtual bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { bool ret = Layer::init(layerMap, parameterMap); CHECK_EQ(1UL, inputLayers_.size()); if (useGpu_) { @@ -48,7 +48,7 @@ public: return ret; } - void forward(PassType passType) { + void forward(PassType passType) override { Layer::forward(passType); if (useGpu_) { for (size_t i = 0; i < inputLayers_.size(); i++) { @@ -83,7 +83,7 @@ public: output_.ids->copyFrom(ids.data(), batchSize); } - virtual void backward(const UpdateCallback& callback) {} + void backward(const UpdateCallback& callback) override {} }; REGISTER_LAYER(sampling_id, SamplingIdLayer); diff --git a/paddle/gserver/layers/ScalingLayer.cpp b/paddle/gserver/layers/ScalingLayer.cpp index 7f0084be6b57f5ce8245609e64c744c1a049a925..a38ee0857a767981eb24e79e96bf6115e9c63720 100644 --- a/paddle/gserver/layers/ScalingLayer.cpp +++ 
b/paddle/gserver/layers/ScalingLayer.cpp @@ -37,10 +37,11 @@ public: ~ScalingLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(scaling, ScalingLayer); diff --git a/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp b/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp index 5eacff6b7143996130bea64766ef42c66f4c7310..d9a91de8a6f4daf514f089a3d63cb519223bfdd0 100644 --- a/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp +++ b/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp @@ -192,7 +192,8 @@ void SelectiveFullyConnectedLayer::forward(PassType passType) { nnz, /*trans=*/false, /*useGpu=*/useGpu_); - activation_->forward(arg); + //! TODO(yuyang18): Why we cannot invoke forwardActivation here? + activation_->forward(arg).check(); } else /* train and test in train, not generating */ { // during training, this layer output value is *Matrix*, which is input of // eg. multi-class-cross-entropy diff --git a/paddle/gserver/layers/SelectiveFullyConnectedLayer.h b/paddle/gserver/layers/SelectiveFullyConnectedLayer.h index bdf9a4652cc71710d1d33e8b085c5aec28f6f806..99126fdba542bd142341039af27c3af72b391ca7 100644 --- a/paddle/gserver/layers/SelectiveFullyConnectedLayer.h +++ b/paddle/gserver/layers/SelectiveFullyConnectedLayer.h @@ -65,9 +65,10 @@ public: : Layer(config), selCols_(nullptr) {} ~SelectiveFullyConnectedLayer() {} - void prefetch(); + void prefetch() override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; Weight& getWeight(int idx) { return *weights_[idx]; } @@ -90,8 +91,8 @@ public: void fillSelectiveData( const std::shared_ptr>>& candidates); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; private: /** diff --git a/paddle/gserver/layers/SequenceConcatLayer.cpp b/paddle/gserver/layers/SequenceConcatLayer.cpp index 069bc26e602ff7d925b4115d12388b6716676b29..4b24d8f0c852e1bdc887d4ee1465b9ad05d210bb 100644 --- a/paddle/gserver/layers/SequenceConcatLayer.cpp +++ b/paddle/gserver/layers/SequenceConcatLayer.cpp @@ -21,9 +21,11 @@ namespace paddle { /** * A layer for concatenating the first sequence with the second sequence - * following the first - * Input: two sequences each containing some instances + * Input: two sequences each containing the same number of instances + * seq1 = [a1, a2, ..., an] + * seq2 = [b1, b2, ..., bn] * Output: a concatenated sequence of the two input sequences + * out = [a1, b1, a2, b2, ..., an, bn] */ class SequenceConcatLayer : public Layer { @@ -35,10 +37,11 @@ public: ~SequenceConcatLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(seqconcat, SequenceConcatLayer); @@ -167,13 +170,17 @@ void 
SequenceConcatLayer::backward(const UpdateCallback& callback) { size_t rightNumIns = 0; for (size_t seqId = 0; seqId < numSequences1; ++seqId) { leftNumIns = starts1[seqId + 1] - starts1[seqId]; - inputGrad1->subMatrix(starts1[seqId], leftNumIns) - ->add(*(outputGrad->subMatrix(offset, leftNumIns))); + if (inputGrad1) { + inputGrad1->subMatrix(starts1[seqId], leftNumIns) + ->add(*(outputGrad->subMatrix(offset, leftNumIns))); + } offset += leftNumIns; rightNumIns = starts2[seqId + 1] - starts2[seqId]; - inputGrad2->subMatrix(starts2[seqId], rightNumIns) - ->add(*(outputGrad->subMatrix(offset, rightNumIns))); + if (inputGrad2) { + inputGrad2->subMatrix(starts2[seqId], rightNumIns) + ->add(*(outputGrad->subMatrix(offset, rightNumIns))); + } offset += rightNumIns; } } diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp index 4bfce766c769f4be2e5cc7bf691d539b1d307a47..7a13cd7ad0fecf202613d8da365ea832b41ab04e 100644 --- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp +++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp @@ -42,12 +42,11 @@ public: explicit SequenceLastInstanceLayer(const LayerConfig& config) : SequencePoolLayer(config) {} - ~SequenceLastInstanceLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer); diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h index aa9c132586e55d0f6bccec1689db60145ca2d43f..85b51ccd1dc7e7eb7aa9344b0f7ec6f70a35a0b4 100644 --- a/paddle/gserver/layers/SequencePoolLayer.h +++ b/paddle/gserver/layers/SequencePoolLayer.h @@ -46,12 +46,11 @@ protected: public: explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {} - virtual ~SequencePoolLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/SequenceReshapeLayer.cpp b/paddle/gserver/layers/SequenceReshapeLayer.cpp index 23924b0490851ad3c3c74d77e7abd8b0af8fc234..433592953b220eda4db4634124a57a2074cef4c0 100644 --- a/paddle/gserver/layers/SequenceReshapeLayer.cpp +++ b/paddle/gserver/layers/SequenceReshapeLayer.cpp @@ -20,9 +20,12 @@ limitations under the License. */ namespace paddle { /** - * A layer for reshaping the sequence - * Input: a sequence - * Output: a sequence + * A layer for reshaping the sequence. Assume the input sequence has + * T instances, the dimension of each instance is M, and the input + * reshape_dim is N, then the output sequence has T*M/N instances, + * the dimension of each instance is N. + * + * Note that T*M/N must be an integer. 
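+ * A worked example with illustrative numbers: a sequence of T = 4
+ * instances of dimension M = 6, reshaped with reshape_dim N = 8,
+ * yields 4 * 6 / 8 = 3 instances of dimension 8.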
*/ class SequenceReshapeLayer : public Layer { @@ -34,12 +37,11 @@ protected: public: explicit SequenceReshapeLayer(const LayerConfig& config) : Layer(config) {} - ~SequenceReshapeLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(seqreshape, SequenceReshapeLayer); diff --git a/paddle/gserver/layers/SlopeInterceptLayer.cpp b/paddle/gserver/layers/SlopeInterceptLayer.cpp index b678f414b6d76fa26818cb379fb0f0fb8fc7ec09..faf98744a7fdcf9c2c1712d783f153739ccc8eca 100644 --- a/paddle/gserver/layers/SlopeInterceptLayer.cpp +++ b/paddle/gserver/layers/SlopeInterceptLayer.cpp @@ -39,12 +39,11 @@ class SlopeInterceptLayer : public Layer { public: explicit SlopeInterceptLayer(const LayerConfig& config) : Layer(config) {} - ~SlopeInterceptLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(slope_intercept, SlopeInterceptLayer); diff --git a/paddle/gserver/layers/SpatialPyramidPoolLayer.h b/paddle/gserver/layers/SpatialPyramidPoolLayer.h index 32e88cf141a667d9dffbe7dcba46e9fde721f9e7..7d3cb80443801a947e3d529beb002561c4ac1964 100644 --- a/paddle/gserver/layers/SpatialPyramidPoolLayer.h +++ b/paddle/gserver/layers/SpatialPyramidPoolLayer.h @@ -43,9 +43,8 @@ protected: public: explicit SpatialPyramidPoolLayer(const LayerConfig& config) : Layer(config) {} - ~SpatialPyramidPoolLayer() {} - - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; ProjectionConfig getConfig(size_t sizeX_, size_t sizeY_, @@ -54,7 +53,7 @@ public: std::string& poolType_); size_t getSize(); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/SubSequenceLayer.cpp b/paddle/gserver/layers/SubSequenceLayer.cpp index c52fbee26232ad6eb09f84315a57c73e6aa02eb0..19b7ad1869af98e6313fe85a40203fd1e84f31d6 100644 --- a/paddle/gserver/layers/SubSequenceLayer.cpp +++ b/paddle/gserver/layers/SubSequenceLayer.cpp @@ -35,12 +35,11 @@ protected: public: explicit SubSequenceLayer(const LayerConfig& config) : Layer(config) {} - ~SubSequenceLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(subseq, SubSequenceLayer); diff --git a/paddle/gserver/layers/SumToOneNormLayer.cpp b/paddle/gserver/layers/SumToOneNormLayer.cpp index aa99b49380d3682ccf3d89220c0c68f22e458271..00f8519550bcff9bb706b1a28dc0dfcdc06cc54a 100644 
--- a/paddle/gserver/layers/SumToOneNormLayer.cpp +++ b/paddle/gserver/layers/SumToOneNormLayer.cpp @@ -41,12 +41,11 @@ protected: public: explicit SumToOneNormLayer(const LayerConfig& config) : Layer(config) {} - ~SumToOneNormLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(sum_to_one_norm, SumToOneNormLayer); diff --git a/paddle/gserver/layers/TensorLayer.h b/paddle/gserver/layers/TensorLayer.h index ac38ffb620570320497446a6825ca2273b73facc..43992f692d3ce40fa095c8e0190bae01dc2ac3c1 100644 --- a/paddle/gserver/layers/TensorLayer.h +++ b/paddle/gserver/layers/TensorLayer.h @@ -44,13 +44,12 @@ protected: public: explicit TensorLayer(const LayerConfig& config) : Layer(config) {} - ~TensorLayer() {} - - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; Weight& getWeight(int idx) { return *weights_[idx]; } - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/TransLayer.h b/paddle/gserver/layers/TransLayer.h index b43fa1ebfb003226daed724b4ede3006545e8b07..be10bb74f6b218f0b12dc9f20db9a6ee8af7a478 100644 --- a/paddle/gserver/layers/TransLayer.h +++ b/paddle/gserver/layers/TransLayer.h @@ -20,7 +20,7 @@ limitations under the License. */ namespace paddle { /** - * A layer for transposition. + * A layer for transposing a minibatch matrix. 
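+ * (so an M x N input minibatch becomes an N x M output)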
* \f[ y = x^\mathrm{T} * \f] @@ -32,9 +32,10 @@ class TransLayer : public Layer { public: explicit TransLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/ValidationLayer.h b/paddle/gserver/layers/ValidationLayer.h index 4c1de7b3b7d6975c2693eb065f7d3e19cc51a95c..c8b2634a1366ed03846f2331726d04232b5d32ee 100644 --- a/paddle/gserver/layers/ValidationLayer.h +++ b/paddle/gserver/layers/ValidationLayer.h @@ -26,7 +26,8 @@ class ValidationLayer : public Layer { public: explicit ValidationLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; LayerPtr getOutputLayer() { return inputLayers_[0]; } @@ -37,13 +38,13 @@ public: return inputLayers_[2]; } - virtual void forward(PassType passType); + void forward(PassType passType) override; - virtual void backward(const UpdateCallback& callback = nullptr); + void backward(const UpdateCallback& callback = nullptr) override; virtual void validationImp(MatrixPtr outputValue, IVectorPtr label) = 0; - virtual void onPassEnd() = 0; + void onPassEnd() override = 0; }; /* @@ -57,11 +58,12 @@ public: cpuLabel_(nullptr), cpuWeight_(nullptr) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void validationImp(MatrixPtr outputValue, IVectorPtr label); + void validationImp(MatrixPtr outputValue, IVectorPtr label) override; - void onPassEnd(); + void onPassEnd() override; struct PredictionResult { PredictionResult(real __out, int __label) : out(__out), label(__label) {} @@ -86,11 +88,12 @@ public: explicit PnpairValidation(const LayerConfig& config) : ValidationLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void validationImp(MatrixPtr outputValue, IVectorPtr label); + void validationImp(MatrixPtr outputValue, IVectorPtr label) override; - void onPassEnd(); + void onPassEnd() override; private: bool passBegin_; diff --git a/paddle/gserver/layers/WarpCTCLayer.h b/paddle/gserver/layers/WarpCTCLayer.h index 3d9ae9249af66dd085f5b6bb7a3c09d8b2276a24..7e8d7379d267886805db2eb7983a4dabbf949914 100644 --- a/paddle/gserver/layers/WarpCTCLayer.h +++ b/paddle/gserver/layers/WarpCTCLayer.h @@ -30,9 +30,10 @@ public: explicit WarpCTCLayer(const LayerConfig& config) : Layer(config) {} ~WarpCTCLayer() {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; protected: /** diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index ae016e74eaa84f7c43a30c09c8c4577e25360c4e..7617af10ba719490d1b33dd297b070cd8c7c292c 100644 --- 
a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -24,7 +24,7 @@ real getCostSum(LayerPtr& testLayer, MatrixPtr weights) { if (weights) { outArgs[0].value->dotMul(*outArgs[0].value, *weights); } - return Argument::sumCosts(outArgs); + return Argument::sum(outArgs); } real getDiffAndPrint(real newCost1, @@ -241,7 +241,7 @@ void testBatchState(LayerPtr testLayer, std::vector args; args.push_back(out); - EXPECT_EQ(0, Argument::sumCosts(args)) << "testBatchState failed"; + EXPECT_EQ(0, Argument::sum(args)) << "testBatchState failed"; for (size_t seqId = 0; seqId < numSequences; ++seqId) { start[seqId] += seqLens[seqId]; } @@ -672,7 +672,7 @@ void testLayerGradKernel(TestConfig testConf, outArgs[0].value->dotMul(*testLayer->getOutput().value, *weights); } - real cost = Argument::sumCosts(outArgs); + real cost = Argument::sum(outArgs); LOG(INFO) << " cost " << cost; EXPECT_FALSE(std::isnan(cost)); diff --git a/paddle/gserver/tests/test_Evaluator.cpp b/paddle/gserver/tests/test_Evaluator.cpp index 8165eb8269336193858962edac4f9637c2fc1c2f..4f5fdbb37ce024e18b8d39c5dda74c69bf82166a 100644 --- a/paddle/gserver/tests/test_Evaluator.cpp +++ b/paddle/gserver/tests/test_Evaluator.cpp @@ -110,6 +110,18 @@ void testEvaluator(TestConfig testConf, testEvaluator->finish(); LOG(INFO) << *testEvaluator; + std::vector names; + testEvaluator->getNames(&names); + paddle::Error err; + for (auto& name : names) { + auto value = testEvaluator->getValue(name, &err); + ASSERT_TRUE(err.isOK()); + LOG(INFO) << name << " " << value; + auto tp = testEvaluator->getType(name, &err); + ASSERT_TRUE(err.isOK()); + ASSERT_EQ(testConf.evaluatorConfig.type(), tp); + } + double totalScore2 = 0.0; if (testConf.testAccumulate) { testEvaluator->start(); @@ -129,6 +141,7 @@ void testEvaluatorAll(TestConfig testConf, TEST(Evaluator, classification_error) { TestConfig config; config.evaluatorConfig.set_type("classification_error"); + config.evaluatorConfig.set_top_k(5); config.inputDefs.push_back({INPUT_DATA, "output", 50}); config.inputDefs.push_back({INPUT_LABEL, "label", 50}); diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 66a70ecd41091b9590038dab3194dd2a0c59dd03..14d9db52470b2828186eca04d303135910489266 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -310,7 +310,11 @@ TEST(Layer, CTCLayer) { config.layerConfig.add_inputs(); for (auto useGpu : {false, true}) { - testLayerGrad(config, "ctc", 100, /* trans */ false, /* useGpu */ useGpu); + testLayerGrad(config, + "ctc", + 100, + /* trans */ false, /* useGpu */ + useGpu); } } @@ -587,7 +591,11 @@ TEST(Layer, hsigmoidLayer) { config.layerConfig.add_inputs(); // Not support GPU now - testLayerGrad(config, "hsigmoid", 100, /* trans */ false, /* useGpu */ false); + testLayerGrad(config, + "hsigmoid", + 100, + /* trans */ false, /* useGpu */ + false); } TEST(Layer, multi_cross) { @@ -1022,8 +1030,12 @@ void testNormLayer(const string& normType, bool trans, bool useGpu) { } TEST(Layer, NormLayer) { - testNormLayer("cmrnorm-projection", /* trans= */ false, /* useGpu= */ true); - testNormLayer("cmrnorm-projection", /* trans= */ false, /* useGpu= */ false); + testNormLayer("cmrnorm-projection", + /* trans= */ false, /* useGpu= */ + true); + testNormLayer("cmrnorm-projection", + /* trans= */ false, /* useGpu= */ + false); } void setPoolConfig(TestConfig* config, @@ -1304,6 +1316,25 @@ TEST(Layer, ResizeLayer) { } } +TEST(Layer, RotateLayer) { + 
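+  // Each sample is laid out as CHANNEL feature maps of HEIGHT x WIDTH
+  // (2 x 8 x 4 = 64 values per sample); testLayerGrad then checks the
+  // rotate layer's gradients on both CPU and GPU.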
TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("rotate"); + const int CHANNEL = 2; + const int HEIGHT = 8; + const int WIDTH = 4; + const int INPUT_SIZE = HEIGHT * WIDTH * CHANNEL; + config.layerConfig.set_size(INPUT_SIZE); + config.layerConfig.set_height(HEIGHT); + config.layerConfig.set_width(WIDTH); + config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "rotate", 100, false, useGpu); + } +} + TEST(Layer, NCELayer) { TestConfig config; size_t numClasses = 4; @@ -1563,6 +1594,35 @@ TEST(Layer, MultiplexLayer) { } } +TEST(Layer, PadLayer) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("pad"); + + int c = 4; + int h = 31; + int w = 36; + size_t size = c * h * w; + config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + PadConfig* pad = input->mutable_pad_conf(); + ImageConfig* image = pad->mutable_image_conf(); + + image->set_channels(c); + image->set_img_size(h); + image->set_img_size_y(w); + pad->add_pad_c(1); + pad->add_pad_c(2); + pad->add_pad_h(2); + pad->add_pad_h(3); + pad->add_pad_w(3); + pad->add_pad_w(5); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "pad", 10, false, useGpu); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/paddle/gserver/tests/test_WarpCTCLayer.cpp b/paddle/gserver/tests/test_WarpCTCLayer.cpp index 23ae95852e84216c9065f1b123d35ce868fbb90f..55427e2f12fd7b77c6eea1f65b3229e6fd29d71d 100644 --- a/paddle/gserver/tests/test_WarpCTCLayer.cpp +++ b/paddle/gserver/tests/test_WarpCTCLayer.cpp @@ -148,11 +148,11 @@ LayerPtr createCTCLayer(string name, ActivationFunction* softmaxActivation = ActivationFunction::create("softmax"); - softmaxActivation->forward(dataLayer->getOutput()); + softmaxActivation->forward(dataLayer->getOutput()).check(); layer->forward(PASS_GC); layer->backward(); - softmaxActivation->backward(dataLayer->getOutput()); + softmaxActivation->backward(dataLayer->getOutput()).check(); return layer; } diff --git a/paddle/math/CpuSparseMatrix.cpp b/paddle/math/CpuSparseMatrix.cpp index 82a482f701481267e564c7ad8179689deb65a75b..bf62229c03bb1d6e2bdf86d8c56a8157938fb832 100644 --- a/paddle/math/CpuSparseMatrix.cpp +++ b/paddle/math/CpuSparseMatrix.cpp @@ -372,7 +372,7 @@ MatrixPtr CpuSparseMatrix::subMatrix(size_t startRow, size_t numRows) { } /* mem MUST be alloced outside (memAlloc=false) */ -void CpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { +void CpuSparseMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) { CHECK(!memAlloc); CpuSparseMatrix* mat = dynamic_cast(matTrans.get()); if (format_ == SPARSE_CSR) { diff --git a/paddle/math/CpuSparseMatrix.h b/paddle/math/CpuSparseMatrix.h index d3e8871cb5b320ce420d601bde7f18d85398dde7..860cad1047fc343b13efa901186ea218d0855151 100644 --- a/paddle/math/CpuSparseMatrix.h +++ b/paddle/math/CpuSparseMatrix.h @@ -201,7 +201,7 @@ public: void zeroMem(); /// mem MUST be alloced outside (memAlloc=false) - void transpose(MatrixPtr matTrans, bool memAlloc); + void transpose(MatrixPtr& matTrans, bool memAlloc); void mul(const Matrix& A, const Matrix& B, real alpha, real beta); diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 3ae237bc7de895293c15eedc811cf8a2011a7c52..07450bfb0ef709840f7e8253e87c227276529a2a 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ 
-274,6 +274,18 @@ real GpuMatrix::getSum() { return sum; } +real GpuMatrix::getMin() { + CHECK(isContiguous()); + auto vec = GpuVector(height_ * width_, data_); + return vec.getMin(); +} + +real GpuMatrix::getMax() { + CHECK(isContiguous()); + auto vec = GpuVector(height_ * width_, data_); + return vec.getMax(); +} + void GpuMatrix::accumulateColSum(Matrix& src) { CHECK_EQ(getWidth(), src.getWidth()); CHECK_EQ(getHeight(), (size_t)1); @@ -371,11 +383,13 @@ MatrixPtr GpuMatrix::getTranspose() { } } -void GpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { +void GpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) { if (memAlloc) { matTrans = std::make_shared(width_, height_); } else { CHECK(matTrans != NULL); + CHECK_EQ(matTrans->getHeight(), width_); + CHECK_EQ(matTrans->getWidth(), height_); } real* dataTrans = matTrans->getData(); real* data = getData(); @@ -385,13 +399,27 @@ void GpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { hl_matrix_transpose(data, dataTrans, height_, width_, lda, ldc); } +void GpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) { + if (memAlloc) { + matRot = std::make_shared(width_, height_); + } else { + CHECK(matRot != NULL); + CHECK_EQ(matRot->getHeight(), width_); + CHECK_EQ(matRot->getWidth(), height_); + } + + real* dataRot = matRot->getData(); + real* data = getData(); + hl_matrix_rotate(data, dataRot, height_, width_, clockWise); +} + MatrixPtr GpuMatrix::getInverse() { MatrixPtr matInv; inverse(matInv, true); return matInv; } -void GpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) { +void GpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) { CHECK_EQ(height_, width_); if (memAlloc) { @@ -704,6 +732,7 @@ void GpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) { size_t beam = maxVal.getWidth(); CHECK_EQ(maxIds.getSize(), numSamples * beam); CHECK_EQ(maxVal.getHeight(), numSamples); + CHECK_EQ(maxVal.getWidth(), beam); hl_matrix_top_k(maxVal.getData(), maxVal.getStride(), @@ -764,19 +793,32 @@ void GpuMatrix::maxoutBackward(Matrix& a, } /*calulate the error of classification */ -void GpuMatrix::classificationError(Matrix& output, IVector& label) { - auto output_ptr = dynamic_cast(&output); - auto label_ptr = dynamic_cast(&label); - CHECK(output_ptr && label_ptr) << "Invalid argument pointer"; - - CHECK(height_ == output_ptr->height_ && width_ == 1) +void GpuMatrix::classificationError(Matrix& output, + IVector& label, + size_t topkSize) { + auto gpuOutput = dynamic_cast(&output); + auto gpuLabel = dynamic_cast(&label); + size_t numSamples = this->getHeight(); + GpuMatrixPtr gpuTopVal = std::make_shared(numSamples, topkSize); + GpuIVectorPtr gpuTopIds = std::make_shared(numSamples * topkSize); + + CHECK(gpuOutput && gpuLabel) << "Invalid argument pointer"; + CHECK(gpuTopVal && gpuTopIds) << "Allocate GPU memory failed"; + CHECK(gpuLabel->getSize() == numSamples) << "Vector size is not equal"; + CHECK(numSamples == gpuOutput->getHeight() && this->getWidth() == 1) << "Matrix dimensions are not equal"; - hl_matrix_classification_error((real*)output_ptr->data_, - (int*)label_ptr->getData(), - data_, - height_, - output_ptr->width_); + size_t dim = gpuOutput->getWidth(); + hl_matrix_classification_error(gpuTopVal->getData(), + gpuTopVal->getStride(), + gpuTopIds->getData(), + gpuOutput->getData(), + gpuOutput->getStride(), + dim, + topkSize, + numSamples, + gpuLabel->getData(), + this->getData()); } /* copy -log(output[i * width + label]) to this->data[i] */ @@ -913,59 +955,6 @@ void GpuMatrix::softreluDerivative(Matrix& 
output) { void GpuMatrix::scaledTanh(Matrix& output, real p1, real p2) { BaseMatrix::scaledTanh(output, p1, p2); } -void GpuMatrix::cosSim(Matrix& output1, Matrix& output2, real scale) { - CHECK(output1.useGpu_ == true && output2.useGpu_ == true) - << "Matrix type are not equal"; - size_t numSamples = getHeight(); - size_t dim = output1.getWidth(); - CHECK_EQ(getWidth(), 1UL); - CHECK_EQ(output1.getHeight(), numSamples); - CHECK_EQ(output1.getWidth(), output2.getWidth()); - real* out = getData(); - real* x = output1.getData(); - real* y = output2.getData(); - hl_cossim(out, x, y, dim, output1.getHeight(), output2.getHeight(), scale); -} -void GpuMatrix::cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale) { - CHECK(output.useGpu_ == true && prevOut1.useGpu_ == true && - prevOut2.useGpu_ == true && prevGrad1.useGpu_ == true && - prevGrad2.useGpu_ == true) - << "Matrix type are not equal"; - CHECK_EQ(getWidth(), 1UL); - CHECK_EQ(output.getWidth(), 1UL); - - size_t numSamples = getHeight(); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(prevOut1.getHeight(), numSamples); - CHECK_EQ(prevGrad1.getHeight(), numSamples); - - size_t dim = prevOut1.getWidth(); - CHECK_EQ(prevOut2.getWidth(), dim); - CHECK_EQ(prevGrad1.getWidth(), dim); - CHECK_EQ(prevGrad2.getWidth(), dim); - - real* grad = getData(); - real* out = output.getData(); - real* prevOutX = prevOut1.getData(); - real* prevOutY = prevOut2.getData(); - real* prevGradX = prevGrad1.getData(); - real* prevGradY = prevGrad2.getData(); - hl_cossim_derivative(grad, - out, - prevOutX, - prevOutY, - prevGradX, - prevGradY, - dim, - prevOut1.getHeight(), - prevOut2.getHeight(), - scale); -} void GpuMatrix::randomizeUniform() { CHECK(isContiguous()); @@ -1690,11 +1679,13 @@ MatrixPtr CpuMatrix::getTranspose() { } } -void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { +void CpuMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) { if (memAlloc) { matTrans = std::make_shared(width_, height_); } else { CHECK(matTrans != NULL); + CHECK_EQ(matTrans->getHeight(), width_); + CHECK_EQ(matTrans->getWidth(), height_); } real* dataTrans = matTrans->getData(); real* data = getData(); @@ -1708,13 +1699,35 @@ void CpuMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { } } +void CpuMatrix::rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) { + if (memAlloc) { + matRot = std::make_shared(width_, height_); + } else { + CHECK(matRot != NULL); + CHECK_EQ(matRot->getHeight(), width_); + CHECK_EQ(matRot->getWidth(), height_); + } + real* dataRot = matRot->getData(); + real* data = getData(); + + for (size_t i = 0; i < height_; i++) { + for (size_t j = 0; j < width_; j++) { + if (clockWise) { + dataRot[j * height_ + i] = data[(height_ - i - 1) * width_ + j]; + } else { + dataRot[j * height_ + i] = data[i * width_ + (width_ - j - 1)]; + } + } + } +} + MatrixPtr CpuMatrix::getInverse() { MatrixPtr matInv; inverse(matInv, true); return matInv; } -void CpuMatrix::inverse(MatrixPtr matInv, bool memAlloc) { +void CpuMatrix::inverse(MatrixPtr& matInv, bool memAlloc) { CHECK_EQ(height_, width_); if (memAlloc) { @@ -3040,7 +3053,7 @@ void CpuMatrix::rowMax(Matrix& max) { max.maxRows(*this); } -/* get beam size of max ids and values */ +/* Get the top k elements of each row of this matrix */ void CpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) { CHECK(isContiguous()); CHECK(!maxIds.useGpu() && !maxVal.useGpu()) << "Matrix type are not equal"; @@ -3048,6 +3061,7 @@ void 
CpuMatrix::rowMax(IVector& maxIds, Matrix& maxVal) { size_t beam = maxVal.getWidth(); CHECK_EQ(maxIds.getSize(), numSamples * beam); CHECK_EQ(maxVal.getHeight(), numSamples); + CHECK_EQ(maxVal.getWidth(), beam); real* a = getData(); int* s = maxIds.getData(); @@ -3199,32 +3213,39 @@ void CpuMatrix::rowNormalizeL1(Matrix& out) { } /* calulate classification error */ -void CpuMatrix::classificationError(Matrix& output, IVector& label) { - CHECK(dynamic_cast(&output)); - CHECK(dynamic_cast(&label)); +void CpuMatrix::classificationError(Matrix& output, + IVector& label, + size_t topkSize) { + size_t numSamples = this->getHeight(); + auto cpuOutput = dynamic_cast(&output); + auto cpuLabel = dynamic_cast(&label); + IVectorPtr cpuTopIds = std::make_shared(numSamples * topkSize); + MatrixPtr cpuTopVal = std::make_shared(numSamples, topkSize); + + CHECK(cpuOutput && cpuLabel) << "Invalid argument pointer"; + CHECK(cpuTopIds && cpuTopVal) << "Allocate cpu memory failed"; + CHECK(cpuLabel->getSize() == numSamples) << "Vector size is not equal"; + CHECK(cpuOutput->getHeight() == numSamples && this->getWidth() == 1) + << "Matrix dimensions are not equal"; - CHECK_EQ(getWidth(), (size_t)1); - size_t numSamples = getHeight(); - CHECK_EQ(label.getSize(), numSamples); - CHECK_EQ(output.getHeight(), numSamples); + // top k matrix classification + cpuOutput->rowMax(*cpuTopIds, *cpuTopVal); - size_t dim = output.getWidth(); - real* out = output.getData(); - int* lbl = label.getData(); - real maxData = 0.0; - int maxIndex = -1; + size_t dim = cpuOutput->getWidth(); + real* result = this->getData(); + int* ids = cpuTopIds->getData(); + int* lbl = cpuLabel->getData(); for (size_t i = 0; i < numSamples; ++i) { CHECK_GE(lbl[i], 0); CHECK_LT((size_t)lbl[i], dim); - maxData = out[i * dim]; - maxIndex = 0; - for (size_t j = 0; j < dim; ++j) { - if (maxData < out[i * dim + j]) { - maxIndex = j; - maxData = out[i * dim + j]; + + for (size_t j = 0; j < topkSize; ++j) { + if (ids[j + i * topkSize] == lbl[i]) { + result[i] = 0; + break; } + result[i] = 1.0f; } - getData()[i] = (maxIndex != lbl[i]); } } @@ -3418,105 +3439,6 @@ void CpuMatrix::softmaxDerivative(Matrix& output, Matrix& sftmaxSum) { } } -void CpuMatrix::cosSim(Matrix& output1, Matrix& output2, real scale) { - size_t numSamples = getHeight(); - size_t dim = output1.getWidth(); - CHECK_EQ(getWidth(), 1UL); - CHECK_EQ(output1.getHeight(), numSamples); - CHECK_EQ(output1.getWidth(), output2.getWidth()); - - real* out = getData(); - const real* x = output1.getData(); - const real* y = output2.getData(); - size_t yInc = dim; - if (output2.getHeight() == 1LU) { - yInc = 0; - } else { - CHECK_EQ(output2.getHeight(), numSamples); - } - for (size_t i = 0; i < numSamples; ++i, x += dim, y += yInc) { - real squareSumX = 0; - real squareSumY = 0; - real xy = 0; - for (size_t j = 0; j < dim; ++j) { - squareSumX += _square(x[j]); - squareSumY += _square(y[j]); - xy += x[j] * y[j]; - } - CHECK(squareSumX > 0 && squareSumY > 0); - out[i] = scale * xy / (std::sqrt(squareSumX) * std::sqrt(squareSumY)); - } -} - -void CpuMatrix::cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale) { - CHECK(output.useGpu_ == false) << "Matrix type are not equal"; - - CHECK_EQ(getWidth(), 1UL); - CHECK_EQ(output.getWidth(), 1UL); - - size_t numSamples = getHeight(); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(prevOut1.getHeight(), numSamples); - CHECK_EQ(prevGrad1.getHeight(), numSamples); - - size_t dim = 
prevOut1.getWidth(); - CHECK_EQ(prevOut2.getWidth(), dim); - CHECK_EQ(prevGrad1.getWidth(), dim); - CHECK_EQ(prevGrad2.getWidth(), dim); - - const real* grad = getData(); - const real* out = output.getData(); - const real* prevOutX = prevOut1.getData(); - const real* prevOutY = prevOut2.getData(); - real* prevGradX = prevGrad1.getData(); - real* prevGradY = prevGrad2.getData(); - size_t yInc = dim; - if (prevOut2.getHeight() == 1LU) { - yInc = 0; - CHECK_EQ(prevGrad2.getHeight(), 1LU); - } else { - CHECK_EQ(prevOut2.getHeight(), numSamples); - CHECK_EQ(prevGrad2.getHeight(), numSamples); - } - for (size_t i = 0; i < numSamples; ++i, - prevOutX += dim, - prevOutY += yInc, - prevGradX += dim, - prevGradY += yInc) { - real squareSumX = 0; - real squareSumY = 0; - real xy = 0; - for (size_t j = 0; j < dim; ++j) { - squareSumX += _square(prevOutX[j]); - squareSumY += _square(prevOutY[j]); - xy += prevOutX[j] * prevOutY[j]; - } - CHECK(squareSumX > 0 && squareSumY > 0); - if (xy == 0) { - real reciprocal = 1.0f / (std::sqrt(squareSumX) * std::sqrt(squareSumY)); - for (size_t j = 0; j < dim; ++j) { - prevGradX[j] += scale * grad[i] * prevOutY[j] * reciprocal; - prevGradY[j] += scale * grad[i] * prevOutX[j] * reciprocal; - } - } else { - real reciprocalXY = 1.0f / xy; - real reciprocalSquareSumX = 1.0f / squareSumX; - real reciprocalSquareSumY = 1.0f / squareSumY; - for (size_t j = 0; j < dim; ++j) { - prevGradX[j] += out[i] * grad[i] * (prevOutY[j] * reciprocalXY - - prevOutX[j] * reciprocalSquareSumX); - prevGradY[j] += out[i] * grad[i] * (prevOutX[j] * reciprocalXY - - prevOutY[j] * reciprocalSquareSumY); - } - } - } -} - void CpuMatrix::sumOfSquares(Matrix& output, Matrix& label) { CHECK(output.useGpu_ == false && label.useGpu_ == false) << "Matrix type are not equal"; diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index dd24f8821d49768354840e0381742218ab9a0204..d0ba2e93feabfcc11ac1d261bc40c9c6973a8c29 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -31,6 +31,7 @@ limitations under the License. */ namespace paddle { +/// TODO(tianbing), move to paddle/function/TensorType.h enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 }; /** @@ -56,6 +57,7 @@ enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 }; * value [1, 1, 2, 2, 5] * @endcode */ +/// TODO(tianbing), move to paddle/function/TensorType.h enum SparseFormat { SPARSE_CSR = 0, SPARSE_CSC = 1 }; class Matrix; @@ -370,7 +372,27 @@ public: * allocate matTrans' memory outside, then set memAlloc as false; * else set as true. */ - virtual void transpose(MatrixPtr matTrans, bool memAlloc) { + virtual void transpose(MatrixPtr& matTrans, bool memAlloc) { + LOG(FATAL) << "Not implemented"; + } + + /** + * @brief rotate 90 degrees in clock-wise if clockWise=true; + * otherwise rotate in anti clock-wise + * clock-wise: + * \f[ + * y(j,i) = x(M-i-1,j) + * \f] + * anti clock-wise: + * \f[ + * y(j,i) = x(i, N-1-j) + * \f] + * where \f$x\f$ is (M x N) input, and \f$y\f$ is (N x M) output. + * + * allocate matRot' memory outside, then set memAlloc as false; + * else set as true. + */ + virtual void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise) { LOG(FATAL) << "Not implemented"; } @@ -385,7 +407,7 @@ public: * if allocate matInv's memory outside, then set memAlloc as false; * else set as true. 
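 * Passing matInv by reference lets the memAlloc=true path hand a freshly
 * allocated matrix back to the caller. A minimal sketch (hypothetical
 * caller, with mat standing for any concrete matrix):
 *   MatrixPtr matInv;
 *   mat->inverse(matInv, true);  // matInv is allocated inside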
*/ - virtual void inverse(MatrixPtr matInv, bool memAlloc) { + virtual void inverse(MatrixPtr& matInv, bool memAlloc) { LOG(FATAL) << "Not implemented"; } @@ -777,26 +799,6 @@ public: LOG(FATAL) << "Not implemented"; } - /** - * cosine similarity, for each row i, - * this[i] = cos(output1[i], output2[i]) - * - * output2 can only have one row, then for each row i, - * this[i] = cos(output1[i], output2[0]) - */ - virtual void cosSim(Matrix& output1, Matrix& output2, real scale = 1.0f) { - LOG(FATAL) << "Not implemented"; - } - - virtual void cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale = 1.0f) { - LOG(FATAL) << "Not implemented"; - } - /// print out the values of elements to os virtual void print(std::ostream& os) const { LOG(FATAL) << "Not implemented"; @@ -834,8 +836,11 @@ public: * output[i] = 1 if row i is an error. * * output[i] = 0 if row i is correct. + * */ - virtual void classificationError(Matrix& output, IVector& label) { + virtual void classificationError(Matrix& output, + IVector& label, + size_t topkSize = 1) { LOG(FATAL) << "Not implemented"; } @@ -1167,11 +1172,15 @@ public: void accumulateColSum(Matrix& src); real getAbsSum(); + real getMin(); + real getMax(); + MatrixPtr getTranspose(); - void transpose(MatrixPtr matTrans, bool memAlloc); + void transpose(MatrixPtr& matTrans, bool memAlloc); + void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise); MatrixPtr getInverse(); - void inverse(MatrixPtr matInv, bool memAlloc); + void inverse(MatrixPtr& matInv, bool memAlloc); /// add b to each sample of this. void addBias(Matrix& b, real scale); @@ -1298,14 +1307,6 @@ public: void softreluDerivative(Matrix& output); void scaledTanh(Matrix& output, real p1, real p2); - void cosSim(Matrix& output1, Matrix& output2, real scale); - void cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale); - virtual void print(std::ostream& os) const; virtual void print(std::ostream& os, size_t height, size_t width) const; @@ -1316,7 +1317,7 @@ public: void check(std::ostream& os, Matrix& refMat, bool printDiff = true); void randomizeUniform(); - void classificationError(Matrix& output, IVector& label); + void classificationError(Matrix& output, IVector& label, size_t topkSize = 1); void convExpand(Matrix& feature, int feaImgHeight, @@ -1483,10 +1484,11 @@ public: real getAbsSum(); MatrixPtr getTranspose(); - void transpose(MatrixPtr matTrans, bool memAlloc); + void transpose(MatrixPtr& matTrans, bool memAlloc); + void rotate(MatrixPtr& matRot, bool memAlloc, bool clockWise); MatrixPtr getInverse(); - void inverse(MatrixPtr matInv, bool memAlloc); + void inverse(MatrixPtr& matInv, bool memAlloc); void copyFrom(const Matrix& src); @@ -1725,14 +1727,6 @@ public: void softreluDerivative(Matrix& output); void scaledTanh(Matrix& output, real p1, real p2); - void cosSim(Matrix& output1, Matrix& output2, real scale); - void cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale); - void print(std::ostream& os) const; void print(std::ostream& os, size_t height, size_t width) const; void printOneRow(std::ostream& os, size_t idx) const; @@ -1748,7 +1742,7 @@ public: void randomizeUniform(); - void classificationError(Matrix& output, IVector& label); + void classificationError(Matrix& output, IVector& label, size_t topkSize = 1); void addByBitCode(size_t numClasses, const 
IVector& codes, const Matrix& vec); diff --git a/paddle/math/SparseMatrix.cpp b/paddle/math/SparseMatrix.cpp index 720a035ecbd26df01fe24c991982bbf7965ccbdc..6370c77386688a334fa0de8b4e2b272882e9e2b0 100644 --- a/paddle/math/SparseMatrix.cpp +++ b/paddle/math/SparseMatrix.cpp @@ -177,7 +177,6 @@ GpuSparseMatrix::GpuSparseMatrix(real* value, hl_sparse_matrix_s_ptr tmp2(tmp, hl_destruct_sparse_matrix); sMatrix_ = tmp2; } - LOG(INFO) << "weight to matrix "; } } @@ -498,7 +497,7 @@ void GpuSparseMatrix::setRow(size_t row, SparseValueType GpuSparseMatrix::getValueType() const { return valueType_; } -void GpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { +void GpuSparseMatrix::transpose(MatrixPtr& matTrans, bool memAlloc) { CHECK_EQ(format_, SPARSE_CSC); int nnz = sMatrix_->nnz; if (memAlloc) { diff --git a/paddle/math/SparseMatrix.h b/paddle/math/SparseMatrix.h index 1d3801548e03a6ae679afb15bf7f620172d61c57..f6cd5df338965b55ca17636de097d2401dc057f9 100644 --- a/paddle/math/SparseMatrix.h +++ b/paddle/math/SparseMatrix.h @@ -109,7 +109,7 @@ public: MatrixPtr getTranspose(); /// B = A' - void transpose(MatrixPtr matTrans, bool memAlloc); + void transpose(MatrixPtr& matTrans, bool memAlloc); void copyFrom(const Matrix& src); void copyFrom(const Matrix& src, hl_stream_t stream); diff --git a/paddle/math/SparseRowMatrix.cpp b/paddle/math/SparseRowMatrix.cpp index b61c6b2d49ccead5e9cfdf595a8bebae0e5b87b5..b8c781ca1fd46c9840817abe26a20eec005c37e9 100644 --- a/paddle/math/SparseRowMatrix.cpp +++ b/paddle/math/SparseRowMatrix.cpp @@ -24,10 +24,6 @@ limitations under the License. */ #include "paddle/utils/Thread.h" #include "paddle/utils/Util.h" -DEFINE_bool(allow_inefficient_sparse_update, - false, - "Whether to allow inefficient sparse update"); - namespace paddle { const unsigned int SparseRowCpuMatrix::kUnusedId_ = -1U; diff --git a/paddle/math/SparseRowMatrix.h b/paddle/math/SparseRowMatrix.h index c05fc98ff9fe739688ed3c21466fb29b70e36854..1ccbf97b25922ae52377d7048da3a07012d21003 100644 --- a/paddle/math/SparseRowMatrix.h +++ b/paddle/math/SparseRowMatrix.h @@ -21,8 +21,6 @@ limitations under the License. */ #include "RowBuffer.h" #include "paddle/utils/Util.h" -DECLARE_bool(allow_inefficient_sparse_update); - namespace paddle { /** @@ -183,11 +181,10 @@ protected: inline void checkStoreSize() { if (buf_->isAutoGrowth()) { if (buf_->getRowCount() > 0.5 * height_) { - LOG(WARNING) - << "There are more than 0.5*height (" << localIndices_->size() - << ") rows are used for sparse " - << "update, which is not efficient. Considering not use " - << "sparse_update or set --allow_inefficient_sparse_update=true"; + LOG(WARNING) << "There are more than 0.5*height (" + << localIndices_->size() << ") rows are used for sparse " + << "update, which is not efficient. 
Considering not use " + << "sparse_update."; } } else { CHECK_LE(localIndices_->size(), buf_->getRowCount()); diff --git a/paddle/math/tests/test_Matrix.cpp b/paddle/math/tests/test_Matrix.cpp index a4084bdf7c6953651bfd9714fd8a5c930f774fe6..1c21da5b76e95603258a5006d0c57b00126e65b9 100644 --- a/paddle/math/tests/test_Matrix.cpp +++ b/paddle/math/tests/test_Matrix.cpp @@ -181,28 +181,6 @@ TEST(Matrix, copyByRowIndex) { } } -void testCosSim(int heightX, int heightY, int width, real scale) { - AutoCompare test(heightX, 1); - CpuMatrix arg1(heightX, width); - CpuMatrix arg2(heightY, width); - arg1.randomizeUniform(); - arg2.randomizeUniform(); - arg2.add(-0.5); - test.cmpWithArg(&Matrix::cosSim, arg1, arg2, scale); -} - -TEST(Matrix, cosSim) { - for (auto heightX : {10, 100, 1000}) { - for (auto heightY : {1, heightX}) { - for (auto width : {10, 100, 1000}) { - for (auto scale : {1.0, 2.0}) { - testCosSim(heightX, heightY, width, scale); - } - } - } - } -} - void testParamReluForward(int height, int width, int w_height, int w_width) { AutoCompare test(height, width); CpuMatrix arg1(height, width); diff --git a/paddle/math/tests/test_RowBuffer.cpp b/paddle/math/tests/test_RowBuffer.cpp index 5f66f22ef73dcff1868c1a3e03139a680b1ce2b5..8cc4c69a1a4d8afec08bf7fb13408e135a06c09c 100644 --- a/paddle/math/tests/test_RowBuffer.cpp +++ b/paddle/math/tests/test_RowBuffer.cpp @@ -17,10 +17,10 @@ limitations under the License. */ TEST(RowBuffer, testAutoGrow) { paddle::RowBuffer buf(128); - ASSERT_EQ(128, buf.getWidth()); + ASSERT_EQ(128UL, buf.getWidth()); ASSERT_TRUE(buf.isAutoGrowth()); buf.resize(2); - ASSERT_EQ(2, buf.getRowCount()); + ASSERT_EQ(2UL, buf.getRowCount()); for (size_t i = 0; i < buf.getWidth() * 2; ++i) { buf.data()[i] = i; } @@ -35,7 +35,7 @@ TEST(RowBuffer, testAutoGrow) { data[i] = i; } - ASSERT_EQ(3, buf.getRowCount()); + ASSERT_EQ(3UL, buf.getRowCount()); for (size_t i = 0; i < buf.getRowCount() - 1; ++i) { for (size_t j = 0; j < buf.getWidth(); ++j) { ASSERT_NEAR(i * buf.getWidth() + j, buf.get(i)[j], 1e-5); @@ -51,7 +51,7 @@ TEST(RowBuffer, testWithMemBuf) { std::make_shared(128 * 2 * sizeof(real)); paddle::RowBuffer buf(mem, 128); ASSERT_TRUE(!buf.isAutoGrowth()); - ASSERT_EQ(2, buf.getRowCount()); + ASSERT_EQ(2UL, buf.getRowCount()); for (size_t i = 0; i < buf.getWidth() * 2; ++i) { buf.data()[i] = i; } diff --git a/paddle/math/tests/test_SparseMatrix.cpp b/paddle/math/tests/test_SparseMatrix.cpp index 9d3fbaef43d719d07577631d5df3ac4656610cc6..c0572dfdbf738a4dfad04811b3a3e1b65487ff6d 100644 --- a/paddle/math/tests/test_SparseMatrix.cpp +++ b/paddle/math/tests/test_SparseMatrix.cpp @@ -248,11 +248,13 @@ TEST(Matrix, SparseMatrixTranspose) { /*dense matrix transpose*/ CpuMatrixPtr matC(new CpuMatrix(height, width)); matC->copyFrom(*matA); - CpuMatrixPtr matD(new CpuMatrix(width, height)); + MatrixPtr matD(new CpuMatrix(width, height)); matC->transpose(matD, false); + /*check result*/ checkSMatrixEqual2Dense( - std::dynamic_pointer_cast(matB), matD); + std::dynamic_pointer_cast(matB), + std::dynamic_pointer_cast(matD)); } } } diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index f0c49791d7e2a67220eafca3e1347f30958877a7..08b64c1bb6f5d359a2d2164e723a76c5360168ee 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -105,6 +105,21 @@ void testMatrixGetSum(int height, int width) { EXPECT_LE(fabs(cpuSum - gpuSum), err); } +void testMatrixGetMinMax(int height, int width) { + MatrixPtr 
cpuInput = std::make_shared(height, width); + MatrixPtr gpuInput = std::make_shared(height, width); + cpuInput->randomizeUniform(); + gpuInput->copyFrom(*cpuInput); + + real cpuMin = cpuInput->getMin(); + real gpuMin = gpuInput->getMin(); + real cpuMax = cpuInput->getMax(); + real gpuMax = gpuInput->getMax(); + + EXPECT_EQ(cpuMin, gpuMin); + EXPECT_EQ(cpuMax, gpuMax); +} + void testMatrixZeroAtOffset(int height, int width) { MatrixPtr cpuA = std::make_shared(height, width); MatrixPtr gpuA = std::make_shared(height, width); @@ -161,11 +176,29 @@ void testMatrixTranspose(int height, int width) { cpu->randomizeUniform(); gpu->copyFrom(*cpu); cpu->transpose(cpuT, false); - gpu->transpose(gpuT, false); + gpu->transpose(gpuT, true); TensorCheckEqual(*cpuT, *gpuT); } +void testMatrixRotate(int height, int width) { + MatrixPtr cpu = std::make_shared(height, width); + MatrixPtr gpu = std::make_shared(height, width); + MatrixPtr cpuR = std::make_shared(width, height); + MatrixPtr gpuR = std::make_shared(width, height); + + cpu->randomizeUniform(); + gpu->copyFrom(*cpu); + + cpu->rotate(cpuR, false, true); + gpu->rotate(gpuR, true, true); + TensorCheckEqual(*cpuR, *gpuR); + + cpu->rotate(cpuR, true, false); + gpu->rotate(gpuR, false, false); + TensorCheckEqual(*cpuR, *gpuR); +} + void testMatrixInverse(int height) { MatrixPtr cpu = std::make_shared(height, height); MatrixPtr gpu = std::make_shared(height, height); @@ -181,7 +214,7 @@ void testMatrixInverse(int height) { cpu->add(*outputCheck); gpu->copyFrom(*cpu); - cpu->inverse(cpuI, false); + cpu->inverse(cpuI, true); gpu->inverse(gpuI, false); TensorCheckErr(*cpuI, *gpuI); @@ -200,6 +233,7 @@ TEST(Matrix, unary) { testMatrixZeroAtOffset(height, width); testMatrixGetSum(height, width); testMatrixTranspose(height, width); + testMatrixRotate(height, width); } // inverse testMatrixInverse(height); @@ -686,61 +720,6 @@ TEST(Matrix, sequenceAvgForward) { } } -void testCosSimDerivate(int heightX, int heightY, int width, real scale) { - MatrixPtr prevOutX = CpuMatrix::create(heightX, width, false, false); - MatrixPtr prevOutY = CpuMatrix::create(heightY, width, false, false); - MatrixPtr grad = CpuMatrix::create(heightX, 1, false, false); - MatrixPtr output = CpuMatrix::create(heightX, 1, false, false); - MatrixPtr prevGradX = CpuMatrix::create(heightX, width, false, false); - MatrixPtr prevGradY = CpuMatrix::create(heightY, width, false, false); - - prevOutX->randomizeUniform(); - prevOutY->randomizeUniform(); - grad->randomizeUniform(); - output->randomizeUniform(); - prevGradX->randomizeUniform(); - prevGradY->randomizeUniform(); - - MatrixPtr prevOutXGpu = GpuMatrix::create(heightX, width, false, true); - MatrixPtr prevOutYGpu = GpuMatrix::create(heightY, width, false, true); - MatrixPtr gradGpu = GpuMatrix::create(heightX, 1, false, true); - MatrixPtr outputGpu = GpuMatrix::create(heightX, 1, false, true); - MatrixPtr prevGradXGpu = GpuMatrix::create(heightX, width, false, true); - MatrixPtr prevGradYGpu = GpuMatrix::create(heightY, width, false, true); - - prevOutXGpu->copyFrom(*prevOutX); - prevOutYGpu->copyFrom(*prevOutY); - gradGpu->copyFrom(*grad); - outputGpu->copyFrom(*output); - prevGradXGpu->copyFrom(*prevGradX); - prevGradYGpu->copyFrom(*prevGradY); - - grad->cosSimDerivative( - *output, *prevOutX, *prevOutY, *prevGradX, *prevGradY, scale); - - gradGpu->cosSimDerivative(*outputGpu, - *prevOutXGpu, - *prevOutYGpu, - *prevGradXGpu, - *prevGradYGpu, - scale); - - TensorCheckErr(*prevGradX, *prevGradXGpu); - TensorCheckErr(*prevGradY, 
*prevGradYGpu); -} - -TEST(Matrix, cosSimDerivate) { - for (auto heightX : {1, 10, 100}) { - for (auto heightY : {1, heightX}) { - for (auto width : {1, 10, 100}) { - for (auto scale : {1.0, 2.0}) { - testCosSimDerivate(heightX, heightY, width, scale); - } - } - } - } -} - void testParamReluBackwardDiff(int height, int width, int w_height, @@ -785,7 +764,7 @@ TEST(Matrix, paramReluBackwardDiff) { } } -void testClassificationError(int numSamples, int dim) { +void testClassificationError(int numSamples, int dim, int topkSize) { MatrixPtr cpuError = std::make_shared(numSamples, 1); MatrixPtr gpuError = std::make_shared(numSamples, 1); MatrixPtr cpuOutput = std::make_shared(numSamples, dim); @@ -798,17 +777,22 @@ void testClassificationError(int numSamples, int dim) { gpuOutput->copyFrom(*cpuOutput); gpuLabel->copyFrom(*cpuLabel); - cpuError->classificationError(*cpuOutput, *cpuLabel); - gpuError->classificationError(*gpuOutput, *gpuLabel); + cpuError->classificationError(*cpuOutput, *cpuLabel, topkSize); + gpuError->classificationError(*gpuOutput, *gpuLabel, topkSize); TensorCheckEqual(*cpuError, *gpuError); } TEST(Matrix, classificationError) { - for (auto numSamples : {1, 10, 100, 1000, 70000}) { - for (auto dim : {1, 10, 100, 1000}) { - VLOG(3) << " numSamples=" << numSamples << " dim=" << dim; - testClassificationError(numSamples, dim); + for (auto numSamples : {1, 5, 31, 90, 150, 300}) { + for (auto dim : + {1, 5, 8, 10, 15, 64, 80, 120, 256, 300, 1280, 5120, 50000}) { + for (auto topkSize : {1, 5, 10, 20, 40, (int)rand() % dim + 1}) { + if (topkSize > dim) continue; + VLOG(3) << " sample= " << numSamples << " topkSize= " << topkSize + << " dim= " << dim; + testClassificationError(numSamples, dim, topkSize); + } } } } diff --git a/paddle/math/tests/test_matrixUtil.h b/paddle/math/tests/test_matrixUtil.h index 9aa74b15193723970d80b5d1a4e0ac95341cd45a..47f461474622d13ea2f922a77348c78b450ec37f 100644 --- a/paddle/math/tests/test_matrixUtil.h +++ b/paddle/math/tests/test_matrixUtil.h @@ -30,6 +30,17 @@ void checkMatrixEqual(const MatrixPtr& a, const MatrixPtr& b) { } } +void checkSMatrixEqual(const CpuSparseMatrix& a, const CpuSparseMatrix& b) { + ASSERT_EQ(a.getWidth(), b.getWidth()); + ASSERT_EQ(a.getHeight(), b.getHeight()); + ASSERT_EQ(a.isTransposed(), b.isTransposed()); + ASSERT_EQ(a.getFormat(), b.getFormat()); + ASSERT_EQ(a.getElementCnt(), b.getElementCnt()); + for (size_t r = 0; r < a.getElementCnt(); ++r) { + ASSERT_FLOAT_EQ(a.getValue()[r], b.getValue()[r]); + } +} + void checkSMatrixEqual(const CpuSparseMatrixPtr& a, const CpuSparseMatrixPtr& b) { ASSERT_EQ(a->getWidth(), b->getWidth()); @@ -73,6 +84,36 @@ void checkSMatrixEqual2(const CpuSparseMatrixPtr& a, } } +void checkSMatrixEqual2Dense(const CpuSparseMatrix& a, const CpuMatrix& b) { + ASSERT_EQ(a.getWidth(), b.getWidth()); + ASSERT_EQ(a.getHeight(), b.getHeight()); + ASSERT_EQ(a.isTransposed(), b.isTransposed()); + + if (a.getFormat() == SPARSE_CSC) { + int* rows = a.getRows(); + for (size_t i = 0; i < a.getWidth(); i++) { + for (size_t j = a.getColStartIdx(i); j < a.getColStartIdx(i + 1); j++) { + if (a.getValueType() == FLOAT_VALUE) { + ASSERT_FLOAT_EQ(a.getValue()[j], b.getElement(rows[j], i)); + } else { + ASSERT_FLOAT_EQ(1.0, b.getElement(rows[j], i)); + } + } + } + } else { + int* cols = a.getCols(); + for (size_t i = 0; i < a.getHeight(); i++) { + for (size_t j = a.getRowStartIdx(i); j < a.getRowStartIdx(i + 1); j++) { + if (a.getValueType() == FLOAT_VALUE) { + ASSERT_FLOAT_EQ(a.getValue()[j], b.getElement(i, 
cols[j])); + } else { + ASSERT_FLOAT_EQ(1.0, b.getElement(i, cols[j])); + } + } + } + } +} + void checkSMatrixEqual2Dense(const CpuSparseMatrixPtr& a, const CpuMatrixPtr& b) { ASSERT_EQ(a->getWidth(), b->getWidth()); diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 65d01a15718ae2bebd4869eff0e5407524bc0e7c..7a343cca33f5b420be6192231ac73ca1c2da5fb9 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -602,6 +602,44 @@ void Argument::degradeSequence(const Argument& input, bool useGpu) { tgtBuf[numSequences] = numSubSequences; } +void Argument::getValueString( + std::unordered_map* out) const { + if (value) { + std::ostringstream os; + value->print(os); + out->insert({"value", os.str()}); + } + if (ids) { + std::ostringstream os; + ids->print(os, ids->getSize()); + out->insert({"ids", os.str()}); + } + if (sequenceStartPositions) { + std::ostringstream os; + sequenceStartPositions->getVector(false)->print( + os, sequenceStartPositions->getSize()); + out->insert({"sequence pos", os.str()}); + } + if (subSequenceStartPositions) { + std::ostringstream os; + subSequenceStartPositions->getVector(false)->print( + os, subSequenceStartPositions->getSize()); + out->insert({"sub-sequence pos", os.str()}); + } +} + +void Argument::printValueString(std::ostream& stream, + const std::string& prefix) const { + std::unordered_map out; + getValueString(&out); + for (auto field : {"value", "id", "sequence pos", "sub-sequence pos"}) { + auto it = out.find(field); + if (it != out.end()) { + stream << prefix << field << ":\n" << it->second; + } + } +} + void Argument::subArgFrom(const Argument& input, size_t offset, size_t height, diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index afd2de0202bf0f14ec3d4c5b856455a3488e41f6..9ef44be0cb3b960db1e789f3f26bb66d1fe63c81 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -163,7 +163,7 @@ struct Argument { : sequenceStartPositions->getData(false); } - static inline real sumCosts(const std::vector& arguments) { + static inline real sum(const std::vector& arguments) { real cost = 0; for (auto& arg : arguments) { if (arg.value) { @@ -297,6 +297,23 @@ struct Argument { sequence has sub-sequence degrades to a sequence. */ void degradeSequence(const Argument& input, bool useGpu); + + /** + * @brief getValueString will return the argument's output in string. There + * are several kinds of output. The keys of output dictionary are 'value', + * 'id', 'sequence pos', 'sub-sequence pos'. + * @param out [out]: the return values. + */ + void getValueString(std::unordered_map* out) const; + + /** + * @brief printValueString will print the argument's output in order of + * 'value', 'id', 'sequence pos', 'sub-sequence pos'. + * @param stream: Output stream + * @param prefix: line prefix for printing. 
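+ * e.g. arg.printValueString(std::cout, "layer_0 ") writes each available
+ * field as "layer_0 value:\n..." (an illustrative call; any std::ostream
+ * works). Note that getValueString() above stores the ids output under
+ * the key "ids" while this method looks up "id", so that entry is
+ * skipped here.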
+ */ + void printValueString(std::ostream& stream, + const std::string& prefix = "") const; }; } // namespace paddle diff --git a/paddle/parameter/Parameter.cpp b/paddle/parameter/Parameter.cpp index 29d6e20dc16968cdda3e79b66b0c81aaaf303ef4..1ccded818796798105a889df978618688b56ed36 100644 --- a/paddle/parameter/Parameter.cpp +++ b/paddle/parameter/Parameter.cpp @@ -375,10 +375,6 @@ bool Parameter::load(const std::string& filename) { std::ifstream fs(filename, std::ios_base::binary); if (!fs) { LOG(INFO) << "missing parameters [" << filename << "] while loading model."; - if (isStatic()) { - LOG(FATAL) << getName() << " is static but missing, not allowed."; - return false; - } if (kMissParameterFail == FLAGS_load_missing_parameter_strategy) { LOG(FATAL) << getName() << " missing, not allowed."; return false; diff --git a/paddle/parameter/ParameterUpdaterBase.h b/paddle/parameter/ParameterUpdaterBase.h index b230e170c15f1b004c5357fb7d0ad2204d01f44b..6265c828a1a254d01dc975b0155e7ac69df49a31 100644 --- a/paddle/parameter/ParameterUpdaterBase.h +++ b/paddle/parameter/ParameterUpdaterBase.h @@ -55,7 +55,7 @@ public: // between startBatch() and finishBatch(), update() will be called // by the trainer multiple times, each time for updating one Parameter // with its gradient in PARAMETER_GRADIENT - virtual void update(Parameter* para) { + void update(Parameter* para) { SetDevice setDevice(para->getDeviceId()); para->updateHook(); this->updateImpl(para); diff --git a/paddle/pserver/test/CMakeLists.txt b/paddle/pserver/test/CMakeLists.txt index 64654f67d0c2c82f05a5038fb33b220f3cff0f39..6e8f9c37f64b70921e09241089a5a480fd8ca47f 100644 --- a/paddle/pserver/test/CMakeLists.txt +++ b/paddle/pserver/test/CMakeLists.txt @@ -10,9 +10,11 @@ add_test(NAME socket_test add_unittest_without_exec(test_ProtoServer test_ProtoServer.cpp) -add_test(NAME test_ProtoServer - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port - ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer) +IF(NOT ON_TRAVIS) + add_test(NAME test_ProtoServer + COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port + ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer) +ENDIF(NOT ON_TRAVIS) # TODO(yuyang18): Run test_ProtoServer when with rdma # add_test(NAME test_ProtoServerRDMA diff --git a/paddle/pserver/test/test_ProtoServer.cpp b/paddle/pserver/test/test_ProtoServer.cpp index 9f86ee80f4e5cc99ea3597b3ed37a387578f032a..04236fda2fb62b928b5c06ff38acfd3eb7217b08 100644 --- a/paddle/pserver/test/test_ProtoServer.cpp +++ b/paddle/pserver/test/test_ProtoServer.cpp @@ -12,14 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Util.h" - #include - +#include #include "ParameterService.pb.h" #include "paddle/math/Vector.h" #include "paddle/pserver/ProtoServer.h" #include "paddle/utils/Stat.h" +#include "paddle/utils/Util.h" DEFINE_string(server_addr, "127.0.0.1", "Server address"); DEFINE_int64(dim, 50000000, "Data size"); @@ -162,18 +161,9 @@ TEST(ProtoServer, extended) { int main(int argc, char** argv) { paddle::initMain(argc, argv); testing::InitGoogleTest(&argc, argv); - - MyServer* server; - if (FLAGS_rdma_tcp == "rdma") { - server = new MyServer(FLAGS_port, 0); - } else { - server = new MyServer(FLAGS_port); - } - - server->start(); + MyServer server(FLAGS_port, FLAGS_rdma_tcp == "rdma" ? 
diff --git a/paddle/parameter/ParameterUpdaterBase.h b/paddle/parameter/ParameterUpdaterBase.h
index b230e170c15f1b004c5357fb7d0ad2204d01f44b..6265c828a1a254d01dc975b0155e7ac69df49a31 100644
--- a/paddle/parameter/ParameterUpdaterBase.h
+++ b/paddle/parameter/ParameterUpdaterBase.h
@@ -55,7 +55,7 @@ public:
   // between startBatch() and finishBatch(), update() will be called
   // by the trainer multiple times, each time for updating one Parameter
   // with its gradient in PARAMETER_GRADIENT
-  virtual void update(Parameter* para) {
+  void update(Parameter* para) {
     SetDevice setDevice(para->getDeviceId());
     para->updateHook();
     this->updateImpl(para);
diff --git a/paddle/pserver/test/CMakeLists.txt b/paddle/pserver/test/CMakeLists.txt
index 64654f67d0c2c82f05a5038fb33b220f3cff0f39..6e8f9c37f64b70921e09241089a5a480fd8ca47f 100644
--- a/paddle/pserver/test/CMakeLists.txt
+++ b/paddle/pserver/test/CMakeLists.txt
@@ -10,9 +10,11 @@ add_test(NAME socket_test
 add_unittest_without_exec(test_ProtoServer
     test_ProtoServer.cpp)
 
-add_test(NAME test_ProtoServer
-    COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port
-            ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer)
+IF(NOT ON_TRAVIS)
+  add_test(NAME test_ProtoServer
+      COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port
+              ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer)
+ENDIF(NOT ON_TRAVIS)
 
 # TODO(yuyang18): Run test_ProtoServer when with rdma
 # add_test(NAME test_ProtoServerRDMA
diff --git a/paddle/pserver/test/test_ProtoServer.cpp b/paddle/pserver/test/test_ProtoServer.cpp
index 9f86ee80f4e5cc99ea3597b3ed37a387578f032a..04236fda2fb62b928b5c06ff38acfd3eb7217b08 100644
--- a/paddle/pserver/test/test_ProtoServer.cpp
+++ b/paddle/pserver/test/test_ProtoServer.cpp
@@ -12,14 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/utils/Util.h"
-
 #include <gtest/gtest.h>
-
+#include <unistd.h>
 #include "ParameterService.pb.h"
 #include "paddle/math/Vector.h"
 #include "paddle/pserver/ProtoServer.h"
 #include "paddle/utils/Stat.h"
+#include "paddle/utils/Util.h"
 
 DEFINE_string(server_addr, "127.0.0.1", "Server address");
 DEFINE_int64(dim, 50000000, "Data size");
@@ -162,18 +161,9 @@ TEST(ProtoServer, extended) {
 int main(int argc, char** argv) {
   paddle::initMain(argc, argv);
   testing::InitGoogleTest(&argc, argv);
-
-  MyServer* server;
-  if (FLAGS_rdma_tcp == "rdma") {
-    server = new MyServer(FLAGS_port, 0);
-  } else {
-    server = new MyServer(FLAGS_port);
-  }
-
-  server->start();
+  MyServer server(FLAGS_port, FLAGS_rdma_tcp == "rdma" ? 0 : -1);
+  server.start();
   usleep(10000);
-  int ret = RUN_ALL_TESTS();
-
-  exit(ret);
+  return RUN_ALL_TESTS();
 }
diff --git a/paddle/py_paddle/dataprovider_converter.py b/paddle/py_paddle/dataprovider_converter.py
index 21d1cb75f4d40e6ed011b33c6366c9d31c0fcc7c..c009b05cdeeb9dbe2dc70048e6827a12445f677e 100644
--- a/paddle/py_paddle/dataprovider_converter.py
+++ b/paddle/py_paddle/dataprovider_converter.py
@@ -23,8 +23,18 @@ __all__ = ['DataProviderConverter']
 class IScanner(object):
     def __init__(self, input_type, pos):
         self.input_type = input_type
-        assert isinstance(self.input_type, dp2.InputType)
+        if not isinstance(self.input_type, dp2.InputType):
+            raise ValueError("input type should be dataprovider2.InputType")
         self.pos = pos
+        # data_in_gpu indicates whether to create the argument on the GPU
+        # when running in GPU mode. With one thread (trainer_count=1) the
+        # trainer uses NeuralNetwork, which needs the argument created on
+        # the GPU before its forward function is called, so data_in_gpu is
+        # set to True. Otherwise the trainer uses MultiGradientMachine,
+        # which transfers data from CPU to GPU inside the forward function,
+        # so data_in_gpu is set to False.
+        self.data_in_gpu = swig_paddle.isUsingGpu(
+        ) and swig_paddle.getTrainerCount() == 1
 
     def scan(self, dat):
         pass
@@ -50,10 +60,10 @@ class DenseScanner(IScanner):
 
     def finish_scan(self, argument):
         assert isinstance(argument, swig_paddle.Arguments)
-        assert isinstance(self.input_type, dp2.InputType)
         if self.__mat__.dtype != numpy.float32:
             self.__mat__ = self.__mat__.astype(numpy.float32)
-        m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True, False)
+        m = swig_paddle.Matrix.createDenseFromNumpy(self.__mat__, True,
+                                                    self.data_in_gpu)
         argument.setSlotValue(self.pos, m)
 
@@ -63,7 +73,6 @@ class SparseBinaryScanner(IScanner):
         self.__rows__ = [0]
         self.__cols__ = []
         self.__height__ = 0
-        self.__nnz__ = 0
         self.__value__ = []
 
     def scan(self, dat):
@@ -76,11 +85,13 @@ class SparseBinaryScanner(IScanner):
 
     def finish_scan(self, argument):
         assert isinstance(argument, swig_paddle.Arguments)
-        assert isinstance(self.input_type, dp2.InputType)
-        m = swig_paddle.Matrix.createSparse(self.__height__,
-                                            self.input_type.dim,
-                                            len(self.__cols__),
-                                            len(self.__value__) == 0)
+        m = swig_paddle.Matrix.createSparse(
+            self.__height__,
+            self.input_type.dim,
+            len(self.__cols__),
+            len(self.__value__) == 0,
+            False,  # trans
+            False)  # TODO: support GPU
         assert isinstance(m, swig_paddle.Matrix)
         m.sparseCopyFrom(self.__rows__, self.__cols__, self.__value__)
         argument.setSlotValue(self.pos, m)
@@ -104,7 +115,7 @@ class IndexScanner(IScanner):
         self.__ids__.append(dat)
 
     def finish_scan(self, argument):
-        ids = swig_paddle.IVector.create(self.__ids__)
+        ids = swig_paddle.IVector.create(self.__ids__, self.data_in_gpu)
         assert isinstance(argument, swig_paddle.Arguments)
         argument.setSlotIds(self.pos, ids)
 
diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py
index ce105d249aaf3e838443d3e0cf5996fe8c783a22..1c9455fab5f9c1179bddffb100cd53fe8adfb6b1 100644
--- a/paddle/py_paddle/util.py
+++ b/paddle/py_paddle/util.py
@@ -195,6 +195,12 @@ def __monkeypatch_gradient_machine__():
 
     swig_paddle.GradientMachine.getParameters = getParameters
 
+    def getNonStaticParameters(self):
+        return (self.getNonStaticParameter(i)
+                for i in xrange(self.getNonStaticParameterSize()))
+
+    swig_paddle.GradientMachine.getNonStaticParameters = getNonStaticParameters
+
     def getLayerOutputs(self, layerNames):
         """
         getLayerOutputs. get outputs of layers and return a numpy matrix dict.
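The data_in_gpu choice documented in dataprovider_converter.py above is a pure function of GPU mode and trainer count, which makes the rule easy to state in isolation. A sketch under that reading (should_create_on_gpu is a hypothetical helper; the real code computes the value inline in IScanner.__init__):

    # Hypothetical helper restating IScanner.__init__'s inline computation.
    def should_create_on_gpu(using_gpu, trainer_count):
        # trainer_count == 1 -> NeuralNetwork wants input already on the GPU.
        # trainer_count > 1 -> MultiGradientMachine copies CPU data in forward.
        return using_gpu and trainer_count == 1

    assert should_create_on_gpu(True, 1) is True    # single-thread GPU mode
    assert should_create_on_gpu(True, 4) is False   # data stays on CPU
    assert should_create_on_gpu(False, 1) is False  # CPU mode
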
@@ -208,7 +214,7 @@ def __monkeypatch_gradient_machine__(): output = dict() for name in layerNames: - output[name] = __matrix_to_numpy__(self.getLayerOutput(name)) + output[name] = __arguments_to_numpy__(0, self.getLayerOutput(name)) return output swig_paddle.GradientMachine.getLayerOutputs = getLayerOutputs diff --git a/paddle/scripts/docker/Dockerfile b/paddle/scripts/docker/Dockerfile index 79c4efbed0b8562e6f50b27d6ac297da0929de79..98eaa15a0fdf206c0a0f23a98ee718d70b34ff4b 100644 --- a/paddle/scripts/docker/Dockerfile +++ b/paddle/scripts/docker/Dockerfile @@ -5,42 +5,54 @@ ARG DEBIAN_FRONTEND=noninteractive ARG UBUNTU_MIRROR RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' +# ENV variables +ARG BUILD_WOBOQ +ARG BUILD_AND_INSTALL +ARG WITH_AVX +ARG WITH_DOC +ARG WITH_STYLE_CHECK + +ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF} +ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF} +ENV WITH_GPU=OFF +ENV WITH_AVX=${WITH_AVX:-ON} +ENV WITH_DOC=${WITH_DOC:-OFF} +ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} + +ENV HOME /root + +# Add bash enhancements +COPY ./paddle/scripts/docker/root/ /root/ + RUN apt-get update && \ apt-get install -y git python-pip python-dev openssh-server bison && \ apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \ apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \ apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \ - apt-get install -y automake clang-3.8 llvm-3.8 libclang-3.8-dev && \ + apt-get install -y automake locales clang-format-3.8 && \ apt-get clean -y -RUN pip install --upgrade pip && \ - pip install 'protobuf==3.1.0.post1' && \ +# git credential to skip password typing +RUN git config --global credential.helper store + +# Fix locales to en_US.UTF-8 +RUN localedef -i en_US -f UTF-8 en_US.UTF-8 + +RUN pip install --upgrade pip && \ + pip install -U 'protobuf==3.1.0' && \ pip install -U wheel pillow BeautifulSoup && \ pip install -U docopt PyYAML sphinx && \ - pip install -U sphinx_rtd_theme recommonmark jupyter + pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \ + pip install -U pre-commit 'requests==2.9.2' jupyter RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \ - cd cmake-3.4.1 && ./bootstrap && make -j4 && make install && \ + cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \ cd .. && rm -rf cmake-3.4.1 -ARG WITH_AVX -ARG WITH_DOC -ARG WITH_STYLE_CHECK - -ENV WITH_GPU=OFF -ENV WITH_AVX=${WITH_AVX:-ON} -ENV WITH_DOC=${WITH_DOC:-ON} -ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} - -RUN mkdir /paddle COPY . /paddle/ RUN /paddle/paddle/scripts/docker/build.sh VOLUME ["/usr/share/nginx/html/data", "/usr/share/nginx/html/paddle"] -RUN echo 'export LD_LIBRARY_PATH=/usr/lib64:${LD_LIBRARY_PATH}' >> /etc/profile -RUN pip install /usr/local/opt/paddle/share/wheels/*.whl -RUN paddle version # print version after build - # Configure OpenSSH server. c.f. 
https://docs.docker.com/engine/examples/running_ssh_service RUN mkdir /var/run/sshd RUN echo 'root:root' | chpasswd @@ -53,7 +65,6 @@ RUN mkdir /notes/ WORKDIR "/notes" EXPOSE 8888 -RUN mkdir -p /opt/bin COPY ./paddle/scripts/docker/entrypoint /opt/bin/ CMD ["/opt/bin/entrypoint"] diff --git a/paddle/scripts/docker/Dockerfile.gpu b/paddle/scripts/docker/Dockerfile.gpu index 6c1c2225d1a304f234a940584c6c33502eaabbb8..4d30ccdd2b5de27c4dfd110537e69c269028ef31 100644 --- a/paddle/scripts/docker/Dockerfile.gpu +++ b/paddle/scripts/docker/Dockerfile.gpu @@ -5,42 +5,54 @@ ARG DEBIAN_FRONTEND=noninteractive ARG UBUNTU_MIRROR RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' +# ENV variables +ARG BUILD_WOBOQ +ARG BUILD_AND_INSTALL +ARG WITH_AVX +ARG WITH_DOC +ARG WITH_STYLE_CHECK + +ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF} +ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF} +ENV WITH_GPU=ON +ENV WITH_AVX=${WITH_AVX:-ON} +ENV WITH_DOC=${WITH_DOC:-OFF} +ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} + +ENV HOME /root + +# Add bash enhancements +COPY ./paddle/scripts/docker/root/ /root/ + RUN apt-get update && \ apt-get install -y git python-pip python-dev openssh-server bison && \ apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \ apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \ apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \ - apt-get install -y automake clang-3.8 llvm-3.8 libclang-3.8-dev && \ + apt-get install -y automake locales clang-format-3.8 && \ apt-get clean -y -RUN pip install --upgrade pip && \ - pip install 'protobuf==3.1.0.post1' && \ +# git credential to skip password typing +RUN git config --global credential.helper store + +# Fix locales to en_US.UTF-8 +RUN localedef -i en_US -f UTF-8 en_US.UTF-8 + +RUN pip install --upgrade pip && \ + pip install -U 'protobuf==3.1.0' && \ pip install -U wheel pillow BeautifulSoup && \ pip install -U docopt PyYAML sphinx && \ - pip install -U sphinx_rtd_theme recommonmark jupyter + pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \ + pip install -U pre-commit 'requests==2.9.2' jupyter RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \ - cd cmake-3.4.1 && ./bootstrap && make -j4 && make install && \ + cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \ cd .. && rm -rf cmake-3.4.1 -ARG WITH_AVX -ARG WITH_DOC -ARG WITH_STYLE_CHECK - -ENV WITH_GPU=ON -ENV WITH_AVX=${WITH_AVX:-ON} -ENV WITH_DOC=${WITH_DOC:-ON} -ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} - -RUN mkdir /paddle COPY . /paddle/ RUN /paddle/paddle/scripts/docker/build.sh VOLUME ["/usr/share/nginx/html/data", "/usr/share/nginx/html/paddle"] -RUN echo 'export LD_LIBRARY_PATH=/usr/lib64:${LD_LIBRARY_PATH}' >> /etc/profile -RUN pip install /usr/local/opt/paddle/share/wheels/*.whl -RUN paddle version # print version after build - # Configure OpenSSH server. c.f. 
https://docs.docker.com/engine/examples/running_ssh_service RUN mkdir /var/run/sshd RUN echo 'root:root' | chpasswd @@ -53,7 +65,6 @@ RUN mkdir /notes/ WORKDIR "/notes" EXPOSE 8888 -RUN mkdir -p /opt/bin COPY ./paddle/scripts/docker/entrypoint /opt/bin/ CMD ["/opt/bin/entrypoint"] diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 7edba3dd09cdc594383597ac7cf7913d50e9f6e1..d9c44f42340323afb4930dab35114f2adc5fbb3a 100755 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -8,42 +8,54 @@ function abort(){ trap 'abort' 0 set -e -if [ ${WITH_GPU} == 'ON' ]; then - ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/lib/libcudnn.so +# If Dockerfile.* sets BUILD_AND_INSTALL to 'ON', it would have copied +# source tree to /paddle, and this scripts should build it into +# /paddle/build. +if [[ ${BUILD_AND_INSTALL:-OFF} == 'ON' ]]; then + if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then + ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/lib/libcudnn.so + fi + + mkdir -p /paddle/build # -p means no error if exists + cd /paddle/build + cmake .. \ + -DWITH_DOC=${WITH_DOC:-OFF} \ + -DWITH_GPU=${WITH_GPU:-OFF} \ + -DWITH_AVX=${WITH_AVX:-OFF} \ + -DWITH_SWIG_PY=ON \ + -DCUDNN_ROOT=/usr/ \ + -DWITH_STYLE_CHECK=OFF \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + make -j `nproc` + make install + + if [[ ${BUILD_WOBOQ:-OFF} == 'ON' ]]; then + apt-get install -y clang-3.8 llvm-3.8 libclang-3.8-dev + # Install woboq_codebrowser. + git clone https://github.com/woboq/woboq_codebrowser /woboq + cd /woboq + cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \ + -DCMAKE_BUILD_TYPE=Release \ + . + make + + export WOBOQ_OUT=/usr/share/nginx/html/paddle + export BUILD_DIR=/paddle/build + mkdir -p $WOBOQ_OUT + cp -rv /woboq/data $WOBOQ_OUT/../data + /woboq/generator/codebrowser_generator \ + -b /paddle/build \ + -a \ + -o $WOBOQ_OUT \ + -p paddle:/paddle + /woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT + cd /woboq + make clean + fi + + pip install /usr/local/opt/paddle/share/wheels/py_paddle*linux*.whl + pip install /usr/local/opt/paddle/share/wheels/paddle*.whl + paddle version fi -mkdir -p /paddle/build # -p means no error if exists -cd /paddle/build -cmake .. \ - -DWITH_DOC=ON \ - -DWITH_GPU=${WITH_GPU} \ - -DWITH_AVX=${WITH_AVX} \ - -DWITH_SWIG_PY=ON \ - -DCUDNN_ROOT=/usr/ \ - -DWITH_STYLE_CHECK=OFF \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -make -j `nproc` -make install - -# Install woboq_codebrowser. -git clone https://github.com/woboq/woboq_codebrowser /woboq -cd /woboq -cmake -DLLVM_CONFIG_EXECUTABLE=/usr/bin/llvm-config-3.8 \ - -DCMAKE_BUILD_TYPE=Release \ - . 
-make - -export WOBOQ_OUT=/usr/share/nginx/html/paddle -export BUILD_DIR=/paddle/build -mkdir -p $WOBOQ_OUT -cp -rv /woboq/data $WOBOQ_OUT/../data -/woboq/generator/codebrowser_generator \ - -b /paddle/build \ - -a \ - -o $WOBOQ_OUT \ - -p paddle:/paddle -/woboq/indexgenerator/codebrowser_indexgenerator $WOBOQ_OUT -cd /woboq -make clean -rm -rf /paddle/build trap : 0 diff --git a/paddle/scripts/docker/root/.bashrc b/paddle/scripts/docker/root/.bashrc new file mode 100755 index 0000000000000000000000000000000000000000..4b3024e4e81a0fa206a796c12a8b9d72f1a8f5d9 --- /dev/null +++ b/paddle/scripts/docker/root/.bashrc @@ -0,0 +1,46 @@ +# Locales + +export LC_ALL=en_US.UTF-8 +export LANG=en_US.UTF-8 +export LANGUAGE=en_US.UTF-8 + +# Aliases + +alias rm='rm -i' +alias cp='cp -i' +alias mv='mv -i' + +alias ls='ls -hFG' +alias l='ls -lF' +alias ll='ls -alF' +alias lt='ls -ltrF' +alias ll='ls -alF' +alias lls='ls -alSrF' +alias llt='ls -altrF' + +# Colorize directory listing + +alias ls="ls -ph --color=auto" + +# Colorize grep + +if echo hello|grep --color=auto l >/dev/null 2>&1; then + export GREP_OPTIONS="--color=auto" GREP_COLOR="1;31" +fi + +# Shell + +export CLICOLOR="1" + +YELLOW="\[\033[1;33m\]" +NO_COLOUR="\[\033[0m\]" +GREEN="\[\033[1;32m\]" +WHITE="\[\033[1;37m\]" + +source ~/.scripts/git-prompt.sh + +export PS1="\[\033[1;33m\]λ $WHITE\h $GREEN\w$YELLOW\$(__git_ps1 \" \[\033[35m\]{\[\033[36m\]%s\[\033[35m\]}\")$NO_COLOUR " + +# Git + +source ~/.scripts/git-completion.sh diff --git a/paddle/scripts/docker/root/.gitconfig b/paddle/scripts/docker/root/.gitconfig new file mode 100755 index 0000000000000000000000000000000000000000..6c249803a50403b9b79e36a13abe7fe88a35729d --- /dev/null +++ b/paddle/scripts/docker/root/.gitconfig @@ -0,0 +1,43 @@ +[user] + name = + email = + +[alias] + st = status --branch --short + ci = commit + br = branch + co = checkout + df = diff + l = log --pretty=format:\"%h %ad | %s%d [%an]\" --graph --date=short + ll = log --stat + +[merge] + tool = vimdiff + +[core] + excludesfile = ~/.gitignore + editor = vim + +[color] + branch = auto + diff = auto + status = auto + +[color "branch"] + current = yellow reverse + local = yellow + remote = green + +[color "diff"] + meta = yellow bold + frag = magenta bold + old = red bold + new = green bold + +[color "status"] + added = yellow + changed = green + untracked = cyan + +[push] + default = matching \ No newline at end of file diff --git a/paddle/scripts/docker/root/.scripts/git-completion.sh b/paddle/scripts/docker/root/.scripts/git-completion.sh new file mode 100755 index 0000000000000000000000000000000000000000..bdddef5ac2faf50b47dd03539dae8912bec8a16c --- /dev/null +++ b/paddle/scripts/docker/root/.scripts/git-completion.sh @@ -0,0 +1,2663 @@ +#!bash +# +# bash/zsh completion support for core Git. +# +# Copyright (C) 2006,2007 Shawn O. Pearce +# Conceptually based on gitcompletion (http://gitweb.hawaga.org.uk/). +# Distributed under the GNU General Public License, version 2.0. +# +# The contained completion routines provide support for completing: +# +# *) local and remote branch names +# *) local and remote tag names +# *) .git/remotes file names +# *) git 'subcommands' +# *) tree paths within 'ref:path/to/file' expressions +# *) file paths within current working directory and index +# *) common --long-options +# +# To use these routines: +# +# 1) Copy this file to somewhere (e.g. ~/.git-completion.sh). 
+# 2) Add the following line to your .bashrc/.zshrc: +# source ~/.git-completion.sh +# 3) Consider changing your PS1 to also show the current branch, +# see git-prompt.sh for details. + +case "$COMP_WORDBREAKS" in +*:*) : great ;; +*) COMP_WORDBREAKS="$COMP_WORDBREAKS:" +esac + +# __gitdir accepts 0 or 1 arguments (i.e., location) +# returns location of .git repo +__gitdir () +{ + if [ -z "${1-}" ]; then + if [ -n "${__git_dir-}" ]; then + echo "$__git_dir" + elif [ -n "${GIT_DIR-}" ]; then + test -d "${GIT_DIR-}" || return 1 + echo "$GIT_DIR" + elif [ -d .git ]; then + echo .git + else + git rev-parse --git-dir 2>/dev/null + fi + elif [ -d "$1/.git" ]; then + echo "$1/.git" + else + echo "$1" + fi +} + +# The following function is based on code from: +# +# bash_completion - programmable completion functions for bash 3.2+ +# +# Copyright © 2006-2008, Ian Macdonald +# © 2009-2010, Bash Completion Maintainers +# +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# The latest version of this software can be obtained here: +# +# http://bash-completion.alioth.debian.org/ +# +# RELEASE: 2.x + +# This function can be used to access a tokenized list of words +# on the command line: +# +# __git_reassemble_comp_words_by_ref '=:' +# if test "${words_[cword_-1]}" = -w +# then +# ... +# fi +# +# The argument should be a collection of characters from the list of +# word completion separators (COMP_WORDBREAKS) to treat as ordinary +# characters. +# +# This is roughly equivalent to going back in time and setting +# COMP_WORDBREAKS to exclude those characters. The intent is to +# make option types like --date= and : easy to +# recognize by treating each shell word as a single token. +# +# It is best not to set COMP_WORDBREAKS directly because the value is +# shared with other completion scripts. By the time the completion +# function gets called, COMP_WORDS has already been populated so local +# changes to COMP_WORDBREAKS have no effect. +# +# Output: words_, cword_, cur_. + +__git_reassemble_comp_words_by_ref() +{ + local exclude i j first + # Which word separators to exclude? + exclude="${1//[^$COMP_WORDBREAKS]}" + cword_=$COMP_CWORD + if [ -z "$exclude" ]; then + words_=("${COMP_WORDS[@]}") + return + fi + # List of word completion separators has shrunk; + # re-assemble words to complete. + for ((i=0, j=0; i < ${#COMP_WORDS[@]}; i++, j++)); do + # Append each nonempty word consisting of just + # word separator characters to the current word. + first=t + while + [ $i -gt 0 ] && + [ -n "${COMP_WORDS[$i]}" ] && + # word consists of excluded word separators + [ "${COMP_WORDS[$i]//[^$exclude]}" = "${COMP_WORDS[$i]}" ] + do + # Attach to the previous token, + # unless the previous token is the command name. 
+ if [ $j -ge 2 ] && [ -n "$first" ]; then + ((j--)) + fi + first= + words_[$j]=${words_[j]}${COMP_WORDS[i]} + if [ $i = $COMP_CWORD ]; then + cword_=$j + fi + if (($i < ${#COMP_WORDS[@]} - 1)); then + ((i++)) + else + # Done. + return + fi + done + words_[$j]=${words_[j]}${COMP_WORDS[i]} + if [ $i = $COMP_CWORD ]; then + cword_=$j + fi + done +} + +if ! type _get_comp_words_by_ref >/dev/null 2>&1; then +_get_comp_words_by_ref () +{ + local exclude cur_ words_ cword_ + if [ "$1" = "-n" ]; then + exclude=$2 + shift 2 + fi + __git_reassemble_comp_words_by_ref "$exclude" + cur_=${words_[cword_]} + while [ $# -gt 0 ]; do + case "$1" in + cur) + cur=$cur_ + ;; + prev) + prev=${words_[$cword_-1]} + ;; + words) + words=("${words_[@]}") + ;; + cword) + cword=$cword_ + ;; + esac + shift + done +} +fi + +__gitcompadd () +{ + local i=0 + for x in $1; do + if [[ "$x" == "$3"* ]]; then + COMPREPLY[i++]="$2$x$4" + fi + done +} + +# Generates completion reply, appending a space to possible completion words, +# if necessary. +# It accepts 1 to 4 arguments: +# 1: List of possible completion words. +# 2: A prefix to be added to each possible completion word (optional). +# 3: Generate possible completion matches for this word (optional). +# 4: A suffix to be appended to each possible completion word (optional). +__gitcomp () +{ + local cur_="${3-$cur}" + + case "$cur_" in + --*=) + ;; + *) + local c i=0 IFS=$' \t\n' + for c in $1; do + c="$c${4-}" + if [[ $c == "$cur_"* ]]; then + case $c in + --*=*|*.) ;; + *) c="$c " ;; + esac + COMPREPLY[i++]="${2-}$c" + fi + done + ;; + esac +} + +# Generates completion reply from newline-separated possible completion words +# by appending a space to all of them. +# It accepts 1 to 4 arguments: +# 1: List of possible completion words, separated by a single newline. +# 2: A prefix to be added to each possible completion word (optional). +# 3: Generate possible completion matches for this word (optional). +# 4: A suffix to be appended to each possible completion word instead of +# the default space (optional). If specified but empty, nothing is +# appended. +__gitcomp_nl () +{ + local IFS=$'\n' + __gitcompadd "$1" "${2-}" "${3-$cur}" "${4- }" +} + +# Generates completion reply with compgen from newline-separated possible +# completion filenames. +# It accepts 1 to 3 arguments: +# 1: List of possible completion filenames, separated by a single newline. +# 2: A directory prefix to be added to each possible completion filename +# (optional). +# 3: Generate possible completion matches for this word (optional). +__gitcomp_file () +{ + local IFS=$'\n' + + # XXX does not work when the directory prefix contains a tilde, + # since tilde expansion is not applied. + # This means that COMPREPLY will be empty and Bash default + # completion will be used. + __gitcompadd "$1" "${2-}" "${3-$cur}" "" + + # use a hack to enable file mode in bash < 4 + compopt -o filenames +o nospace 2>/dev/null || + compgen -f /non-existing-dir/ > /dev/null +} + +# Execute 'git ls-files', unless the --committable option is specified, in +# which case it runs 'git diff-index' to find out the files that can be +# committed. It return paths relative to the directory specified in the first +# argument, and using the options specified in the second argument. 
+__git_ls_files_helper () +{ + ( + test -n "${CDPATH+set}" && unset CDPATH + cd "$1" + if [ "$2" == "--committable" ]; then + git diff-index --name-only --relative HEAD + else + # NOTE: $2 is not quoted in order to support multiple options + git ls-files --exclude-standard $2 + fi + ) 2>/dev/null +} + + +# __git_index_files accepts 1 or 2 arguments: +# 1: Options to pass to ls-files (required). +# 2: A directory path (optional). +# If provided, only files within the specified directory are listed. +# Sub directories are never recursed. Path must have a trailing +# slash. +__git_index_files () +{ + local dir="$(__gitdir)" root="${2-.}" file + + if [ -d "$dir" ]; then + __git_ls_files_helper "$root" "$1" | + while read -r file; do + case "$file" in + ?*/*) echo "${file%%/*}" ;; + *) echo "$file" ;; + esac + done | sort | uniq + fi +} + +__git_heads () +{ + local dir="$(__gitdir)" + if [ -d "$dir" ]; then + git --git-dir="$dir" for-each-ref --format='%(refname:short)' \ + refs/heads + return + fi +} + +__git_tags () +{ + local dir="$(__gitdir)" + if [ -d "$dir" ]; then + git --git-dir="$dir" for-each-ref --format='%(refname:short)' \ + refs/tags + return + fi +} + +# __git_refs accepts 0, 1 (to pass to __gitdir), or 2 arguments +# presence of 2nd argument means use the guess heuristic employed +# by checkout for tracking branches +__git_refs () +{ + local i hash dir="$(__gitdir "${1-}")" track="${2-}" + local format refs + if [ -d "$dir" ]; then + case "$cur" in + refs|refs/*) + format="refname" + refs="${cur%/*}" + track="" + ;; + *) + for i in HEAD FETCH_HEAD ORIG_HEAD MERGE_HEAD; do + if [ -e "$dir/$i" ]; then echo $i; fi + done + format="refname:short" + refs="refs/tags refs/heads refs/remotes" + ;; + esac + git --git-dir="$dir" for-each-ref --format="%($format)" \ + $refs + if [ -n "$track" ]; then + # employ the heuristic used by git checkout + # Try to find a remote branch that matches the completion word + # but only output if the branch name is unique + local ref entry + git --git-dir="$dir" for-each-ref --shell --format="ref=%(refname:short)" \ + "refs/remotes/" | \ + while read -r entry; do + eval "$entry" + ref="${ref#*/}" + if [[ "$ref" == "$cur"* ]]; then + echo "$ref" + fi + done | sort | uniq -u + fi + return + fi + case "$cur" in + refs|refs/*) + git ls-remote "$dir" "$cur*" 2>/dev/null | \ + while read -r hash i; do + case "$i" in + *^{}) ;; + *) echo "$i" ;; + esac + done + ;; + *) + echo "HEAD" + git for-each-ref --format="%(refname:short)" -- "refs/remotes/$dir/" | sed -e "s#^$dir/##" + ;; + esac +} + +# __git_refs2 requires 1 argument (to pass to __git_refs) +__git_refs2 () +{ + local i + for i in $(__git_refs "$1"); do + echo "$i:$i" + done +} + +# __git_refs_remotes requires 1 argument (to pass to ls-remote) +__git_refs_remotes () +{ + local i hash + git ls-remote "$1" 'refs/heads/*' 2>/dev/null | \ + while read -r hash i; do + echo "$i:refs/remotes/$1/${i#refs/heads/}" + done +} + +__git_remotes () +{ + local i IFS=$'\n' d="$(__gitdir)" + test -d "$d/remotes" && ls -1 "$d/remotes" + for i in $(git --git-dir="$d" config --get-regexp 'remote\..*\.url' 2>/dev/null); do + i="${i#remote.}" + echo "${i/.url*/}" + done +} + +__git_list_merge_strategies () +{ + git merge -s help 2>&1 | + sed -n -e '/[Aa]vailable strategies are: /,/^$/{ + s/\.$// + s/.*:// + s/^[ ]*// + s/[ ]*$// + p + }' +} + +__git_merge_strategies= +# 'git merge -s help' (and thus detection of the merge strategy +# list) fails, unfortunately, if run outside of any git working +# tree. 
__git_merge_strategies is set to the empty string in +# that case, and the detection will be repeated the next time it +# is needed. +__git_compute_merge_strategies () +{ + test -n "$__git_merge_strategies" || + __git_merge_strategies=$(__git_list_merge_strategies) +} + +__git_complete_revlist_file () +{ + local pfx ls ref cur_="$cur" + case "$cur_" in + *..?*:*) + return + ;; + ?*:*) + ref="${cur_%%:*}" + cur_="${cur_#*:}" + case "$cur_" in + ?*/*) + pfx="${cur_%/*}" + cur_="${cur_##*/}" + ls="$ref:$pfx" + pfx="$pfx/" + ;; + *) + ls="$ref" + ;; + esac + + case "$COMP_WORDBREAKS" in + *:*) : great ;; + *) pfx="$ref:$pfx" ;; + esac + + __gitcomp_nl "$(git --git-dir="$(__gitdir)" ls-tree "$ls" 2>/dev/null \ + | sed '/^100... blob /{ + s,^.* ,, + s,$, , + } + /^120000 blob /{ + s,^.* ,, + s,$, , + } + /^040000 tree /{ + s,^.* ,, + s,$,/, + } + s/^.* //')" \ + "$pfx" "$cur_" "" + ;; + *...*) + pfx="${cur_%...*}..." + cur_="${cur_#*...}" + __gitcomp_nl "$(__git_refs)" "$pfx" "$cur_" + ;; + *..*) + pfx="${cur_%..*}.." + cur_="${cur_#*..}" + __gitcomp_nl "$(__git_refs)" "$pfx" "$cur_" + ;; + *) + __gitcomp_nl "$(__git_refs)" + ;; + esac +} + + +# __git_complete_index_file requires 1 argument: +# 1: the options to pass to ls-file +# +# The exception is --committable, which finds the files appropriate commit. +__git_complete_index_file () +{ + local pfx="" cur_="$cur" + + case "$cur_" in + ?*/*) + pfx="${cur_%/*}" + cur_="${cur_##*/}" + pfx="${pfx}/" + ;; + esac + + __gitcomp_file "$(__git_index_files "$1" "$pfx")" "$pfx" "$cur_" +} + +__git_complete_file () +{ + __git_complete_revlist_file +} + +__git_complete_revlist () +{ + __git_complete_revlist_file +} + +__git_complete_remote_or_refspec () +{ + local cur_="$cur" cmd="${words[1]}" + local i c=2 remote="" pfx="" lhs=1 no_complete_refspec=0 + if [ "$cmd" = "remote" ]; then + ((c++)) + fi + while [ $c -lt $cword ]; do + i="${words[c]}" + case "$i" in + --mirror) [ "$cmd" = "push" ] && no_complete_refspec=1 ;; + --all) + case "$cmd" in + push) no_complete_refspec=1 ;; + fetch) + return + ;; + *) ;; + esac + ;; + -*) ;; + *) remote="$i"; break ;; + esac + ((c++)) + done + if [ -z "$remote" ]; then + __gitcomp_nl "$(__git_remotes)" + return + fi + if [ $no_complete_refspec = 1 ]; then + return + fi + [ "$remote" = "." 
] && remote= + case "$cur_" in + *:*) + case "$COMP_WORDBREAKS" in + *:*) : great ;; + *) pfx="${cur_%%:*}:" ;; + esac + cur_="${cur_#*:}" + lhs=0 + ;; + +*) + pfx="+" + cur_="${cur_#+}" + ;; + esac + case "$cmd" in + fetch) + if [ $lhs = 1 ]; then + __gitcomp_nl "$(__git_refs2 "$remote")" "$pfx" "$cur_" + else + __gitcomp_nl "$(__git_refs)" "$pfx" "$cur_" + fi + ;; + pull|remote) + if [ $lhs = 1 ]; then + __gitcomp_nl "$(__git_refs "$remote")" "$pfx" "$cur_" + else + __gitcomp_nl "$(__git_refs)" "$pfx" "$cur_" + fi + ;; + push) + if [ $lhs = 1 ]; then + __gitcomp_nl "$(__git_refs)" "$pfx" "$cur_" + else + __gitcomp_nl "$(__git_refs "$remote")" "$pfx" "$cur_" + fi + ;; + esac +} + +__git_complete_strategy () +{ + __git_compute_merge_strategies + case "$prev" in + -s|--strategy) + __gitcomp "$__git_merge_strategies" + return 0 + esac + case "$cur" in + --strategy=*) + __gitcomp "$__git_merge_strategies" "" "${cur##--strategy=}" + return 0 + ;; + esac + return 1 +} + +__git_commands () { + if test -n "${GIT_TESTING_COMMAND_COMPLETION:-}" + then + printf "%s" "${GIT_TESTING_COMMAND_COMPLETION}" + else + git help -a|egrep '^ [a-zA-Z0-9]' + fi +} + +__git_list_all_commands () +{ + local i IFS=" "$'\n' + for i in $(__git_commands) + do + case $i in + *--*) : helper pattern;; + *) echo $i;; + esac + done +} + +__git_all_commands= +__git_compute_all_commands () +{ + test -n "$__git_all_commands" || + __git_all_commands=$(__git_list_all_commands) +} + +__git_list_porcelain_commands () +{ + local i IFS=" "$'\n' + __git_compute_all_commands + for i in $__git_all_commands + do + case $i in + *--*) : helper pattern;; + applymbox) : ask gittus;; + applypatch) : ask gittus;; + archimport) : import;; + cat-file) : plumbing;; + check-attr) : plumbing;; + check-ignore) : plumbing;; + check-mailmap) : plumbing;; + check-ref-format) : plumbing;; + checkout-index) : plumbing;; + commit-tree) : plumbing;; + count-objects) : infrequent;; + credential-cache) : credentials helper;; + credential-store) : credentials helper;; + cvsexportcommit) : export;; + cvsimport) : import;; + cvsserver) : daemon;; + daemon) : daemon;; + diff-files) : plumbing;; + diff-index) : plumbing;; + diff-tree) : plumbing;; + fast-import) : import;; + fast-export) : export;; + fsck-objects) : plumbing;; + fetch-pack) : plumbing;; + fmt-merge-msg) : plumbing;; + for-each-ref) : plumbing;; + hash-object) : plumbing;; + http-*) : transport;; + index-pack) : plumbing;; + init-db) : deprecated;; + local-fetch) : plumbing;; + lost-found) : infrequent;; + ls-files) : plumbing;; + ls-remote) : plumbing;; + ls-tree) : plumbing;; + mailinfo) : plumbing;; + mailsplit) : plumbing;; + merge-*) : plumbing;; + mktree) : plumbing;; + mktag) : plumbing;; + pack-objects) : plumbing;; + pack-redundant) : plumbing;; + pack-refs) : plumbing;; + parse-remote) : plumbing;; + patch-id) : plumbing;; + peek-remote) : plumbing;; + prune) : plumbing;; + prune-packed) : plumbing;; + quiltimport) : import;; + read-tree) : plumbing;; + receive-pack) : plumbing;; + remote-*) : transport;; + repo-config) : deprecated;; + rerere) : plumbing;; + rev-list) : plumbing;; + rev-parse) : plumbing;; + runstatus) : plumbing;; + sh-setup) : internal;; + shell) : daemon;; + show-ref) : plumbing;; + send-pack) : plumbing;; + show-index) : plumbing;; + ssh-*) : transport;; + stripspace) : plumbing;; + symbolic-ref) : plumbing;; + tar-tree) : deprecated;; + unpack-file) : plumbing;; + unpack-objects) : plumbing;; + update-index) : plumbing;; + update-ref) : plumbing;; + 
update-server-info) : daemon;; + upload-archive) : plumbing;; + upload-pack) : plumbing;; + write-tree) : plumbing;; + var) : infrequent;; + verify-pack) : infrequent;; + verify-tag) : plumbing;; + *) echo $i;; + esac + done +} + +__git_porcelain_commands= +__git_compute_porcelain_commands () +{ + __git_compute_all_commands + test -n "$__git_porcelain_commands" || + __git_porcelain_commands=$(__git_list_porcelain_commands) +} + +__git_pretty_aliases () +{ + local i IFS=$'\n' + for i in $(git --git-dir="$(__gitdir)" config --get-regexp "pretty\..*" 2>/dev/null); do + case "$i" in + pretty.*) + i="${i#pretty.}" + echo "${i/ */}" + ;; + esac + done +} + +__git_aliases () +{ + local i IFS=$'\n' + for i in $(git --git-dir="$(__gitdir)" config --get-regexp "alias\..*" 2>/dev/null); do + case "$i" in + alias.*) + i="${i#alias.}" + echo "${i/ */}" + ;; + esac + done +} + +# __git_aliased_command requires 1 argument +__git_aliased_command () +{ + local word cmdline=$(git --git-dir="$(__gitdir)" \ + config --get "alias.$1") + for word in $cmdline; do + case "$word" in + \!gitk|gitk) + echo "gitk" + return + ;; + \!*) : shell command alias ;; + -*) : option ;; + *=*) : setting env ;; + git) : git itself ;; + *) + echo "$word" + return + esac + done +} + +# __git_find_on_cmdline requires 1 argument +__git_find_on_cmdline () +{ + local word subcommand c=1 + while [ $c -lt $cword ]; do + word="${words[c]}" + for subcommand in $1; do + if [ "$subcommand" = "$word" ]; then + echo "$subcommand" + return + fi + done + ((c++)) + done +} + +__git_has_doubledash () +{ + local c=1 + while [ $c -lt $cword ]; do + if [ "--" = "${words[c]}" ]; then + return 0 + fi + ((c++)) + done + return 1 +} + +# Try to count non option arguments passed on the command line for the +# specified git command. +# When options are used, it is necessary to use the special -- option to +# tell the implementation were non option arguments begin. +# XXX this can not be improved, since options can appear everywhere, as +# an example: +# git mv x -n y +# +# __git_count_arguments requires 1 argument: the git command executed. +__git_count_arguments () +{ + local word i c=0 + + # Skip "git" (first argument) + for ((i=1; i < ${#words[@]}; i++)); do + word="${words[i]}" + + case "$word" in + --) + # Good; we can assume that the following are only non + # option arguments. 
+ ((c = 0)) + ;; + "$1") + # Skip the specified git command and discard git + # main options + ((c = 0)) + ;; + ?*) + ((c++)) + ;; + esac + done + + printf "%d" $c +} + +__git_whitespacelist="nowarn warn error error-all fix" + +_git_am () +{ + local dir="$(__gitdir)" + if [ -d "$dir"/rebase-apply ]; then + __gitcomp "--skip --continue --resolved --abort" + return + fi + case "$cur" in + --whitespace=*) + __gitcomp "$__git_whitespacelist" "" "${cur##--whitespace=}" + return + ;; + --*) + __gitcomp " + --3way --committer-date-is-author-date --ignore-date + --ignore-whitespace --ignore-space-change + --interactive --keep --no-utf8 --signoff --utf8 + --whitespace= --scissors + " + return + esac +} + +_git_apply () +{ + case "$cur" in + --whitespace=*) + __gitcomp "$__git_whitespacelist" "" "${cur##--whitespace=}" + return + ;; + --*) + __gitcomp " + --stat --numstat --summary --check --index + --cached --index-info --reverse --reject --unidiff-zero + --apply --no-add --exclude= + --ignore-whitespace --ignore-space-change + --whitespace= --inaccurate-eof --verbose + " + return + esac +} + +_git_add () +{ + case "$cur" in + --*) + __gitcomp " + --interactive --refresh --patch --update --dry-run + --ignore-errors --intent-to-add + " + return + esac + + # XXX should we check for --update and --all options ? + __git_complete_index_file "--others --modified" +} + +_git_archive () +{ + case "$cur" in + --format=*) + __gitcomp "$(git archive --list)" "" "${cur##--format=}" + return + ;; + --remote=*) + __gitcomp_nl "$(__git_remotes)" "" "${cur##--remote=}" + return + ;; + --*) + __gitcomp " + --format= --list --verbose + --prefix= --remote= --exec= + " + return + ;; + esac + __git_complete_file +} + +_git_bisect () +{ + __git_has_doubledash && return + + local subcommands="start bad good skip reset visualize replay log run" + local subcommand="$(__git_find_on_cmdline "$subcommands")" + if [ -z "$subcommand" ]; then + if [ -f "$(__gitdir)"/BISECT_START ]; then + __gitcomp "$subcommands" + else + __gitcomp "replay start" + fi + return + fi + + case "$subcommand" in + bad|good|reset|skip|start) + __gitcomp_nl "$(__git_refs)" + ;; + *) + ;; + esac +} + +_git_branch () +{ + local i c=1 only_local_ref="n" has_r="n" + + while [ $c -lt $cword ]; do + i="${words[c]}" + case "$i" in + -d|-m) only_local_ref="y" ;; + -r) has_r="y" ;; + esac + ((c++)) + done + + case "$cur" in + --set-upstream-to=*) + __gitcomp "$(__git_refs)" "" "${cur##--set-upstream-to=}" + ;; + --*) + __gitcomp " + --color --no-color --verbose --abbrev= --no-abbrev + --track --no-track --contains --merged --no-merged + --set-upstream-to= --edit-description --list + --unset-upstream + " + ;; + *) + if [ $only_local_ref = "y" -a $has_r = "n" ]; then + __gitcomp_nl "$(__git_heads)" + else + __gitcomp_nl "$(__git_refs)" + fi + ;; + esac +} + +_git_bundle () +{ + local cmd="${words[2]}" + case "$cword" in + 2) + __gitcomp "create list-heads verify unbundle" + ;; + 3) + # looking for a file + ;; + *) + case "$cmd" in + create) + __git_complete_revlist + ;; + esac + ;; + esac +} + +_git_checkout () +{ + __git_has_doubledash && return + + case "$cur" in + --conflict=*) + __gitcomp "diff3 merge" "" "${cur##--conflict=}" + ;; + --*) + __gitcomp " + --quiet --ours --theirs --track --no-track --merge + --conflict= --orphan --patch + " + ;; + *) + # check if --track, --no-track, or --no-guess was specified + # if so, disable DWIM mode + local flags="--track --no-track --no-guess" track=1 + if [ -n "$(__git_find_on_cmdline "$flags")" ]; then + track='' + fi 
+ __gitcomp_nl "$(__git_refs '' $track)" + ;; + esac +} + +_git_cherry () +{ + __gitcomp "$(__git_refs)" +} + +_git_cherry_pick () +{ + local dir="$(__gitdir)" + if [ -f "$dir"/CHERRY_PICK_HEAD ]; then + __gitcomp "--continue --quit --abort" + return + fi + case "$cur" in + --*) + __gitcomp "--edit --no-commit --signoff --strategy= --mainline" + ;; + *) + __gitcomp_nl "$(__git_refs)" + ;; + esac +} + +_git_clean () +{ + case "$cur" in + --*) + __gitcomp "--dry-run --quiet" + return + ;; + esac + + # XXX should we check for -x option ? + __git_complete_index_file "--others" +} + +_git_clone () +{ + case "$cur" in + --*) + __gitcomp " + --local + --no-hardlinks + --shared + --reference + --quiet + --no-checkout + --bare + --mirror + --origin + --upload-pack + --template= + --depth + --single-branch + --branch + " + return + ;; + esac +} + +_git_commit () +{ + case "$prev" in + -c|-C) + __gitcomp_nl "$(__git_refs)" "" "${cur}" + return + ;; + esac + + case "$cur" in + --cleanup=*) + __gitcomp "default strip verbatim whitespace + " "" "${cur##--cleanup=}" + return + ;; + --reuse-message=*|--reedit-message=*|\ + --fixup=*|--squash=*) + __gitcomp_nl "$(__git_refs)" "" "${cur#*=}" + return + ;; + --untracked-files=*) + __gitcomp "all no normal" "" "${cur##--untracked-files=}" + return + ;; + --*) + __gitcomp " + --all --author= --signoff --verify --no-verify + --edit --no-edit + --amend --include --only --interactive + --dry-run --reuse-message= --reedit-message= + --reset-author --file= --message= --template= + --cleanup= --untracked-files --untracked-files= + --verbose --quiet --fixup= --squash= + " + return + esac + + if git rev-parse --verify --quiet HEAD >/dev/null; then + __git_complete_index_file "--committable" + else + # This is the first commit + __git_complete_index_file "--cached" + fi +} + +_git_describe () +{ + case "$cur" in + --*) + __gitcomp " + --all --tags --contains --abbrev= --candidates= + --exact-match --debug --long --match --always + " + return + esac + __gitcomp_nl "$(__git_refs)" +} + +__git_diff_algorithms="myers minimal patience histogram" + +__git_diff_common_options="--stat --numstat --shortstat --summary + --patch-with-stat --name-only --name-status --color + --no-color --color-words --no-renames --check + --full-index --binary --abbrev --diff-filter= + --find-copies-harder + --text --ignore-space-at-eol --ignore-space-change + --ignore-all-space --exit-code --quiet --ext-diff + --no-ext-diff + --no-prefix --src-prefix= --dst-prefix= + --inter-hunk-context= + --patience --histogram --minimal + --raw --word-diff + --dirstat --dirstat= --dirstat-by-file + --dirstat-by-file= --cumulative + --diff-algorithm= +" + +_git_diff () +{ + __git_has_doubledash && return + + case "$cur" in + --diff-algorithm=*) + __gitcomp "$__git_diff_algorithms" "" "${cur##--diff-algorithm=}" + return + ;; + --*) + __gitcomp "--cached --staged --pickaxe-all --pickaxe-regex + --base --ours --theirs --no-index + $__git_diff_common_options + " + return + ;; + esac + __git_complete_revlist_file +} + +__git_mergetools_common="diffuse ecmerge emerge kdiff3 meld opendiff + tkdiff vimdiff gvimdiff xxdiff araxis p4merge bc3 codecompare +" + +_git_difftool () +{ + __git_has_doubledash && return + + case "$cur" in + --tool=*) + __gitcomp "$__git_mergetools_common kompare" "" "${cur##--tool=}" + return + ;; + --*) + __gitcomp "--cached --staged --pickaxe-all --pickaxe-regex + --base --ours --theirs + --no-renames --diff-filter= --find-copies-harder + --relative --ignore-submodules + --tool=" + return + 
;; + esac + __git_complete_revlist_file +} + +__git_fetch_options=" + --quiet --verbose --append --upload-pack --force --keep --depth= + --tags --no-tags --all --prune --dry-run +" + +_git_fetch () +{ + case "$cur" in + --*) + __gitcomp "$__git_fetch_options" + return + ;; + esac + __git_complete_remote_or_refspec +} + +__git_format_patch_options=" + --stdout --attach --no-attach --thread --thread= --no-thread + --numbered --start-number --numbered-files --keep-subject --signoff + --signature --no-signature --in-reply-to= --cc= --full-index --binary + --not --all --cover-letter --no-prefix --src-prefix= --dst-prefix= + --inline --suffix= --ignore-if-in-upstream --subject-prefix= + --output-directory --reroll-count --to= --quiet --notes +" + +_git_format_patch () +{ + case "$cur" in + --thread=*) + __gitcomp " + deep shallow + " "" "${cur##--thread=}" + return + ;; + --*) + __gitcomp "$__git_format_patch_options" + return + ;; + esac + __git_complete_revlist +} + +_git_fsck () +{ + case "$cur" in + --*) + __gitcomp " + --tags --root --unreachable --cache --no-reflogs --full + --strict --verbose --lost-found + " + return + ;; + esac +} + +_git_gc () +{ + case "$cur" in + --*) + __gitcomp "--prune --aggressive" + return + ;; + esac +} + +_git_gitk () +{ + _gitk +} + +__git_match_ctag() { + awk "/^${1////\\/}/ { print \$1 }" "$2" +} + +_git_grep () +{ + __git_has_doubledash && return + + case "$cur" in + --*) + __gitcomp " + --cached + --text --ignore-case --word-regexp --invert-match + --full-name --line-number + --extended-regexp --basic-regexp --fixed-strings + --perl-regexp + --files-with-matches --name-only + --files-without-match + --max-depth + --count + --and --or --not --all-match + " + return + ;; + esac + + case "$cword,$prev" in + 2,*|*,-*) + if test -r tags; then + __gitcomp_nl "$(__git_match_ctag "$cur" tags)" + return + fi + ;; + esac + + __gitcomp_nl "$(__git_refs)" +} + +_git_help () +{ + case "$cur" in + --*) + __gitcomp "--all --info --man --web" + return + ;; + esac + __git_compute_all_commands + __gitcomp "$__git_all_commands $(__git_aliases) + attributes cli core-tutorial cvs-migration + diffcore gitk glossary hooks ignore modules + namespaces repository-layout tutorial tutorial-2 + workflows + " +} + +_git_init () +{ + case "$cur" in + --shared=*) + __gitcomp " + false true umask group all world everybody + " "" "${cur##--shared=}" + return + ;; + --*) + __gitcomp "--quiet --bare --template= --shared --shared=" + return + ;; + esac +} + +_git_ls_files () +{ + case "$cur" in + --*) + __gitcomp "--cached --deleted --modified --others --ignored + --stage --directory --no-empty-directory --unmerged + --killed --exclude= --exclude-from= + --exclude-per-directory= --exclude-standard + --error-unmatch --with-tree= --full-name + --abbrev --ignored --exclude-per-directory + " + return + ;; + esac + + # XXX ignore options like --modified and always suggest all cached + # files. 
+ __git_complete_index_file "--cached" +} + +_git_ls_remote () +{ + __gitcomp_nl "$(__git_remotes)" +} + +_git_ls_tree () +{ + __git_complete_file +} + +# Options that go well for log, shortlog and gitk +__git_log_common_options=" + --not --all + --branches --tags --remotes + --first-parent --merges --no-merges + --max-count= + --max-age= --since= --after= + --min-age= --until= --before= + --min-parents= --max-parents= + --no-min-parents --no-max-parents +" +# Options that go well for log and gitk (not shortlog) +__git_log_gitk_options=" + --dense --sparse --full-history + --simplify-merges --simplify-by-decoration + --left-right --notes --no-notes +" +# Options that go well for log and shortlog (not gitk) +__git_log_shortlog_options=" + --author= --committer= --grep= + --all-match +" + +__git_log_pretty_formats="oneline short medium full fuller email raw format:" +__git_log_date_formats="relative iso8601 rfc2822 short local default raw" + +_git_log () +{ + __git_has_doubledash && return + + local g="$(git rev-parse --git-dir 2>/dev/null)" + local merge="" + if [ -f "$g/MERGE_HEAD" ]; then + merge="--merge" + fi + case "$cur" in + --pretty=*|--format=*) + __gitcomp "$__git_log_pretty_formats $(__git_pretty_aliases) + " "" "${cur#*=}" + return + ;; + --date=*) + __gitcomp "$__git_log_date_formats" "" "${cur##--date=}" + return + ;; + --decorate=*) + __gitcomp "long short" "" "${cur##--decorate=}" + return + ;; + --*) + __gitcomp " + $__git_log_common_options + $__git_log_shortlog_options + $__git_log_gitk_options + --root --topo-order --date-order --reverse + --follow --full-diff + --abbrev-commit --abbrev= + --relative-date --date= + --pretty= --format= --oneline + --cherry-pick + --graph + --decorate --decorate= + --walk-reflogs + --parents --children + $merge + $__git_diff_common_options + --pickaxe-all --pickaxe-regex + " + return + ;; + esac + __git_complete_revlist +} + +__git_merge_options=" + --no-commit --no-stat --log --no-log --squash --strategy + --commit --stat --no-squash --ff --no-ff --ff-only --edit --no-edit +" + +_git_merge () +{ + __git_complete_strategy && return + + case "$cur" in + --*) + __gitcomp "$__git_merge_options" + return + esac + __gitcomp_nl "$(__git_refs)" +} + +_git_mergetool () +{ + case "$cur" in + --tool=*) + __gitcomp "$__git_mergetools_common tortoisemerge" "" "${cur##--tool=}" + return + ;; + --*) + __gitcomp "--tool=" + return + ;; + esac +} + +_git_merge_base () +{ + __gitcomp_nl "$(__git_refs)" +} + +_git_mv () +{ + case "$cur" in + --*) + __gitcomp "--dry-run" + return + ;; + esac + + if [ $(__git_count_arguments "mv") -gt 0 ]; then + # We need to show both cached and untracked files (including + # empty directories) since this may not be the last argument. 
+ __git_complete_index_file "--cached --others --directory" + else + __git_complete_index_file "--cached" + fi +} + +_git_name_rev () +{ + __gitcomp "--tags --all --stdin" +} + +_git_notes () +{ + local subcommands='add append copy edit list prune remove show' + local subcommand="$(__git_find_on_cmdline "$subcommands")" + + case "$subcommand,$cur" in + ,--*) + __gitcomp '--ref' + ;; + ,*) + case "$prev" in + --ref) + __gitcomp_nl "$(__git_refs)" + ;; + *) + __gitcomp "$subcommands --ref" + ;; + esac + ;; + add,--reuse-message=*|append,--reuse-message=*|\ + add,--reedit-message=*|append,--reedit-message=*) + __gitcomp_nl "$(__git_refs)" "" "${cur#*=}" + ;; + add,--*|append,--*) + __gitcomp '--file= --message= --reedit-message= + --reuse-message=' + ;; + copy,--*) + __gitcomp '--stdin' + ;; + prune,--*) + __gitcomp '--dry-run --verbose' + ;; + prune,*) + ;; + *) + case "$prev" in + -m|-F) + ;; + *) + __gitcomp_nl "$(__git_refs)" + ;; + esac + ;; + esac +} + +_git_pull () +{ + __git_complete_strategy && return + + case "$cur" in + --*) + __gitcomp " + --rebase --no-rebase + $__git_merge_options + $__git_fetch_options + " + return + ;; + esac + __git_complete_remote_or_refspec +} + +_git_push () +{ + case "$prev" in + --repo) + __gitcomp_nl "$(__git_remotes)" + return + esac + case "$cur" in + --repo=*) + __gitcomp_nl "$(__git_remotes)" "" "${cur##--repo=}" + return + ;; + --*) + __gitcomp " + --all --mirror --tags --dry-run --force --verbose + --receive-pack= --repo= --set-upstream + " + return + ;; + esac + __git_complete_remote_or_refspec +} + +_git_rebase () +{ + local dir="$(__gitdir)" + if [ -d "$dir"/rebase-apply ] || [ -d "$dir"/rebase-merge ]; then + __gitcomp "--continue --skip --abort" + return + fi + __git_complete_strategy && return + case "$cur" in + --whitespace=*) + __gitcomp "$__git_whitespacelist" "" "${cur##--whitespace=}" + return + ;; + --*) + __gitcomp " + --onto --merge --strategy --interactive + --preserve-merges --stat --no-stat + --committer-date-is-author-date --ignore-date + --ignore-whitespace --whitespace= + --autosquash + " + + return + esac + __gitcomp_nl "$(__git_refs)" +} + +_git_reflog () +{ + local subcommands="show delete expire" + local subcommand="$(__git_find_on_cmdline "$subcommands")" + + if [ -z "$subcommand" ]; then + __gitcomp "$subcommands" + else + __gitcomp_nl "$(__git_refs)" + fi +} + +__git_send_email_confirm_options="always never auto cc compose" +__git_send_email_suppresscc_options="author self cc bodycc sob cccmd body all" + +_git_send_email () +{ + case "$cur" in + --confirm=*) + __gitcomp " + $__git_send_email_confirm_options + " "" "${cur##--confirm=}" + return + ;; + --suppress-cc=*) + __gitcomp " + $__git_send_email_suppresscc_options + " "" "${cur##--suppress-cc=}" + + return + ;; + --smtp-encryption=*) + __gitcomp "ssl tls" "" "${cur##--smtp-encryption=}" + return + ;; + --thread=*) + __gitcomp " + deep shallow + " "" "${cur##--thread=}" + return + ;; + --*) + __gitcomp "--annotate --bcc --cc --cc-cmd --chain-reply-to + --compose --confirm= --dry-run --envelope-sender + --from --identity + --in-reply-to --no-chain-reply-to --no-signed-off-by-cc + --no-suppress-from --no-thread --quiet + --signed-off-by-cc --smtp-pass --smtp-server + --smtp-server-port --smtp-encryption= --smtp-user + --subject --suppress-cc= --suppress-from --thread --to + --validate --no-validate + $__git_format_patch_options" + return + ;; + esac + __git_complete_revlist +} + +_git_stage () +{ + _git_add +} + +__git_config_get_set_variables () +{ + local prevword 
word config_file= c=$cword + while [ $c -gt 1 ]; do + word="${words[c]}" + case "$word" in + --system|--global|--local|--file=*) + config_file="$word" + break + ;; + -f|--file) + config_file="$word $prevword" + break + ;; + esac + prevword=$word + c=$((--c)) + done + + git --git-dir="$(__gitdir)" config $config_file --list 2>/dev/null | + while read -r line + do + case "$line" in + *.*=*) + echo "${line/=*/}" + ;; + esac + done +} + +_git_config () +{ + case "$prev" in + branch.*.remote|branch.*.pushremote) + __gitcomp_nl "$(__git_remotes)" + return + ;; + branch.*.merge) + __gitcomp_nl "$(__git_refs)" + return + ;; + branch.*.rebase) + __gitcomp "false true" + return + ;; + remote.pushdefault) + __gitcomp_nl "$(__git_remotes)" + return + ;; + remote.*.fetch) + local remote="${prev#remote.}" + remote="${remote%.fetch}" + if [ -z "$cur" ]; then + __gitcomp_nl "refs/heads/" "" "" "" + return + fi + __gitcomp_nl "$(__git_refs_remotes "$remote")" + return + ;; + remote.*.push) + local remote="${prev#remote.}" + remote="${remote%.push}" + __gitcomp_nl "$(git --git-dir="$(__gitdir)" \ + for-each-ref --format='%(refname):%(refname)' \ + refs/heads)" + return + ;; + pull.twohead|pull.octopus) + __git_compute_merge_strategies + __gitcomp "$__git_merge_strategies" + return + ;; + color.branch|color.diff|color.interactive|\ + color.showbranch|color.status|color.ui) + __gitcomp "always never auto" + return + ;; + color.pager) + __gitcomp "false true" + return + ;; + color.*.*) + __gitcomp " + normal black red green yellow blue magenta cyan white + bold dim ul blink reverse + " + return + ;; + diff.submodule) + __gitcomp "log short" + return + ;; + help.format) + __gitcomp "man info web html" + return + ;; + log.date) + __gitcomp "$__git_log_date_formats" + return + ;; + sendemail.aliasesfiletype) + __gitcomp "mutt mailrc pine elm gnus" + return + ;; + sendemail.confirm) + __gitcomp "$__git_send_email_confirm_options" + return + ;; + sendemail.suppresscc) + __gitcomp "$__git_send_email_suppresscc_options" + return + ;; + --get|--get-all|--unset|--unset-all) + __gitcomp_nl "$(__git_config_get_set_variables)" + return + ;; + *.*) + return + ;; + esac + case "$cur" in + --*) + __gitcomp " + --system --global --local --file= + --list --replace-all + --get --get-all --get-regexp + --add --unset --unset-all + --remove-section --rename-section + " + return + ;; + branch.*.*) + local pfx="${cur%.*}." cur_="${cur##*.}" + __gitcomp "remote pushremote merge mergeoptions rebase" "$pfx" "$cur_" + return + ;; + branch.*) + local pfx="${cur%.*}." cur_="${cur#*.}" + __gitcomp_nl "$(__git_heads)" "$pfx" "$cur_" "." + return + ;; + guitool.*.*) + local pfx="${cur%.*}." cur_="${cur##*.}" + __gitcomp " + argprompt cmd confirm needsfile noconsole norescan + prompt revprompt revunmerged title + " "$pfx" "$cur_" + return + ;; + difftool.*.*) + local pfx="${cur%.*}." cur_="${cur##*.}" + __gitcomp "cmd path" "$pfx" "$cur_" + return + ;; + man.*.*) + local pfx="${cur%.*}." cur_="${cur##*.}" + __gitcomp "cmd path" "$pfx" "$cur_" + return + ;; + mergetool.*.*) + local pfx="${cur%.*}." cur_="${cur##*.}" + __gitcomp "cmd path trustExitCode" "$pfx" "$cur_" + return + ;; + pager.*) + local pfx="${cur%.*}." cur_="${cur#*.}" + __git_compute_all_commands + __gitcomp_nl "$__git_all_commands" "$pfx" "$cur_" + return + ;; + remote.*.*) + local pfx="${cur%.*}." 
cur_="${cur##*.}" + __gitcomp " + url proxy fetch push mirror skipDefaultUpdate + receivepack uploadpack tagopt pushurl + " "$pfx" "$cur_" + return + ;; + remote.*) + local pfx="${cur%.*}." cur_="${cur#*.}" + __gitcomp_nl "$(__git_remotes)" "$pfx" "$cur_" "." + return + ;; + url.*.*) + local pfx="${cur%.*}." cur_="${cur##*.}" + __gitcomp "insteadOf pushInsteadOf" "$pfx" "$cur_" + return + ;; + esac + __gitcomp " + add.ignoreErrors + advice.commitBeforeMerge + advice.detachedHead + advice.implicitIdentity + advice.pushNonFastForward + advice.resolveConflict + advice.statusHints + alias. + am.keepcr + apply.ignorewhitespace + apply.whitespace + branch.autosetupmerge + branch.autosetuprebase + browser. + clean.requireForce + color.branch + color.branch.current + color.branch.local + color.branch.plain + color.branch.remote + color.decorate.HEAD + color.decorate.branch + color.decorate.remoteBranch + color.decorate.stash + color.decorate.tag + color.diff + color.diff.commit + color.diff.frag + color.diff.func + color.diff.meta + color.diff.new + color.diff.old + color.diff.plain + color.diff.whitespace + color.grep + color.grep.context + color.grep.filename + color.grep.function + color.grep.linenumber + color.grep.match + color.grep.selected + color.grep.separator + color.interactive + color.interactive.error + color.interactive.header + color.interactive.help + color.interactive.prompt + color.pager + color.showbranch + color.status + color.status.added + color.status.changed + color.status.header + color.status.nobranch + color.status.untracked + color.status.updated + color.ui + commit.status + commit.template + core.abbrev + core.askpass + core.attributesfile + core.autocrlf + core.bare + core.bigFileThreshold + core.compression + core.createObject + core.deltaBaseCacheLimit + core.editor + core.eol + core.excludesfile + core.fileMode + core.fsyncobjectfiles + core.gitProxy + core.ignoreStat + core.ignorecase + core.logAllRefUpdates + core.loosecompression + core.notesRef + core.packedGitLimit + core.packedGitWindowSize + core.pager + core.preferSymlinkRefs + core.preloadindex + core.quotepath + core.repositoryFormatVersion + core.safecrlf + core.sharedRepository + core.sparseCheckout + core.symlinks + core.trustctime + core.warnAmbiguousRefs + core.whitespace + core.worktree + diff.autorefreshindex + diff.external + diff.ignoreSubmodules + diff.mnemonicprefix + diff.noprefix + diff.renameLimit + diff.renames + diff.statGraphWidth + diff.submodule + diff.suppressBlankEmpty + diff.tool + diff.wordRegex + diff.algorithm + difftool. + difftool.prompt + fetch.recurseSubmodules + fetch.unpackLimit + format.attach + format.cc + format.headers + format.numbered + format.pretty + format.signature + format.signoff + format.subjectprefix + format.suffix + format.thread + format.to + gc. + gc.aggressiveWindow + gc.auto + gc.autopacklimit + gc.packrefs + gc.pruneexpire + gc.reflogexpire + gc.reflogexpireunreachable + gc.rerereresolved + gc.rerereunresolved + gitcvs.allbinary + gitcvs.commitmsgannotation + gitcvs.dbTableNamePrefix + gitcvs.dbdriver + gitcvs.dbname + gitcvs.dbpass + gitcvs.dbuser + gitcvs.enabled + gitcvs.logfile + gitcvs.usecrlfattr + guitool. 
+    gui.blamehistoryctx
+    gui.commitmsgwidth
+    gui.copyblamethreshold
+    gui.diffcontext
+    gui.encoding
+    gui.fastcopyblame
+    gui.matchtrackingbranch
+    gui.newbranchtemplate
+    gui.pruneduringfetch
+    gui.spellingdictionary
+    gui.trustmtime
+    help.autocorrect
+    help.browser
+    help.format
+    http.lowSpeedLimit
+    http.lowSpeedTime
+    http.maxRequests
+    http.minSessions
+    http.noEPSV
+    http.postBuffer
+    http.proxy
+    http.sslCAInfo
+    http.sslCAPath
+    http.sslCert
+    http.sslCertPasswordProtected
+    http.sslKey
+    http.sslVerify
+    http.useragent
+    i18n.commitEncoding
+    i18n.logOutputEncoding
+    imap.authMethod
+    imap.folder
+    imap.host
+    imap.pass
+    imap.port
+    imap.preformattedHTML
+    imap.sslverify
+    imap.tunnel
+    imap.user
+    init.templatedir
+    instaweb.browser
+    instaweb.httpd
+    instaweb.local
+    instaweb.modulepath
+    instaweb.port
+    interactive.singlekey
+    log.date
+    log.decorate
+    log.showroot
+    mailmap.file
+    man.
+    man.viewer
+    merge.
+    merge.conflictstyle
+    merge.log
+    merge.renameLimit
+    merge.renormalize
+    merge.stat
+    merge.tool
+    merge.verbosity
+    mergetool.
+    mergetool.keepBackup
+    mergetool.keepTemporaries
+    mergetool.prompt
+    notes.displayRef
+    notes.rewrite.
+    notes.rewrite.amend
+    notes.rewrite.rebase
+    notes.rewriteMode
+    notes.rewriteRef
+    pack.compression
+    pack.deltaCacheLimit
+    pack.deltaCacheSize
+    pack.depth
+    pack.indexVersion
+    pack.packSizeLimit
+    pack.threads
+    pack.window
+    pack.windowMemory
+    pager.
+    pretty.
+    pull.octopus
+    pull.twohead
+    push.default
+    rebase.autosquash
+    rebase.stat
+    receive.autogc
+    receive.denyCurrentBranch
+    receive.denyDeleteCurrent
+    receive.denyDeletes
+    receive.denyNonFastForwards
+    receive.fsckObjects
+    receive.unpackLimit
+    receive.updateserverinfo
+    remote.pushdefault
+    remotes.
+    repack.usedeltabaseoffset
+    rerere.autoupdate
+    rerere.enabled
+    sendemail.
+    sendemail.aliasesfile
+    sendemail.aliasfiletype
+    sendemail.bcc
+    sendemail.cc
+    sendemail.cccmd
+    sendemail.chainreplyto
+    sendemail.confirm
+    sendemail.envelopesender
+    sendemail.from
+    sendemail.identity
+    sendemail.multiedit
+    sendemail.signedoffbycc
+    sendemail.smtpdomain
+    sendemail.smtpencryption
+    sendemail.smtppass
+    sendemail.smtpserver
+    sendemail.smtpserveroption
+    sendemail.smtpserverport
+    sendemail.smtpuser
+    sendemail.suppresscc
+    sendemail.suppressfrom
+    sendemail.thread
+    sendemail.to
+    sendemail.validate
+    showbranch.default
+    status.relativePaths
+    status.showUntrackedFiles
+    status.submodulesummary
+    submodule.
+    tar.umask
+    transfer.unpackLimit
+    url.
+    user.email
+    user.name
+    user.signingkey
+    web.browser
+    branch. remote.
+ " +} + +_git_remote () +{ + local subcommands="add rename remove set-head set-branches set-url show prune update" + local subcommand="$(__git_find_on_cmdline "$subcommands")" + if [ -z "$subcommand" ]; then + __gitcomp "$subcommands" + return + fi + + case "$subcommand" in + rename|remove|set-url|show|prune) + __gitcomp_nl "$(__git_remotes)" + ;; + set-head|set-branches) + __git_complete_remote_or_refspec + ;; + update) + local i c='' IFS=$'\n' + for i in $(git --git-dir="$(__gitdir)" config --get-regexp "remotes\..*" 2>/dev/null); do + i="${i#remotes.}" + c="$c ${i/ */}" + done + __gitcomp "$c" + ;; + *) + ;; + esac +} + +_git_replace () +{ + __gitcomp_nl "$(__git_refs)" +} + +_git_reset () +{ + __git_has_doubledash && return + + case "$cur" in + --*) + __gitcomp "--merge --mixed --hard --soft --patch" + return + ;; + esac + __gitcomp_nl "$(__git_refs)" +} + +_git_revert () +{ + case "$cur" in + --*) + __gitcomp "--edit --mainline --no-edit --no-commit --signoff" + return + ;; + esac + __gitcomp_nl "$(__git_refs)" +} + +_git_rm () +{ + case "$cur" in + --*) + __gitcomp "--cached --dry-run --ignore-unmatch --quiet" + return + ;; + esac + + __git_complete_index_file "--cached" +} + +_git_shortlog () +{ + __git_has_doubledash && return + + case "$cur" in + --*) + __gitcomp " + $__git_log_common_options + $__git_log_shortlog_options + --numbered --summary + " + return + ;; + esac + __git_complete_revlist +} + +_git_show () +{ + __git_has_doubledash && return + + case "$cur" in + --pretty=*|--format=*) + __gitcomp "$__git_log_pretty_formats $(__git_pretty_aliases) + " "" "${cur#*=}" + return + ;; + --diff-algorithm=*) + __gitcomp "$__git_diff_algorithms" "" "${cur##--diff-algorithm=}" + return + ;; + --*) + __gitcomp "--pretty= --format= --abbrev-commit --oneline + $__git_diff_common_options + " + return + ;; + esac + __git_complete_revlist_file +} + +_git_show_branch () +{ + case "$cur" in + --*) + __gitcomp " + --all --remotes --topo-order --current --more= + --list --independent --merge-base --no-name + --color --no-color + --sha1-name --sparse --topics --reflog + " + return + ;; + esac + __git_complete_revlist +} + +_git_stash () +{ + local save_opts='--keep-index --no-keep-index --quiet --patch' + local subcommands='save list show apply clear drop pop create branch' + local subcommand="$(__git_find_on_cmdline "$subcommands")" + if [ -z "$subcommand" ]; then + case "$cur" in + --*) + __gitcomp "$save_opts" + ;; + *) + if [ -z "$(__git_find_on_cmdline "$save_opts")" ]; then + __gitcomp "$subcommands" + fi + ;; + esac + else + case "$subcommand,$cur" in + save,--*) + __gitcomp "$save_opts" + ;; + apply,--*|pop,--*) + __gitcomp "--index --quiet" + ;; + show,--*|drop,--*|branch,--*) + ;; + show,*|apply,*|drop,*|pop,*|branch,*) + __gitcomp_nl "$(git --git-dir="$(__gitdir)" stash list \ + | sed -n -e 's/:.*//p')" + ;; + *) + ;; + esac + fi +} + +_git_submodule () +{ + __git_has_doubledash && return + + local subcommands="add status init deinit update summary foreach sync" + if [ -z "$(__git_find_on_cmdline "$subcommands")" ]; then + case "$cur" in + --*) + __gitcomp "--quiet --cached" + ;; + *) + __gitcomp "$subcommands" + ;; + esac + return + fi +} + +_git_svn () +{ + local subcommands=" + init fetch clone rebase dcommit log find-rev + set-tree commit-diff info create-ignore propget + proplist show-ignore show-externals branch tag blame + migrate mkdirs reset gc + " + local subcommand="$(__git_find_on_cmdline "$subcommands")" + if [ -z "$subcommand" ]; then + __gitcomp "$subcommands" + else + 
+    local remote_opts="--username= --config-dir= --no-auth-cache"
+    local fc_opts="
+      --follow-parent --authors-file= --repack=
+      --no-metadata --use-svm-props --use-svnsync-props
+      --log-window-size= --no-checkout --quiet
+      --repack-flags --use-log-author --localtime
+      --ignore-paths= --include-paths= $remote_opts
+      "
+    local init_opts="
+      --template= --shared= --trunk= --tags=
+      --branches= --stdlayout --minimize-url
+      --no-metadata --use-svm-props --use-svnsync-props
+      --rewrite-root= --prefix= --use-log-author
+      --add-author-from $remote_opts
+      "
+    local cmt_opts="
+      --edit --rmdir --find-copies-harder --copy-similarity=
+      "
+
+    case "$subcommand,$cur" in
+    fetch,--*)
+      __gitcomp "--revision= --fetch-all $fc_opts"
+      ;;
+    clone,--*)
+      __gitcomp "--revision= $fc_opts $init_opts"
+      ;;
+    init,--*)
+      __gitcomp "$init_opts"
+      ;;
+    dcommit,--*)
+      __gitcomp "
+        --merge --strategy= --verbose --dry-run
+        --fetch-all --no-rebase --commit-url
+        --revision --interactive $cmt_opts $fc_opts
+        "
+      ;;
+    set-tree,--*)
+      __gitcomp "--stdin $cmt_opts $fc_opts"
+      ;;
+    create-ignore,--*|propget,--*|proplist,--*|show-ignore,--*|\
+    show-externals,--*|mkdirs,--*)
+      __gitcomp "--revision="
+      ;;
+    log,--*)
+      __gitcomp "
+        --limit= --revision= --verbose --incremental
+        --oneline --show-commit --non-recursive
+        --authors-file= --color
+        "
+      ;;
+    rebase,--*)
+      __gitcomp "
+        --merge --verbose --strategy= --local
+        --fetch-all --dry-run $fc_opts
+        "
+      ;;
+    commit-diff,--*)
+      __gitcomp "--message= --file= --revision= $cmt_opts"
+      ;;
+    info,--*)
+      __gitcomp "--url"
+      ;;
+    branch,--*)
+      __gitcomp "--dry-run --message --tag"
+      ;;
+    tag,--*)
+      __gitcomp "--dry-run --message"
+      ;;
+    blame,--*)
+      __gitcomp "--git-format"
+      ;;
+    migrate,--*)
+      __gitcomp "
+        --config-dir= --ignore-paths= --minimize
+        --no-auth-cache --username=
+        "
+      ;;
+    reset,--*)
+      __gitcomp "--revision= --parent"
+      ;;
+    *)
+      ;;
+    esac
+  fi
+}
+
+_git_tag ()
+{
+  local i c=1 f=0
+  while [ $c -lt $cword ]; do
+    i="${words[c]}"
+    case "$i" in
+    -d|-v)
+      __gitcomp_nl "$(__git_tags)"
+      return
+      ;;
+    -f)
+      f=1
+      ;;
+    esac
+    ((c++))
+  done
+
+  case "$prev" in
+  -m|-F)
+    ;;
+  -*|tag)
+    if [ $f = 1 ]; then
+      __gitcomp_nl "$(__git_tags)"
+    fi
+    ;;
+  *)
+    __gitcomp_nl "$(__git_refs)"
+    ;;
+  esac
+}
+
+_git_whatchanged ()
+{
+  _git_log
+}
+
+__git_main ()
+{
+  local i c=1 command __git_dir
+
+  while [ $c -lt $cword ]; do
+    i="${words[c]}"
+    case "$i" in
+    --git-dir=*) __git_dir="${i#--git-dir=}" ;;
+    --git-dir) ((c++)) ; __git_dir="${words[c]}" ;;
+    --bare) __git_dir="." ;;
+    --help) command="help"; break ;;
+    -c|--work-tree|--namespace) ((c++)) ;;
+    -*) ;;
+    *) command="$i"; break ;;
+    esac
+    ((c++))
+  done
+
+  if [ -z "$command" ]; then
+    case "$cur" in
+    --*) __gitcomp "
+      --paginate
+      --no-pager
+      --git-dir=
+      --bare
+      --version
+      --exec-path
+      --exec-path=
+      --html-path
+      --man-path
+      --info-path
+      --work-tree=
+      --namespace=
+      --no-replace-objects
+      --help
+      "
+      ;;
+    *) __git_compute_porcelain_commands
+      __gitcomp "$__git_porcelain_commands $(__git_aliases)" ;;
+    esac
+    return
+  fi
+
+  local completion_func="_git_${command//-/_}"
+  declare -f $completion_func >/dev/null && $completion_func && return
+
+  local expansion=$(__git_aliased_command "$command")
+  if [ -n "$expansion" ]; then
+    completion_func="_git_${expansion//-/_}"
+    declare -f $completion_func >/dev/null && $completion_func
+  fi
+}
+
+__gitk_main ()
+{
+  __git_has_doubledash && return
+
+  local g="$(__gitdir)"
+  local merge=""
+  if [ -f "$g/MERGE_HEAD" ]; then
+    merge="--merge"
+  fi
+  case "$cur" in
+  --*)
+    __gitcomp "
+      $__git_log_common_options
+      $__git_log_gitk_options
+      $merge
+      "
+    return
+    ;;
+  esac
+  __git_complete_revlist
+}
+
+if [[ -n ${ZSH_VERSION-} ]]; then
+  echo "WARNING: this script is deprecated, please see git-completion.zsh" 1>&2
+
+  autoload -U +X compinit && compinit
+
+  __gitcomp ()
+  {
+    emulate -L zsh
+
+    local cur_="${3-$cur}"
+
+    case "$cur_" in
+    --*=)
+      ;;
+    *)
+      local c IFS=$' \t\n'
+      local -a array
+      for c in ${=1}; do
+        c="$c${4-}"
+        case $c in
+        --*=*|*.) ;;
+        *) c="$c " ;;
+        esac
+        array[$#array+1]="$c"
+      done
+      compset -P '*[=:]'
+      compadd -Q -S '' -p "${2-}" -a -- array && _ret=0
+      ;;
+    esac
+  }
+
+  __gitcomp_nl ()
+  {
+    emulate -L zsh
+
+    local IFS=$'\n'
+    compset -P '*[=:]'
+    compadd -Q -S "${4- }" -p "${2-}" -- ${=1} && _ret=0
+  }
+
+  __gitcomp_file ()
+  {
+    emulate -L zsh
+
+    local IFS=$'\n'
+    compset -P '*[=:]'
+    compadd -Q -p "${2-}" -f -- ${=1} && _ret=0
+  }
+
+  _git ()
+  {
+    local _ret=1 cur cword prev
+    cur=${words[CURRENT]}
+    prev=${words[CURRENT-1]}
+    let cword=CURRENT-1
+    emulate ksh -c __${service}_main
+    let _ret && _default && _ret=0
+    return _ret
+  }
+
+  compdef _git git gitk
+  return
+fi
+
+__git_func_wrap ()
+{
+  local cur words cword prev
+  _get_comp_words_by_ref -n =: cur words cword prev
+  $1
+}
+
+# Setup completion for certain functions defined above by setting common
+# variables and workarounds.
+# This is NOT a public function; use at your own risk.
+__git_complete ()
+{
+  local wrapper="__git_wrap${2}"
+  eval "$wrapper () { __git_func_wrap $2 ; }"
+  complete -o bashdefault -o default -o nospace -F $wrapper $1 2>/dev/null \
+    || complete -o default -o nospace -F $wrapper $1
+}
+
+# wrapper for backwards compatibility
+_git ()
+{
+  __git_wrap__git_main
+}
+
+# wrapper for backwards compatibility
+_gitk ()
+{
+  __git_wrap__gitk_main
+}
+
+__git_complete git __git_main
+__git_complete gitk __gitk_main
+
+# The following are necessary only for Cygwin, and only are needed
+# when the user has tab-completed the executable name and consequently
+# included the '.exe' suffix.
+#
+if [ Cygwin = "$(uname -o 2>/dev/null)" ]; then
+__git_complete git.exe __git_main
+fi
diff --git a/paddle/scripts/docker/root/.scripts/git-prompt.sh b/paddle/scripts/docker/root/.scripts/git-prompt.sh
new file mode 100755
index 0000000000000000000000000000000000000000..576f4ec14c94a24ebffa9e2620acf881e6b5ddaa
--- /dev/null
+++ b/paddle/scripts/docker/root/.scripts/git-prompt.sh
@@ -0,0 +1,445 @@
+# bash/zsh git prompt support
+#
+# Copyright (C) 2006,2007 Shawn O. Pearce <spearce@spearce.org>
+# Distributed under the GNU General Public License, version 2.0.
+#
+# This script allows you to see repository status in your prompt.
+#
+# To enable:
+#
+#    1) Copy this file to somewhere (e.g. ~/.git-prompt.sh).
+#    2) Add the following line to your .bashrc/.zshrc:
+#        source ~/.git-prompt.sh
+#    3a) Change your PS1 to call __git_ps1 as
+#        command-substitution:
+#        Bash: PS1='[\u@\h \W$(__git_ps1 " (%s)")]\$ '
+#        ZSH:  setopt PROMPT_SUBST ; PS1='[%n@%m %c$(__git_ps1 " (%s)")]\$ '
+#        the optional argument will be used as format string.
+#    3b) Alternatively, for a slightly faster prompt, __git_ps1 can
+#        be used for PROMPT_COMMAND in Bash or for precmd() in Zsh
+#        with two parameters, <pre> and <post>, which are strings
+#        you would put in $PS1 before and after the status string
+#        generated by the git-prompt machinery.  e.g.
+#        Bash: PROMPT_COMMAND='__git_ps1 "\u@\h:\w" "\\\$ "'
+#          will show username, at-sign, host, colon, cwd, then
+#          various status string, followed by dollar and SP, as
+#          your prompt.
+#        ZSH:  precmd () { __git_ps1 "%n" ":%~$ " "|%s" }
+#          will show username, pipe, then various status string,
+#          followed by colon, cwd, dollar and SP, as your prompt.
+#        Optionally, you can supply a third argument with a printf
+#        format string to finetune the output of the branch status
+#
+# The repository status will be displayed only if you are currently in a
+# git repository. The %s token is the placeholder for the shown status.
+#
+# The prompt status always includes the current branch name.
+#
+# In addition, if you set GIT_PS1_SHOWDIRTYSTATE to a nonempty value,
+# unstaged (*) and staged (+) changes will be shown next to the branch
+# name.  You can configure this per-repository with the
+# bash.showDirtyState variable, which defaults to true once
+# GIT_PS1_SHOWDIRTYSTATE is enabled.
+#
+# You can also see if currently something is stashed, by setting
+# GIT_PS1_SHOWSTASHSTATE to a nonempty value. If something is stashed,
+# then a '$' will be shown next to the branch name.
+#
+# If you would like to see if there're untracked files, then you can set
+# GIT_PS1_SHOWUNTRACKEDFILES to a nonempty value. If there're untracked
+# files, then a '%' will be shown next to the branch name.  You can
+# configure this per-repository with the bash.showUntrackedFiles
+# variable, which defaults to true once GIT_PS1_SHOWUNTRACKEDFILES is
+# enabled.
+#
+# If you would like to see the difference between HEAD and its upstream,
+# set GIT_PS1_SHOWUPSTREAM="auto".  A "<" indicates you are behind, ">"
+# indicates you are ahead, "<>" indicates you have diverged and "="
+# indicates that there is no difference. You can further control
+# behaviour by setting GIT_PS1_SHOWUPSTREAM to a space-separated list
+# of values:
+#
+#     verbose       show number of commits ahead/behind (+/-) upstream
+#     legacy        don't use the '--count' option available in recent
+#                   versions of git-rev-list
+#     git           always compare HEAD to @{upstream}
+#     svn           always compare HEAD to your SVN upstream
+#
+# By default, __git_ps1 will compare HEAD to your SVN upstream if it can
+# find one, or @{upstream} otherwise.  Once you have set
+# GIT_PS1_SHOWUPSTREAM, you can override it on a per-repository basis by
+# setting the bash.showUpstream config variable.
+#
+# If you would like to see more information about the identity of
+# commits checked out as a detached HEAD, set GIT_PS1_DESCRIBE_STYLE
+# to one of these values:
+#
+#     contains      relative to newer annotated tag (v1.6.3.2~35)
+#     branch        relative to newer tag or branch (master~4)
+#     describe      relative to older annotated tag (v1.6.3.1-13-gdd42c2f)
+#     default       exactly matching tag
+#
+# If you would like a colored hint about the current dirty state, set
+# GIT_PS1_SHOWCOLORHINTS to a nonempty value. The colors are based on
+# the colored output of "git status -sb" and are available only when
+# using __git_ps1 for PROMPT_COMMAND or precmd.
+
+# stores the divergence from upstream in $p
+# used by GIT_PS1_SHOWUPSTREAM
+__git_ps1_show_upstream ()
+{
+  local key value
+  local svn_remote svn_url_pattern count n
+  local upstream=git legacy="" verbose=""
+
+  svn_remote=()
+  # get some config options from git-config
+  local output="$(git config -z --get-regexp '^(svn-remote\..*\.url|bash\.showupstream)$' 2>/dev/null | tr '\0\n' '\n ')"
+  while read -r key value; do
+    case "$key" in
+    bash.showupstream)
+      GIT_PS1_SHOWUPSTREAM="$value"
+      if [[ -z "${GIT_PS1_SHOWUPSTREAM}" ]]; then
+        p=""
+        return
+      fi
+      ;;
+    svn-remote.*.url)
+      svn_remote[$((${#svn_remote[@]} + 1))]="$value"
+      svn_url_pattern+="\\|$value"
+      upstream=svn+git # default upstream is SVN if available, else git
+      ;;
+    esac
+  done <<< "$output"
+
+  # parse configuration values
+  for option in ${GIT_PS1_SHOWUPSTREAM}; do
+    case "$option" in
+    git|svn) upstream="$option" ;;
+    verbose) verbose=1 ;;
+    legacy)  legacy=1  ;;
+    esac
+  done
+
+  # Find our upstream
+  case "$upstream" in
+  git)    upstream="@{upstream}" ;;
+  svn*)
+    # get the upstream from the "git-svn-id: ..." in a commit message
+    # (git-svn uses essentially the same procedure internally)
+    local -a svn_upstream
+    svn_upstream=($(git log --first-parent -1 \
+          --grep="^git-svn-id: \(${svn_url_pattern#??}\)" 2>/dev/null))
+    if [[ 0 -ne ${#svn_upstream[@]} ]]; then
+      svn_upstream=${svn_upstream[${#svn_upstream[@]} - 2]}
+      svn_upstream=${svn_upstream%@*}
+      local n_stop="${#svn_remote[@]}"
+      for ((n=1; n <= n_stop; n++)); do
+        svn_upstream=${svn_upstream#${svn_remote[$n]}}
+      done
+
+      if [[ -z "$svn_upstream" ]]; then
+        # default branch name for checkouts with no layout:
+        upstream=${GIT_SVN_ID:-git-svn}
+      else
+        upstream=${svn_upstream#/}
+      fi
+    elif [[ "svn+git" = "$upstream" ]]; then
+      upstream="@{upstream}"
+    fi
+    ;;
+  esac
+
+  # Find how many commits we are ahead/behind our upstream
+  if [[ -z "$legacy" ]]; then
+    count="$(git rev-list --count --left-right \
+        "$upstream"...HEAD 2>/dev/null)"
+  else
+    # produce equivalent output to --count for older versions of git
+    local commits
+    if commits="$(git rev-list --left-right "$upstream"...HEAD 2>/dev/null)"
+    then
+      local commit behind=0 ahead=0
+      for commit in $commits
+      do
+        case "$commit" in
+        "<"*) ((behind++)) ;;
+        *)    ((ahead++))  ;;
+        esac
+      done
+      count="$behind  $ahead"
+    else
+      count=""
+    fi
+  fi
+
+  # calculate the result
+  if [[ -z "$verbose" ]]; then
+    case "$count" in
+    "") # no upstream
+      p="" ;;
+    "0  0") # equal to upstream
+      p="=" ;;
+    "0  "*) # ahead of upstream
+      p=">" ;;
+    *"  0") # behind upstream
+      p="<" ;;
+    *)      # diverged from upstream
+      p="<>" ;;
+    esac
+  else
+    case "$count" in
+    "") # no upstream
+      p="" ;;
+    "0  0") # equal to upstream
+      p=" u=" ;;
+    "0  "*) # ahead of upstream
+      p=" u+${count#0 }" ;;
+    *"  0") # behind upstream
+      p=" u-${count%  0}" ;;
+    *)      # diverged from upstream
+      p=" u+${count#* }-${count%  *}" ;;
+    esac
+  fi
+
+}
+
+# Helper function that is meant to be called from __git_ps1.  It
+# injects color codes into the appropriate gitstring variables used
+# to build a gitstring.
+__git_ps1_colorize_gitstring ()
+{
+  if [[ -n ${ZSH_VERSION-} ]]; then
+    local c_red='%F{red}'
+    local c_green='%F{green}'
+    local c_lblue='%F{blue}'
+    local c_clear='%f'
+  else
+    # Using \[ and \] around colors is necessary to prevent
+    # issues with command line editing/browsing/completion!
+    local c_red='\[\e[31m\]'
+    local c_green='\[\e[32m\]'
+    local c_lblue='\[\e[1;34m\]'
+    local c_clear='\[\e[0m\]'
+  fi
+  local bad_color=$c_red
+  local ok_color=$c_green
+  local flags_color="$c_lblue"
+
+  local branch_color=""
+  if [ $detached = no ]; then
+    branch_color="$ok_color"
+  else
+    branch_color="$bad_color"
+  fi
+  c="$branch_color$c"
+
+  z="$c_clear$z"
+  if [ "$w" = "*" ]; then
+    w="$bad_color$w"
+  fi
+  if [ -n "$i" ]; then
+    i="$ok_color$i"
+  fi
+  if [ -n "$s" ]; then
+    s="$flags_color$s"
+  fi
+  if [ -n "$u" ]; then
+    u="$bad_color$u"
+  fi
+  r="$c_clear$r"
+}
+
+# __git_ps1 accepts 0 or 1 arguments (i.e., format string)
+# when called from PS1 using command substitution
+# in this mode it prints text to add to bash PS1 prompt (includes branch name)
+#
+# __git_ps1 requires 2 or 3 arguments when called from PROMPT_COMMAND (pc)
+# in that case it _sets_ PS1. The arguments are parts of a PS1 string.
+# when two arguments are given, the first is prepended and the second appended
+# to the state string when assigned to PS1.
+# The optional third parameter will be used as printf format string to further
+# customize the output of the git-status string.
+# In this mode you can request colored hints using GIT_PS1_SHOWCOLORHINTS=true
+__git_ps1 ()
+{
+  local pcmode=no
+  local detached=no
+  local ps1pc_start='\u@\h:\w '
+  local ps1pc_end='\$ '
+  local printf_format=' (%s)'
+
+  case "$#" in
+    2|3)  pcmode=yes
+      ps1pc_start="$1"
+      ps1pc_end="$2"
+      printf_format="${3:-$printf_format}"
+    ;;
+    0|1)  printf_format="${1:-$printf_format}"
+    ;;
+    *)  return
+    ;;
+  esac
+
+  local repo_info rev_parse_exit_code
+  repo_info="$(git rev-parse --git-dir --is-inside-git-dir \
+    --is-bare-repository --is-inside-work-tree \
+    --short HEAD 2>/dev/null)"
+  rev_parse_exit_code="$?"
+
+  if [ -z "$repo_info" ]; then
+    if [ $pcmode = yes ]; then
+      #In PC mode PS1 always needs to be set
+      PS1="$ps1pc_start$ps1pc_end"
+    fi
+    return
+  fi
+
+  local short_sha
+  if [ "$rev_parse_exit_code" = "0" ]; then
+    short_sha="${repo_info##*$'\n'}"
+    repo_info="${repo_info%$'\n'*}"
+  fi
+  local inside_worktree="${repo_info##*$'\n'}"
+  repo_info="${repo_info%$'\n'*}"
+  local bare_repo="${repo_info##*$'\n'}"
+  repo_info="${repo_info%$'\n'*}"
+  local inside_gitdir="${repo_info##*$'\n'}"
+  local g="${repo_info%$'\n'*}"
+
+  local r=""
+  local b=""
+  local step=""
+  local total=""
+  if [ -d "$g/rebase-merge" ]; then
+    read b 2>/dev/null <"$g/rebase-merge/head-name"
+    read step 2>/dev/null <"$g/rebase-merge/msgnum"
+    read total 2>/dev/null <"$g/rebase-merge/end"
+    if [ -f "$g/rebase-merge/interactive" ]; then
+      r="|REBASE-i"
+    else
+      r="|REBASE-m"
+    fi
+  else
+    if [ -d "$g/rebase-apply" ]; then
+      read step 2>/dev/null <"$g/rebase-apply/next"
+      read total 2>/dev/null <"$g/rebase-apply/last"
+      if [ -f "$g/rebase-apply/rebasing" ]; then
+        read b 2>/dev/null <"$g/rebase-apply/head-name"
+        r="|REBASE"
+      elif [ -f "$g/rebase-apply/applying" ]; then
+        r="|AM"
+      else
+        r="|AM/REBASE"
+      fi
+    elif [ -f "$g/MERGE_HEAD" ]; then
+      r="|MERGING"
+    elif [ -f "$g/CHERRY_PICK_HEAD" ]; then
+      r="|CHERRY-PICKING"
+    elif [ -f "$g/REVERT_HEAD" ]; then
+      r="|REVERTING"
+    elif [ -f "$g/BISECT_LOG" ]; then
+      r="|BISECTING"
+    fi
+
+    if [ -n "$b" ]; then
+      :
+    elif [ -h "$g/HEAD" ]; then
+      # symlink symbolic ref
+      b="$(git symbolic-ref HEAD 2>/dev/null)"
+    else
+      local head=""
+      if ! read head 2>/dev/null <"$g/HEAD"; then
+        if [ $pcmode = yes ]; then
+          PS1="$ps1pc_start$ps1pc_end"
+        fi
+        return
+      fi
+      # is it a symbolic ref?
+      b="${head#ref: }"
+      if [ "$head" = "$b" ]; then
+        detached=yes
+        b="$(
+        case "${GIT_PS1_DESCRIBE_STYLE-}" in
+        (contains)
+          git describe --contains HEAD ;;
+        (branch)
+          git describe --contains --all HEAD ;;
+        (describe)
+          git describe HEAD ;;
+        (* | default)
+          git describe --tags --exact-match HEAD ;;
+        esac 2>/dev/null)" ||
+
+        b="$short_sha..."
+        b="($b)"
+      fi
+    fi
+  fi
+
+  if [ -n "$step" ] && [ -n "$total" ]; then
+    r="$r $step/$total"
+  fi
+
+  local w=""
+  local i=""
+  local s=""
+  local u=""
+  local c=""
+  local p=""
+
+  if [ "true" = "$inside_gitdir" ]; then
+    if [ "true" = "$bare_repo" ]; then
+      c="BARE:"
+    else
+      b="GIT_DIR!"
+    fi
+  elif [ "true" = "$inside_worktree" ]; then
+    if [ -n "${GIT_PS1_SHOWDIRTYSTATE-}" ] &&
+       [ "$(git config --bool bash.showDirtyState)" != "false" ]
+    then
+      git diff --no-ext-diff --quiet --exit-code || w="*"
+      if [ -n "$short_sha" ]; then
+        git diff-index --cached --quiet HEAD -- || i="+"
+      else
+        i="#"
+      fi
+    fi
+    if [ -n "${GIT_PS1_SHOWSTASHSTATE-}" ] &&
+       [ -r "$g/refs/stash" ]; then
+      s="$"
+    fi
+
+    if [ -n "${GIT_PS1_SHOWUNTRACKEDFILES-}" ] &&
+       [ "$(git config --bool bash.showUntrackedFiles)" != "false" ] &&
+       git ls-files --others --exclude-standard --error-unmatch -- '*' >/dev/null 2>/dev/null
+    then
+      u="%${ZSH_VERSION+%}"
+    fi
+
+    if [ -n "${GIT_PS1_SHOWUPSTREAM-}" ]; then
+      __git_ps1_show_upstream
+    fi
+  fi
+
+  local z="${GIT_PS1_STATESEPARATOR-" "}"
+
+  # NO color option unless in PROMPT_COMMAND mode
+  if [ $pcmode = yes ] && [ -n "${GIT_PS1_SHOWCOLORHINTS-}" ]; then
+    __git_ps1_colorize_gitstring
+  fi
+
+  local f="$w$i$s$u"
+  local gitstring="$c${b##refs/heads/}${f:+$z$f}$r$p"
+
+  if [ $pcmode = yes ]; then
+    if [[ -n ${ZSH_VERSION-} ]]; then
+      gitstring=$(printf -- "$printf_format" "$gitstring")
+    else
+      printf -v gitstring -- "$printf_format" "$gitstring"
+    fi
+    PS1="$ps1pc_start$gitstring$ps1pc_end"
+  else
+    printf -- "$printf_format" "$gitstring"
+  fi
+}
diff --git a/paddle/scripts/travis/before_install.osx.sh b/paddle/scripts/travis/before_install.osx.sh
deleted file mode 100755
index 80f031a74e7052d183b5ef21d432476ff1cce722..0000000000000000000000000000000000000000
--- a/paddle/scripts/travis/before_install.osx.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-brew update
-brew tap homebrew/science
-brew install openblas swig md5sha1sum
diff --git a/paddle/scripts/travis/build_and_test.sh b/paddle/scripts/travis/build_and_test.sh
index 5e6350b57458594163f23cca41a546d7bd9b1eda..7deb3e62e88de7e1306fcbfc5a28aa4372d678e6 100755
--- a/paddle/scripts/travis/build_and_test.sh
+++ b/paddle/scripts/travis/build_and_test.sh
@@ -2,18 +2,11 @@
 source ./common.sh
 
 NPROC=1
-if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
-  export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages
-  export PYTHONHOME=/opt/python/2.7.12
-  export PATH=/opt/python/2.7.12/bin:${PATH}
-  cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
-  NRPOC=`nproc`
-  make -j $NPROC
-  make coveralls
-  sudo make install
-elif [[ "$TRAVIS_OS_NAME" == "osx" ]]; then
-  export PYTHONPATH=/usr/local/lib/python2.7/site-packages
-  cmake .. -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
-  NPROC=`sysctl -n hw.ncpu`
-  make -j $NPROC
-fi
+export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages
+export PYTHONHOME=/opt/python/2.7.12
+export PATH=/opt/python/2.7.12/bin:${PATH}
+cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
+NPROC=`nproc`
+make -j $NPROC
+make coveralls
+sudo make install
diff --git a/paddle/scripts/travis/docs.sh b/paddle/scripts/travis/docs.sh
index 6b43cad20b76e9abeb3cb10a726d3d8e3da5f8e2..53e998ef6c1b96d9e7d82b7effd12a27e6dc69f2 100755
--- a/paddle/scripts/travis/docs.sh
+++ b/paddle/scripts/travis/docs.sh
@@ -2,8 +2,12 @@
 
 # Add set -e, cd to directory.
 source ./common.sh
-
 # Compile Documentation only.
+cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF ${EXTRA_CMAKE_OPTS}
+mkdir output
+make DESTDIR=./output install -j `nproc`
+pip install ./output/usr/local/opt/paddle/share/wheels/*
+rm -rf *
 cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS}
 make paddle_docs paddle_docs_cn
 
@@ -25,26 +29,41 @@ TARGET_BRANCH="gh-pages"
 # Only deploy master branch to build latest documentation.
 SOURCE_BRANCH="master"
 
-# If is not a Github pull request, and in master branch.
-if [ "$TRAVIS_PULL_REQUEST" != "false" -o "$TRAVIS_BRANCH" != "$SOURCE_BRANCH"  ]; then
-  exit 0
-fi
-
 # Clone the repo to output directory
 git clone $REPO output
 cd output
 
-# checkout github page branch
-git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH
+function deploy_docs() {
+  SOURCE_BRANCH=$1
+  DIR=$2
+  # If is not a Github pull request
+  if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then
+    exit 0
+  fi
+  # If it is not watched branch.
+  if [ "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then
+    return
+  fi
 
-# remove old docs. mv new docs.
-rm -rf doc doc_cn
-mv ../doc/cn/html doc_cn
-mv ../doc/en/html doc
+  # checkout github page branch
+  git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH
+  
+  mkdir -p ${DIR}
+  # remove old docs. mv new docs.
+  set +e
+  rm -rf ${DIR}/doc ${DIR}/doc_cn
+  set -e
+  mv ../doc/cn/html ${DIR}/doc_cn
+  mv ../doc/en/html ${DIR}/doc
+  git add .
+}
+
+deploy_docs "master" "." 
+deploy_docs "develop" "./develop/"
 
 # Check is there anything changed.
 set +e
-git diff --exit-code >/dev/null
+git diff --cached --exit-code >/dev/null
 if [ $? -eq 0 ]; then
   echo "No changes to the output on this push; exiting."
   exit 0
@@ -57,7 +76,6 @@ if [ -n $SSL_KEY ]; then  # Only push updated docs for github.com/PaddlePaddle/P
   git config user.name "Travis CI"
   git config user.email "paddle-dev@baidu.com"
   git commit -m "Deploy to GitHub Pages: ${SHA}"
-
   # Set ssh private key
   openssl aes-256-cbc -K $SSL_KEY -iv $SSL_IV -in ../../paddle/scripts/travis/deploy_key.enc -out deploy_key -d
   chmod 600 deploy_key
diff --git a/paddle/setup.py.in b/paddle/setup.py.in
index c79666bc81b6f343f166422697cd3901ce8ff441..382d5be6ecfc26b4a524bb6a775bd1a805a34d96 100644
--- a/paddle/setup.py.in
+++ b/paddle/setup.py.in
@@ -55,6 +55,9 @@ elif is_osx == True:
 
 include_dirs = [np.get_include(), "../"]    # include numpy and paddle.
 
+os.environ["CC"] = "@CMAKE_C_COMPILER@"
+os.environ["CXX"] = "@CMAKE_CXX_COMPILER@"
+
 setup(name="py_paddle",
   version="@PADDLE_VERSION@",
   ext_modules=[
@@ -69,6 +72,7 @@ setup(name="py_paddle",
   packages=['py_paddle'],
   include_dirs = include_dirs,
   install_requires = [
+    'nltk>=3.2.2',
     'numpy>=1.8.0',      # The numpy is required.
     'protobuf>=3.0.0'    # The paddle protobuf version
   ],
diff --git a/paddle/trainer/ParameterUpdater.h b/paddle/trainer/ParameterUpdater.h
index c3207e63ce72b73a57c2e40c72c5259f0ae61bc9..9e9e948b8856d2712f8894b3d14db9c795d5f694 100644
--- a/paddle/trainer/ParameterUpdater.h
+++ b/paddle/trainer/ParameterUpdater.h
@@ -184,7 +184,6 @@ protected:
    * @param para
    */
   virtual void updateImpl(Parameter* para) {}
-  virtual void update(Parameter* para) {}
 };
 
 /**
diff --git a/paddle/trainer/Tester.cpp b/paddle/trainer/Tester.cpp
index 13aa28ae5d9699d267858d48e46797c756487ddd..80664fa877b324af73e3e3effa11e46eac6294e2 100644
--- a/paddle/trainer/Tester.cpp
+++ b/paddle/trainer/Tester.cpp
@@ -208,7 +208,7 @@ real Tester::forwardOneBatch(const DataBatch& dataBatch,
     return 0.0;  // In this case, there is no meaning to calculate cost
   }
 
-  return Argument::sumCosts(outArgs);
+  return Argument::sum(outArgs);
 }
 
 void Tester::testOnePassBatch(int passId) {
diff --git a/paddle/trainer/Trainer.cpp b/paddle/trainer/Trainer.cpp
index 8465addaf9e03831e914be2c73901c3b1a9d537f..b68e29cd5ea223272151e7a8b52d998832f47103 100644
--- a/paddle/trainer/Trainer.cpp
+++ b/paddle/trainer/Trainer.cpp
@@ -90,16 +90,6 @@ DEFINE_string(model_list, "", "File that saves the model list when evaluation");
 
 namespace paddle {
 
-void Trainer::init(int argc, char** argv) {
-  initMain(argc, argv);
-  initPython(argc, argv);
-
-  auto config = TrainerConfigHelper::createFromFlagConfig();
-  feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW);
-
-  init(config);
-}
-
 void Trainer::init(const std::shared_ptr<TrainerConfigHelper>& config,
                    bool testing,
                    const std::shared_ptr<GradientMachine>& gradientMachine,
@@ -320,7 +310,7 @@ real Trainer::checkGradient() {
  std::vector<Argument> outArgs;
 
   trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC);
-  real cost = Argument::sumCosts(outArgs);
+  real cost = Argument::sum(outArgs);
   LOG(INFO) << "original cost=" << cost;
   trainerInternal_.getGradientMachine()->backward();
 
@@ -350,7 +340,7 @@ real Trainer::checkGradient() {
     parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara);
     parameter->setValueUpdated();
     trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC);
-    real newCost1 = Argument::sumCosts(outArgs);
+    real newCost1 = Argument::sum(outArgs);
 
     for (size_t i = 0; i < dim; ++i) {
       newp[i] = oldp[i] - step * d[i];
@@ -359,7 +349,7 @@ real Trainer::checkGradient() {
     parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara);
     parameter->setValueUpdated();
     trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC);
-    real newCost2 = Argument::sumCosts(outArgs);
+    real newCost2 = Argument::sum(outArgs);
 
     real trueDelta = 0.5 * (newCost1 - newCost2);
     real diff = (1e-20 + trueDelta) / (1e-20 + delta) - 1;
@@ -585,7 +575,7 @@ real Trainer::calcGradient(const DataBatch& dataBatch,
 
   trainerInternal_.getGradientMachine()->forwardBackward(
       inArgs, &outArgs, PASS_TRAIN);
-  real cost = Argument::sumCosts(outArgs);
+  real cost = Argument::sum(outArgs);
 
   offset = 0;
   for (auto& para : parameters) {
diff --git a/paddle/trainer/Trainer.h b/paddle/trainer/Trainer.h
index 7cbf18ace7a5fed053653c73e62d36c388b15123..c8ee4726c24c335ceda22ea3a20049b01d11c149 100644
--- a/paddle/trainer/Trainer.h
+++ b/paddle/trainer/Trainer.h
@@ -71,11 +71,6 @@ public:
       const std::shared_ptr<DataProviderBase>& dataProvider = nullptr,
       const std::shared_ptr<DataProviderBase>& testDataProvider = nullptr);
 
-  /**
-   * Initialize Trainer from command line flags.
-   */
-  void init(int argc, char** argv);
-
   /**
    * Train until num_passes reached.
    * One pass means neural network train through all training data.
diff --git a/paddle/trainer/TrainerInternal.cpp b/paddle/trainer/TrainerInternal.cpp
index f3b465b444167d4624a5e99c30e1257eda53ca2c..4c5d4a0913aaf3a9932b3d67806378ece4245304 100644
--- a/paddle/trainer/TrainerInternal.cpp
+++ b/paddle/trainer/TrainerInternal.cpp
@@ -134,7 +134,7 @@ void TrainerInternal::trainOneBatch(int64_t batchId,
   real cost = 0;
   {
     REGISTER_TIMER("sumCost");
-    cost = Argument::sumCosts(*outArgs);
+    cost = Argument::sum(*outArgs);
   }
 
   if (batchId % intconfig_->log_period == 0) {
diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/trainer/tests/CMakeLists.txt
index 22e07bd0e98a4cd36e6ed5860bcff0d4ae7cb1d2..c5c76a030d9e5f1deed63454b408442954ef5eae 100644
--- a/paddle/trainer/tests/CMakeLists.txt
+++ b/paddle/trainer/tests/CMakeLists.txt
@@ -1,11 +1,3 @@
-################# test_Prediction ######################
-add_unittest_without_exec(test_Prediction
-    test_Prediction.cpp)
-add_test(NAME test_Prediction
-  COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python
-        ${CMAKE_CURRENT_BINARY_DIR}/test_Prediction --merger=${CMAKE_CURRENT_BINARY_DIR}/../paddle_merge_model
-    WORKING_DIRECTORY ${PROJ_ROOT}/paddle/)
-
 ################# test_Compare ############################
 add_unittest_without_exec(test_Compare
     test_Compare.cpp)
diff --git a/paddle/trainer/tests/test_Prediction.cpp b/paddle/trainer/tests/test_Prediction.cpp
deleted file mode 100644
index 0c79404eee1c0902c5c8e8eefd139da3da584636..0000000000000000000000000000000000000000
--- a/paddle/trainer/tests/test_Prediction.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include 
-
-#include "paddle/trainer/Trainer.h"
-
-#include 
-
-DECLARE_string(config);
-DECLARE_string(config_args);
-DEFINE_string(merger,
-              "./paddle_merge_model",
-              "path to paddle_merge_model binary");
-
-using namespace paddle;  // NOLINT
-using namespace std;     // NOLINT
-
-static const string& configFile = "trainer/tests/sample_trainer_config.conf";
-static const string& mergedModelFile = "./test_model_file";
-static const string& modelDir = "./test_model_dir";
-
-void checkBuffer(real* vec1, real* vec2, size_t len) {
-  for (size_t i = 0; i < len; i++) {
-    EXPECT_EQ(vec1[i], vec2[i]) << "vec1:" << vec1[i] << " vec2:" << vec2[i];
-  }
-}
-
-void checkParameters(vector<ParameterPtr> A, vector<ParameterPtr> B) {
-  CHECK_EQ(B.size(), A.size()) << "parameter size not equal";
-  for (size_t i = 0; i < A.size(); i++) {
-    auto vec1 = A[i]->getBuf(PARAMETER_VALUE);
-    auto vec2 = B[i]->getBuf(PARAMETER_VALUE);
-    CHECK_EQ(vec1->useGpu_, vec2->useGpu_) << "use gpu not equal";
-    CHECK_EQ(vec1->getSize(), vec2->getSize()) << "size not equal";
-
-    if (vec1->useGpu_ == false) {
-      checkBuffer(vec1->getData(), vec2->getData(), vec1->getSize());
-    } else {
-      VectorPtr cpuVec1 = Vector::create(vec1->getSize(), false);
-      VectorPtr cpuVec2 = Vector::create(vec2->getSize(), false);
-      cpuVec1->copyFrom(*vec1, HPPL_STREAM_DEFAULT);
-      cpuVec2->copyFrom(*vec2, HPPL_STREAM_DEFAULT);
-      hl_stream_synchronize(HPPL_STREAM_DEFAULT);
-      checkBuffer(cpuVec1->getData(), cpuVec2->getData(), cpuVec1->getSize());
-    }
-  }
-}
-
-TEST(GradientMachine, create) {
-#ifdef PADDLE_ONLY_CPU
-  FLAGS_use_gpu = false;
-#endif
-  mkDir(modelDir.c_str());
-  FLAGS_config = configFile;
-  FLAGS_config_args = "with_cost=False";
-  auto config = TrainerConfigHelper::createFromFlagConfig();
-
-  // save model to directory
-  unique_ptr<GradientMachine> gradientMachine1(
-      GradientMachine::create(*config));
-  gradientMachine1->saveParameters(modelDir);
-  Trainer trainer;
-  trainer.init(config);
-  ParameterUtil* paramUtil = trainer.getParameterUtilPtr();
-  if (paramUtil != NULL) {
-    paramUtil->saveConfigWithPath(modelDir);
-  }
-
-  // create a different GradientMachine
-  unique_ptr<GradientMachine> gradientMachine2(
-      GradientMachine::create(*config));
-  gradientMachine2->randParameters();
-
-  // merge config and model to one file
-  string cmd = FLAGS_merger + " --model_dir=" + modelDir +
-               " --config_args=with_cost=False" + " --model_file=" +
-               mergedModelFile;
-  LOG(INFO) << cmd;
-  int ret = system(cmd.c_str());
-  EXPECT_EQ(0, ret);
-  if (ret) {
-    return;
-  }
-
-  // create GradientMachine from the merged model
-  DataConfig dataConfig;
-  unique_ptr<GradientMachine> gradientMachine3(
-      GradientMachine::create(mergedModelFile, &dataConfig));
-  CHECK(gradientMachine3);
-  EXPECT_EQ(dataConfig.type(), "simple");
-  EXPECT_EQ(dataConfig.feat_dim(), 3);
-
-  // compare the parameters of GradientMachine and GradientMachine3
-  std::vector<ParameterPtr> paraMachine1 = gradientMachine1->getParameters();
-  std::vector<ParameterPtr> paraMachine3 = gradientMachine3->getParameters();
-  checkParameters(paraMachine1, paraMachine3);
-
-  // Test that the GradientMachine created from the merged model
-  // is same as the orginnal one.
-  vector<Argument> inArgs(1);
-  vector<Argument> outArgs;
-
-  int inputDim = 3;
-  int numSamples = 2;
-  CpuMatrix cpuInput(numSamples, inputDim);
-  for (int i = 0; i < numSamples; ++i) {
-    for (int j = 0; j < inputDim; ++j) {
-      cpuInput.getData()[i * inputDim + j] =
-          rand() / (real)RAND_MAX;  // NOLINT TODO(yuyang): use rand_r
-    }
-  }
-  MatrixPtr input = Matrix::create(numSamples,
-                                   inputDim,
-                                   /* trans */ false,
-                                   FLAGS_use_gpu);
-  input->copyFrom(cpuInput);
-  inArgs[0].value = input;
-  gradientMachine1->forward(inArgs, &outArgs, PASS_TEST);
-  EXPECT_EQ((size_t)1, outArgs.size());
-
-  vector<Argument> outArgs2;
-  gradientMachine2->forward(inArgs, &outArgs2, PASS_TEST);
-  CpuMatrix out1(outArgs[0].value->getHeight(), outArgs[0].value->getWidth());
-  CpuMatrix out2(outArgs2[0].value->getHeight(), outArgs2[0].value->getWidth());
-  out1.copyFrom(*outArgs[0].value);
-  out2.copyFrom(*outArgs2[0].value);
-  for (size_t i = 0; i < out1.getHeight() * out1.getWidth(); i++) {
-    EXPECT_NE(out1.getData()[i], out2.getData()[i]);
-  }
-
-  gradientMachine3->forward(inArgs, &outArgs2, PASS_TEST);
-  out2.copyFrom(*outArgs2[0].value);
-  checkBuffer(
-      out1.getData(), out2.getData(), out2.getHeight() * out2.getWidth());
-
-  cmd = " rm -rf " + modelDir + "/*";
-  LOG(INFO) << "cmd " << cmd;
-  ret = system(cmd.c_str());
-  EXPECT_EQ(0, ret);
-  if (ret) {
-    return;
-  }
-
-  cmd = " rm -rf " + mergedModelFile;
-  LOG(INFO) << "cmd " << cmd;
-  ret = system(cmd.c_str());
-  EXPECT_EQ(0, ret);
-  if (ret) {
-    return;
-  }
-
-  // clean up
-  rmDir(modelDir.c_str());
-  remove(mergedModelFile.c_str());
-}
-
-int main(int argc, char** argv) {
-  initMain(argc, argv);
-  initPython(argc, argv);
-  testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
diff --git a/paddle/utils/Compiler.h b/paddle/utils/Compiler.h
new file mode 100644
index 0000000000000000000000000000000000000000..cebca5a2a3766110b83231eb0705e48800a7bda6
--- /dev/null
+++ b/paddle/utils/Compiler.h
@@ -0,0 +1,33 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+/**
+ * This header defines some useful attributes provided by each compiler. It is
+ * an abstraction layer over compilers.
+ */
+#ifdef __GNUC__
+#define GCC_VERSION \
+  (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#else
+#define GCC_VERSION 0
+#endif
+
+/**
+ * The __must_check macro. It forces a function's return value to be used;
+ * otherwise the compiler raises a warning. Paddle treats all compile
+ * warnings as errors.
+ */
+#if GCC_VERSION >= 30400
+#define __must_check __attribute__((warn_unused_result))
+#else
+#define __must_check
+#endif
diff --git a/paddle/utils/Error.h b/paddle/utils/Error.h
new file mode 100644
index 0000000000000000000000000000000000000000..cda1b5c37dada8d0c6c77fc2fb03bb614d5301b5
--- /dev/null
+++ b/paddle/utils/Error.h
@@ -0,0 +1,136 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <glog/logging.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <memory>
+#include <string>
+#include "Compiler.h"
+
+namespace paddle {
+
+/**
+ * Error is the Paddle error code. It only contains a std::string as the
+ * error message.
+ *
+ *
+ * There are two styles of returning an error in Paddle.
+ *
+ * 1. Return Error
+ *    When a method returns a status, the return value must use the `__must_check` attribute.
+ *    For example:
+ * @code{cpp}
+ * Error __must_check foo();
+ *
+ * Error __must_check bar() {
+ *   // do something.
+ *   Error err = foo();  // invoke another method that returns a status.
+ *   if (err) return err;
+ *   // do something else.
+ *   return Error();
+ * }
+ * @endcode{cpp}
+ *
+ * 2. Return by parameter.
+ *    It is another way to return an error, by using a pointer parameter.
+ *    For example:
+ *
+ * @code{cpp}
+ * Error bar();
+ *
+ * int foo(Error* error) {
+ *   // Do something.
+ *   Error err = bar();
+ *   if (err) {
+ *     *error = err;
+ *     return 0;
+ *   }
+ *   // Do something else.
+ *   if (someInternalErrorHappened) {
+ *     *error = Error("Some dimension is too large, %d", dimension);
+ *     return 0;
+ *   }
+ *   // End of method.
+ *   return someValue;
+ * }
+ *
+ * Error foobar() {
+ *   Error err;
+ *   // do something.
+ *   foo(&err);
+ *   if (err) return err;
+ * }
+ * @endcode{cpp}
+ *
+ *
+ * Currently there is a helper method 'check' in Error, because Paddle has
+ * always used LOG(FATAL) or CHECK to make the program exit. Once all
+ * LOG(FATAL) and CHECK calls are cleaned out of Paddle, the 'check' method
+ * will be removed.
+ */
+class Error {
+public:
+  /**
+   * Construct a no-error value.
+   */
+  Error() {}
+
+  /**
+   * @brief Create an Error using printf syntax.
+   */
+  explicit Error(const char* fmt, ...) {
+    va_list ap;
+    va_start(ap, fmt);
+    constexpr size_t kBufferSize = 1024;
+    char buffer[kBufferSize];
+    vsnprintf(buffer, kBufferSize, fmt, ap);
+    this->msg_.reset(new std::string(buffer));
+    va_end(ap);
+  }
+
+  /**
+   * @brief msg returns the error message, or nullptr if there is no error.
+   */
+  const char* msg() const {
+    if (msg_) {
+      return msg_->c_str();
+    } else {
+      return nullptr;
+    }
+  }
+
+  /**
+   * @brief operator bool, returns true if an error occurred.
+   */
+  operator bool() const { return !this->isOK(); }
+
+  /**
+   * @brief isOK returns true if there is no error.
+   * @return true if no error.
+   */
+  bool isOK() const { return msg_ == nullptr; }
+
+  /**
+   * @brief check this status with glog.
+   * @note This is a temporary method used while cleaning up Paddle code; it
+   *       will be removed later.
+   */
+  void check() const { CHECK(this->isOK()) << msg(); }
+
+private:
+  std::shared_ptr<std::string> msg_;
+};
+
+}  // namespace paddle
diff --git a/paddle/utils/Flags.cpp b/paddle/utils/Flags.cpp
index 59d6cbdc513660b87cb013d8aa92c5c8f9289ecb..e8f31bc811ac30d83e8203b784ee1f93a8d35d90 100644
--- a/paddle/utils/Flags.cpp
+++ b/paddle/utils/Flags.cpp
@@ -33,12 +33,15 @@ DEFINE_int32(port, 20134, "Listening port for pserver");
 DEFINE_int32(data_server_port, 21134, "Listening port for dserver");
 DEFINE_int32(ports_num,
              1,
-             "The ports number for parameter send,"
-             " increment based on default port number");
+             "Number of ports for sending dense parameter,"
+             " following ports on parameter server will be visited"
+             " for sending dense parameter: [port, port+ports_num-1]");
 DEFINE_int32(ports_num_for_sparse,
              0,
-             "The ports number for parameter send,"
-             " increment based on default (port + ports_num)");
+             "Number of ports for sending sparse parameter,"
+             " following ports on parameter server will be visited"
+             " for sending sparse parameter:"
+             " [port+ports_num, port+ports_num+ports_num_for_sparse-1]");
 DEFINE_string(nics, "xgbe0,xgbe1", "network device name for pservers");
 DEFINE_string(rdma_tcp, "tcp", "use rdma or tcp rdma transport protocol");
 DEFINE_int32(trainer_id,
diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp
index 411a64aa8d0737a8d57e62fbd0788ffaacfbc9f7..dbab4ec43ca2fa691445131d2cb14f51721a2e4c 100644
--- a/paddle/utils/Util.cpp
+++ b/paddle/utils/Util.cpp
@@ -144,20 +144,20 @@ void runInitFunctions() {
 }
 
 void initMain(int argc, char** argv) {
-  initializeLogging(argc, argv);
   installLayerStackTracer();
   std::string line;
   for (int i = 0; i < argc; ++i) {
     line += argv[i];
     line += ' ';
   }
-  LOG(INFO) << "commandline: " << line;
 
 #ifndef GFLAGS_GFLAGS_H_
   namespace gflags = google;
 #endif
 
   gflags::ParseCommandLineFlags(&argc, &argv, true);
+  initializeLogging(argc, argv);
+  LOG(INFO) << "commandline: " << line;
   CHECK_EQ(argc, 1) << "Unknown commandline argument: " << argv[1];
 
   installProfilerSwitch();
@@ -289,6 +289,7 @@ void mkDir(const char* filename) {
 void mkDirRecursively(const char* dir) {
   struct stat sb;
 
+  if (*dir == 0) return;  // empty string
   if (!stat(dir, &sb)) return;
 
   mkDirRecursively(path::dirname(dir).c_str());
diff --git a/paddle/utils/tests/CMakeLists.txt b/paddle/utils/tests/CMakeLists.txt
index 26fafbd1ab3f2967b765b8bcb973fb745c0e6422..aa923b355377752f9b297a125f5c43c364ba9b06 100644
--- a/paddle/utils/tests/CMakeLists.txt
+++ b/paddle/utils/tests/CMakeLists.txt
@@ -4,6 +4,7 @@ add_simple_unittest(test_CustomStackTrace)
 add_simple_unittest(test_ThreadBarrier)
 add_simple_unittest(test_SpinLock)
 add_simple_unittest(test_SIMDFlags)
+add_simple_unittest(test_Error)
 
 add_executable(
     test_CustomStackTracePrint
diff --git a/paddle/utils/tests/test_Error.cpp b/paddle/utils/tests/test_Error.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fdf326b17a1c8baa87e2a17fafae253565d1e699
--- /dev/null
+++ b/paddle/utils/tests/test_Error.cpp
@@ -0,0 +1,34 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/utils/Error.h"
+
+#include <gtest/gtest.h>
+
+TEST(Error, testAll) {
+  paddle::Error error;
+  ASSERT_FALSE(error);
+  error = paddle::Error("I'm the error");
+  ASSERT_TRUE(error);
+  ASSERT_STREQ("I'm the error", error.msg());
+
+  error = paddle::Error("error2");
+  ASSERT_TRUE(error);
+  ASSERT_STREQ("error2", error.msg());
+
+  int i = 3;
+  auto error3 = paddle::Error("error%d", i);
+  ASSERT_TRUE(error3);
+  ASSERT_STREQ("error3", error3.msg());
+}
diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto
index 3a9d339976fff91d79e7459ad5984cf78ea8990a..65d5d50277b665e7c355202d6e8043f656ae92f1 100644
--- a/proto/ModelConfig.proto
+++ b/proto/ModelConfig.proto
@@ -255,6 +255,13 @@ message PriorBoxConfig {
   repeated float variance = 4;
 }
 
+message PadConfig {
+  required ImageConfig image_conf = 1;
+  repeated uint32 pad_c = 2;
+  repeated uint32 pad_h = 3;
+  repeated uint32 pad_w = 4;
+}
+
 message LayerInputConfig {
   required string input_layer_name = 1;
   optional string input_parameter_name = 2;
@@ -271,6 +278,7 @@ message LayerInputConfig {
   optional MaxOutConfig maxout_conf = 11;
   optional SppConfig spp_conf = 12;
   optional PriorBoxConfig priorbox_conf = 13;
+  optional PadConfig pad_conf = 14;
 }
 
 message LayerConfig {
@@ -419,14 +427,14 @@ message LayerConfig {
   // bias size
   optional uint32 bias_size = 48 [default = 0];
 
-  // this parameter can be used as a user-defined parameter when necessary, 
+  // this parameter can be used as a user-defined parameter when necessary,
   // without changing the proto file.
-  // e.g., when a new layer with a user-defined parameter is implemented, 
+  // e.g., when a new layer with a user-defined parameter is implemented,
   // it can be used to pass that parameter, without modifying the proto file.
   // string type is used for flexibility: different types can be converted
-  // to string and reinterpreted in the user's own layer implementation.  
+  // to string and reinterpreted in the user's own layer implementation.
   optional string user_arg = 49;
-  
+
   // to indicate rectangle image data
   optional uint64 height = 50;
   optional uint64 width = 51;
@@ -467,6 +475,10 @@ message EvaluatorConfig {
   // Used by ChunkEvaluator
   // chunk of these types are not counted
   repeated int32 excluded_chunk_types = 12;
+
+  // Used by ClassificationErrorEvaluator
+  // top-k classification error
+  optional int32 top_k = 13 [default = 1];
 }
 
 message LinkConfig {
diff --git a/proto/ParameterServerConfig.proto b/proto/ParameterServerConfig.proto
index 3068bba8b10d89b432b41076dc6eb3ebc40b3883..404f9613792653dda72eeb98f022851adedbfbfd 100644
--- a/proto/ParameterServerConfig.proto
+++ b/proto/ParameterServerConfig.proto
@@ -27,11 +27,14 @@ message ParameterClientConfig {
  * Configuration structure for ParameterServer2.
  */
 message ParameterServerConfig {
-  // The ports number for parameter send,
-  // increment based on default port number
+  // Number of ports for sending dense parameters; the following ports on
+  // the parameter server will be visited for sending dense parameters:
+  // [port, port+ports_num-1]
   required int32 ports_num = 1 [default = 1];
-  // The ports number for parameter send,
-  // increment based on default (port + ports_num
+  // Number of ports for sending sparse parameters; the following ports on
+  // the parameter server will be visited for sending sparse parameters:
+  // [port+ports_num, port+ports_num+ports_num_for_sparse-1]
   required int32 ports_num_for_sparse = 2 [default = 0];
   // network device name for pservers
   required string nics = 3 [default = "xgbe0,xgbe1"];
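
As a quick illustration of the two port-count options documented above, the trainer visits one contiguous block of ports for dense parameters followed by one for sparse parameters. The sketch below is editorial and not part of the patch; the helper name is hypothetical and only makes the documented ranges concrete.

```python
# Hypothetical helper illustrating the documented port layout; it does not
# exist in Paddle.
def pserver_ports(port, ports_num, ports_num_for_sparse):
    """Return (dense_ports, sparse_ports) as documented above."""
    dense = list(range(port, port + ports_num))
    sparse = list(range(port + ports_num,
                        port + ports_num + ports_num_for_sparse))
    return dense, sparse

# With the default port=20134, ports_num=2 and ports_num_for_sparse=2:
# dense -> [20134, 20135], sparse -> [20136, 20137]
print(pserver_ports(20134, 2, 2))
```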
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 1cda4762eb2a55175d6c9faee98aaeaa1f763890..48e0a1993d07f801e65dfa54a991995c593fe475 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -4,11 +4,13 @@ set(OUTPUT_DIR
 file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py)
 file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py)
 file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py)
+file(GLOB_RECURSE V2_PY_FILES ./paddle/v2/ *.py)
 
 set(PY_FILES paddle/__init__.py
              ${TRAINER_PY_FILES}
              ${HELPERS_PY_FILES}
-             ${UTILS_PY_FILES})
+             ${UTILS_PY_FILES}
+             ${V2_PY_FILES})
 
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
     ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
@@ -22,6 +24,8 @@ add_custom_target(paddle_python ALL DEPENDS
     ${OUTPUT_DIR}/.timestamp)
 
 add_subdirectory(paddle/trainer_config_helpers/tests)
+add_subdirectory(paddle/v2/reader/tests)
+add_subdirectory(paddle/v2/tests)
 
 install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/
     DESTINATION opt/paddle/share/wheels
diff --git a/python/paddle/trainer/PyDataProvider2.py b/python/paddle/trainer/PyDataProvider2.py
index bd24c68b6fe88eab03c814f8cac70db3880316f4..0e752c117c1ecfab72e2da2f830380e9524236e7 100644
--- a/python/paddle/trainer/PyDataProvider2.py
+++ b/python/paddle/trainer/PyDataProvider2.py
@@ -45,6 +45,23 @@ class CacheType(object):
 
 
 class InputType(object):
+    """
+    InputType is the base class for paddle input types.
+
+    ..  note::
+
+        this is a base class and should never be used directly by users.
+
+    :param dim: dimension of the input. If the input is an integer, it means
+                the value range. Otherwise, it means the size of the layer.
+    :type dim: int
+    :param seq_type: sequence type of input. 0 means it is not a sequence. 1
+                     means it is a variable length sequence. 2 means it is a
+                     nested sequence.
+    :type seq_type: int
+    :param type: data type of input.
+    :type type: int
+    """
     __slots__ = ['dim', 'seq_type', 'type']
 
     def __init__(self, dim, seq_type, tp):
@@ -54,19 +71,63 @@ class InputType(object):
 
 
 def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
+    """
+    Dense Vector. It means the input feature is a dense float vector. For
+    example, if the input is an image with 28*28 pixels, the input to the
+    Paddle neural network should be a dense vector of dimension 784.
+
+    :param dim: dimension of this vector.
+    :type dim: int
+    :param seq_type: sequence type of input.
+    :type seq_type: int
+    :return: An input type object.
+    :rtype: InputType
+    """
     return InputType(dim, seq_type, DataType.Dense)
 
 
 def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
+    """
+    Sparse binary vector. It means the input feature is a sparse vector and
+    every element in this vector is either zero or one.
+
+    :param dim: dimension of this vector.
+    :type dim: int
+    :param seq_type: sequence type of this input.
+    :type seq_type: int
+    :return: An input type object.
+    :rtype: InputType
+    """
     return InputType(dim, seq_type, DataType.SparseNonValue)
 
 
 def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
+    """
+    Sparse vector. It means the input feature is a sparse vector. Most of the
+    elements in this vector are zero; the others can be any float value.
+
+    :param dim: dimension of this vector.
+    :type dim: int
+    :param seq_type: sequence type of this input.
+    :type seq_type: int
+    :return: An input type object.
+    :rtype: InputType
+    """
     return InputType(dim, seq_type, DataType.SparseValue)
 
 
-def index_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
-    return InputType(dim, seq_type, DataType.Index)
+def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE):
+    """
+    Data type of an integer.
+
+    :param value_range: range of this integer. Values lie in [0, value_range).
+    :type value_range: int
+    :param seq_type: sequence type of this input.
+    :type seq_type: int
+    :return: An input type object
+    :rtype: InputType
+    """
+    return InputType(value_range, seq_type, DataType.Index)
 
 
 dense_vector = dense_slot
@@ -76,6 +137,14 @@ integer_value = index_slot
 
 
 def dense_vector_sequence(dim):
+    """
+    Data type of a sequence of dense vector.
+
+    :param dim: dimension of dense vector.
+    :type dim: int
+    :return: An input type object
+    :rtype: InputType
+    """
     return dense_vector(dim, seq_type=SequenceType.SEQUENCE)
 
 
@@ -84,6 +153,15 @@ def dense_vector_sub_sequence(dim):
 
 
 def sparse_binary_vector_sequence(dim):
+    """
+    Data type of a sequence of sparse vectors, in which every element is
+    either zero or one.
+
+    :param dim: dimension of sparse vector.
+    :type dim: int
+    :return: An input type object
+    :rtype: InputType
+    """
     return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE)
 
 
@@ -92,6 +170,15 @@ def sparse_binary_vector_sub_sequence(dim):
 
 
 def sparse_vector_sequence(dim):
+    """
+    Data type of a sequence of sparse vectors, in which most elements are
+    zero and the others can be any float value.
+
+    :param dim: dimension of sparse vector.
+    :type dim: int
+    :return: An input type object
+    :rtype: InputType
+    """
     return sparse_vector(dim, seq_type=SequenceType.SEQUENCE)
 
 
@@ -99,8 +186,14 @@ def sparse_vector_sub_sequence(dim):
     return sparse_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
 
 
-def integer_value_sequence(dim):
-    return integer_value(dim, seq_type=SequenceType.SEQUENCE)
+def integer_value_sequence(value_range):
+    """
+    Data type of a sequence of integers.
+
+    :param value_range: range of each element. Values lie in [0, value_range).
+    :type value_range: int
+    :return: An input type object.
+    :rtype: InputType
+    """
+    return integer_value(value_range, seq_type=SequenceType.SEQUENCE)
 
 
 def integer_value_sub_sequence(dim):
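A usage sketch for the input types documented above (the reader function
read_mnist is hypothetical):

..  code-block:: python

    from paddle.trainer.PyDataProvider2 import provider, dense_vector, \
        integer_value

    @provider(input_types={'pixel': dense_vector(784),
                           'label': integer_value(10)})
    def process(settings, filename):
        for image, label in read_mnist(filename):
            yield {'pixel': image, 'label': label}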
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 8e9c40877bac59378fb352b5bd2448c5ae3f5d29..da937152ee0ce788309690c7b718943bb21b5a76 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -493,6 +493,7 @@ class Input(Cfg):
             block_expand=None,
             maxout=None,
             spp=None,
+            pad=None,
             format=None,
             nnz=None,
             is_static=None,
@@ -829,7 +830,6 @@ class Pool(Cfg):
             channels,
             size_x,
             size_y=None,
-            img_width=None,
             start=None,
             stride=None,  # 1 by defalut in protobuf
             stride_y=None,
@@ -844,6 +844,12 @@ class SpatialPyramidPool(Cfg):
         self.add_keys(locals())
 
 
+@config_class
+class Pad(Cfg):
+    def __init__(self, channels, pad_c, pad_h, pad_w):
+        self.add_keys(locals())
+
+
 @config_class
 class Norm(Cfg):
     def __init__(self,
@@ -887,11 +893,11 @@ class MaxOut(Cfg):
         self.add_keys(locals())
 
 
-def DataBase(async_load_data=False,
-             constant_slots=None,
-             data_ratio=1,
-             is_main_data=True,
-             usage_ratio=None):
+def create_data_config_proto(async_load_data=False,
+                             constant_slots=None,
+                             data_ratio=1,
+                             is_main_data=True,
+                             usage_ratio=None):
     # default: all sub dataproviders are treat as "main data".
     # see proto/DataConfig.proto for is_main_data
     data_config = DataConfig()
@@ -917,7 +923,7 @@ def SimpleData(files=None,
                context_len=None,
                buffer_capacity=None,
                **xargs):
-    data_config = DataBase(**xargs)
+    data_config = create_data_config_proto(**xargs)
     data_config.type = 'simple'
     data_config.files = files
     data_config.feat_dim = feat_dim
@@ -939,7 +945,7 @@ def PyData(files=None,
            constant_slots=None,
            load_thread_num=None,
            **xargs):
-    data_config = DataBase(**xargs)
+    data_config = create_data_config_proto(**xargs)
     data_config.type = 'py'
     if load_data_module in g_py_module_name_list:
 
@@ -990,7 +996,7 @@ def ProtoData(files=None,
               constant_slots=None,
               load_thread_num=None,
               **xargs):
-    data_config = DataBase(**xargs)
+    data_config = create_data_config_proto(**xargs)
     if type is None:
         data_config.type = 'proto'
     else:
@@ -1029,7 +1035,7 @@ def Data(type,
          buffer_capacity=None,
          **xargs):
 
-    data_config = DataBase(**xargs)
+    data_config = create_data_config_proto(**xargs)
     data_config.type = type
     data_config.files = files
     data_config.feat_dim = feat_dim
@@ -1102,7 +1108,7 @@ def parse_bilinear(bilinear, input_layer_name, bilinear_conf):
     bilinear_conf.out_size_y = bilinear.out_size_y
 
 
-def parse_pool(pool, input_layer_name, pool_conf):
+def parse_pool(pool, input_layer_name, pool_conf, ceil_mode):
     pool_conf.pool_type = pool.pool_type
     config_assert(pool.pool_type in [
         'max-projection', 'avg-projection', 'cudnn-max-pool', 'cudnn-avg-pool'
@@ -1127,10 +1133,10 @@ def parse_pool(pool, input_layer_name, pool_conf):
     pool_conf.padding_y = default(pool.padding_y, pool_conf.padding)
     pool_conf.output_x = cnn_output_size(pool_conf.img_size, pool_conf.size_x,
                                          pool_conf.padding, pool_conf.stride,
-                                         False)
+                                         not ceil_mode)
     pool_conf.output_y = cnn_output_size(pool_conf.img_size_y, pool_conf.size_y,
                                          pool_conf.padding_y,
-                                         pool_conf.stride_y, False)
+                                         pool_conf.stride_y, not ceil_mode)
 
 
 def parse_spp(spp, input_layer_name, spp_conf):
@@ -1247,6 +1253,7 @@ def Evaluator(
         dict_file=None,
         result_file=None,
         num_results=None,
+        top_k=None,
         delimited=None,
         excluded_chunk_types=None, ):
     evaluator = g_config.model_config.evaluators.add()
@@ -1274,6 +1281,8 @@ def Evaluator(
         evaluator.result_file = result_file
     if num_results is not None:
         evaluator.num_results = num_results
+    if top_k is not None:
+        evaluator.top_k = top_k
     if delimited is not None:
         evaluator.delimited = delimited
 
@@ -1803,9 +1812,8 @@ class ConvTransLayer(ConvTransLayerBase):
 
 @config_layer('norm')
 class NormLayer(LayerBase):
-    def __init__(self, name, inputs, device=None):
-        super(NormLayer, self).__init__(
-            name, 'norm', 0, inputs=inputs, device=device)
+    def __init__(self, name, inputs, **xargs):
+        super(NormLayer, self).__init__(name, 'norm', 0, inputs=inputs, **xargs)
         for input_index in xrange(len(self.inputs)):
             input_layer = self.get_input_layer(input_index)
             norm_conf = self.config.inputs[input_index].norm_conf
@@ -1817,23 +1825,22 @@ class NormLayer(LayerBase):
 
 @config_layer('pool')
 class PoolLayer(LayerBase):
-    def __init__(self, name, inputs, device=None):
-        super(PoolLayer, self).__init__(
-            name, 'pool', 0, inputs=inputs, device=device)
+    def __init__(self, name, inputs, ceil_mode=True, **xargs):
+        super(PoolLayer, self).__init__(name, 'pool', 0, inputs=inputs, **xargs)
         for input_index in xrange(len(self.inputs)):
             input_layer = self.get_input_layer(input_index)
             pool_conf = self.config.inputs[input_index].pool_conf
             parse_pool(self.inputs[input_index].pool, input_layer.name,
-                       pool_conf)
+                       pool_conf, ceil_mode)
             self.set_cnn_layer(name, pool_conf.output_y, pool_conf.output_x,
                                pool_conf.channels)
 
 
 @config_layer('spp')
 class SpatialPyramidPoolLayer(LayerBase):
-    def __init__(self, name, inputs, device=None):
+    def __init__(self, name, inputs, **xargs):
         super(SpatialPyramidPoolLayer, self).__init__(
-            name, 'spp', 0, inputs=inputs, device=device)
+            name, 'spp', 0, inputs=inputs, **xargs)
         for input_index in xrange(len(self.inputs)):
             input_layer = self.get_input_layer(input_index)
             spp_conf = self.config.inputs[input_index].spp_conf
@@ -1842,6 +1849,25 @@ class SpatialPyramidPoolLayer(LayerBase):
             self.set_cnn_layer(name, 1, output_x, spp_conf.image_conf.channels)
 
 
+@config_layer('pad')
+class PadLayer(LayerBase):
+    def __init__(self, name, inputs, **xargs):
+        super(PadLayer, self).__init__(name, 'pad', 0, inputs=inputs, **xargs)
+        pad = self.inputs[0].pad
+        self.config.inputs[0].pad_conf.pad_c.extend(pad.pad_c)
+        self.config.inputs[0].pad_conf.pad_h.extend(pad.pad_h)
+        self.config.inputs[0].pad_conf.pad_w.extend(pad.pad_w)
+
+        input_layer = self.get_input_layer(0)
+        image_conf = self.config.inputs[0].pad_conf.image_conf
+        parse_image(pad, input_layer.name, image_conf)
+        out_ch = pad.channels + pad.pad_c[0] + pad.pad_c[1]
+        out_h = image_conf.img_size_y + pad.pad_h[0] + pad.pad_h[1]
+        out_w = image_conf.img_size + pad.pad_w[0] + pad.pad_w[1]
+        self.set_cnn_layer(name, out_h, out_w, out_ch)
+        self.config.size = out_ch * out_h * out_w
+
+
 @config_layer('batch_norm')
 class BatchNormLayer(LayerBase):
     layer_type = 'batch_norm'
@@ -1851,7 +1877,6 @@ class BatchNormLayer(LayerBase):
                  inputs,
                  active_type="linear",
                  bias=True,
-                 device=None,
                  use_global_stats=True,
                  moving_average_fraction=0.9,
                  batch_norm_type=None,
@@ -1893,7 +1918,6 @@ class BatchNormLayer(LayerBase):
             0,
             active_type=active_type,
             inputs=inputs,
-            device=device,
             **xargs)
 
         if use_global_stats is not None:
@@ -1905,8 +1929,8 @@ class BatchNormLayer(LayerBase):
         image_conf = self.config.inputs[0].image_conf
         parse_image(self.inputs[0].image, input_layer.name, image_conf)
 
-        # Only pass the width and height of input to batch_norm layer 
-        # when either of it is non-zero. 
+        # Only pass the width and height of input to batch_norm layer
+        # when either of it is non-zero.
         if input_layer.width != 0 or input_layer.height != 0:
             self.set_cnn_layer(name, image_conf.img_size_y, image_conf.img_size,
                                image_conf.channels, False)
@@ -1927,9 +1951,9 @@ class BatchNormLayer(LayerBase):
 
 @config_layer('trans')
 class TransLayer(LayerBase):
-    def __init__(self, name, inputs, device=None):
+    def __init__(self, name, inputs, **xargs):
         super(TransLayer, self).__init__(
-            name, 'trans', 0, inputs=inputs, device=device)
+            name, 'trans', 0, inputs=inputs, **xargs)
         config_assert(
             len(self.inputs) == 1,
             'TransLayer must have one and only one input')
@@ -1938,19 +1962,31 @@ class TransLayer(LayerBase):
 
 @config_layer('resize')
 class ResizeLayer(LayerBase):
-    def __init__(self, name, size, inputs, device=None):
+    def __init__(self, name, size, inputs, **xargs):
         super(ResizeLayer, self).__init__(
-            name, 'resize', size=size, inputs=inputs, device=device)
+            name, 'resize', size=size, inputs=inputs, **xargs)
         config_assert(
             len(self.inputs) == 1,
             'ResizeLayer must have one and only one input')
 
 
+@config_layer('rotate')
+class RotateLayer(LayerBase):
+    def __init__(self, name, inputs, height, width, device=None):
+        super(RotateLayer, self).__init__(
+            name, 'rotate', 0, inputs=inputs, device=device)
+        config_assert(
+            len(self.inputs) == 1,
+            'RotateLayer must have one and only one input')
+        self.set_layer_height_width(height, width)
+        self.set_layer_size(self.get_input_layer(0).size)
+
+
 @config_layer('blockexpand')
 class BlockExpandLayer(LayerBase):
-    def __init__(self, name, inputs, device=None):
+    def __init__(self, name, inputs, **xargs):
         super(BlockExpandLayer, self).__init__(
-            name, 'blockexpand', 0, inputs=inputs, device=device)
+            name, 'blockexpand', 0, inputs=inputs, **xargs)
         for input_index in xrange(len(self.inputs)):
             input_layer = self.get_input_layer(input_index)
             parse_block_expand(
diff --git a/python/paddle/trainer/recurrent_units.py b/python/paddle/trainer/recurrent_units.py
old mode 100644
new mode 100755
index edca279dcadef42243cb3fc00366cec90cbc69bf..ef92107a1093d2ec2b2a41677e964fdaa60ac829
--- a/python/paddle/trainer/recurrent_units.py
+++ b/python/paddle/trainer/recurrent_units.py
@@ -15,10 +15,13 @@
 # recurrent_units.py
 # Version 2.0
 #
-# Some recurrent units can be used in recurrent layer group, 
+# Some recurrent units can be used in recurrent layer group,
 #   to use these units, import this module in your config_file:
-#     import trainer.recurrent_units 
-# 
+#     import trainer.recurrent_units
+#
+# The modules in this file are DEPRECATED.
+# If you would like to use lstm/gru,
+# please use the functions defined in paddle.trainer_config_helpers.
 
 from paddle.trainer.config_parser import *
 
@@ -26,7 +29,7 @@ from paddle.trainer.config_parser import *
 # long short term memory, can be used in recurrent machine
 # *inputs* must be a list of Projections, for example:
 #   inputs = [FullMatrixProjection("input_layer_name")],
-# *para_prefix* defines parameter names, if the *para_prefix* of 
+# *para_prefix* defines parameter names, if the *para_prefix* of
 #   two LstmRecurrentUnit is same, they share same parameters
 # *out_memory* can be defined outside if it's used outside
 def LstmRecurrentUnit(name,
@@ -194,7 +197,7 @@ def LstmRecurrentLayerGroup(name,
 # gated recurrent unit, can be used in recurrent machine
 # *inputs* should be a list of Projections, for example:
 #   inputs = [FullMatrixProjection("input_layer_name")],
-# *para_prefix* defines parameter names, if the *para_prefix* of 
+# *para_prefix* defines parameter names, if the *para_prefix* of
 #   two GatedRecurrentUnit is same, they share same parameters
 # *out_memory* can be defined outside if it's used outside
 
diff --git a/python/paddle/trainer_config_helpers/data_sources.py b/python/paddle/trainer_config_helpers/data_sources.py
index 622b4fc25ccff397cd3115db316870f328466fba..ab9a2562dcccb394c0b24741ceeb10061e40cb9a 100644
--- a/python/paddle/trainer_config_helpers/data_sources.py
+++ b/python/paddle/trainer_config_helpers/data_sources.py
@@ -58,8 +58,8 @@ def define_py_data_source(file_list,
     :param obj: python object name. May be a function name if using
                 PyDataProviderWrapper.
     :type obj: basestring
-    :param args: The best practice is using dict to pass arguments into 
-                 DataProvider, and use :code:`@init_hook_wrapper` to 
+    :param args: The best practice is using dict to pass arguments into
+                 DataProvider, and use :code:`@init_hook_wrapper` to
                  receive arguments.
     :type args: string or picklable object
     :param async: Load Data asynchronously or not.
@@ -98,7 +98,7 @@ def define_py_data_sources(train_list,
     The annotation is almost the same as define_py_data_sources2, except that
     it can specific train_async and data_cls.
 
-    :param data_cls: 
+    :param data_cls:
     :param train_list: Train list name.
     :type train_list: basestring
     :param test_list: Test list name.
@@ -111,8 +111,8 @@ def define_py_data_sources(train_list,
                 a tuple or list to this argument.
     :type obj: basestring or tuple or list
     :param args: The best practice is using dict() to pass arguments into
-                 DataProvider, and use :code:`@init_hook_wrapper` to receive 
-                 arguments. If train and test is different, then pass a tuple 
+                 DataProvider, and use :code:`@init_hook_wrapper` to receive
+                 arguments. If train and test is different, then pass a tuple
                  or list to this argument.
     :type args: string or picklable object or list or tuple.
     :param train_async: Is training data load asynchronously or not.
@@ -163,12 +163,12 @@ def define_py_data_sources2(train_list, test_list, module, obj, args=None):
 
     ..  code-block:: python
 
-        define_py_data_sources2(train_list="train.list", 
-                                test_list="test.list", 
+        define_py_data_sources2(train_list="train.list",
+                                test_list="test.list",
                                 module="data_provider"
                                 # if train/test use different configurations,
                                 # obj=["process_train", "process_test"]
-                                obj="process", 
+                                obj="process",
                                 args={"dictionary": dict_name})
 
     The related data provider can refer to :ref:`api_pydataprovider2_sequential_model` .
@@ -185,8 +185,8 @@ def define_py_data_sources2(train_list, test_list, module, obj, args=None):
                 a tuple or list to this argument.
     :type obj: basestring or tuple or list
     :param args: The best practice is using dict() to pass arguments into
-                 DataProvider, and use :code:`@init_hook_wrapper` to receive 
-                 arguments. If train and test is different, then pass a tuple 
+                 DataProvider, and use :code:`@init_hook_wrapper` to receive
+                 arguments. If train and test is different, then pass a tuple
                  or list to this argument.
     :type args: string or picklable object or list or tuple.
     :return: None
@@ -195,13 +195,13 @@ def define_py_data_sources2(train_list, test_list, module, obj, args=None):
 
     def py_data2(files, load_data_module, load_data_object, load_data_args,
                  **kwargs):
-        data = DataBase()
+        data = create_data_config_proto()
         data.type = 'py2'
         data.files = files
         data.load_data_module = load_data_module
         data.load_data_object = load_data_object
         data.load_data_args = load_data_args
-        data.async_load_data = True
+        data.async_load_data = False
         return data
 
     define_py_data_sources(
diff --git a/python/paddle/trainer_config_helpers/default_decorators.py b/python/paddle/trainer_config_helpers/default_decorators.py
index ad3efcbf369411b9c42b2a32ed05b04f86bf7de6..2f25579fcdd9793e4c165439c9934a2bccb63617 100644
--- a/python/paddle/trainer_config_helpers/default_decorators.py
+++ b/python/paddle/trainer_config_helpers/default_decorators.py
@@ -52,6 +52,10 @@ def wrap_param_default(param_names=None,
                     kwargs[name] = default_factory(func)
             return func(*args, **kwargs)
 
+        # Expose the wrapped function's argspec so that stacked decorators
+        # and other introspection still see the original signature instead
+        # of (*args, **kwargs).
+        if hasattr(func, 'argspec'):
+            __wrapper__.argspec = func.argspec
+        else:
+            __wrapper__.argspec = inspect.getargspec(func)
         return __wrapper__
 
     return __impl__
diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py
index bd247ea9af9d8dfb2d476cdc62638bd65c11add5..567521ee9dbadb7a2502cfb9972ef0940e1e410a 100644
--- a/python/paddle/trainer_config_helpers/evaluators.py
+++ b/python/paddle/trainer_config_helpers/evaluators.py
@@ -71,6 +71,7 @@ def evaluator_base(
         result_file=None,
         num_results=None,
         delimited=None,
+        top_k=None,
         excluded_chunk_types=None, ):
     """
     Evaluator will evaluate the network status while training/testing.
@@ -104,12 +105,15 @@ def evaluator_base(
     :param weight: An input layer which is a weight for each sample.
                    Each evaluator may calculate differently to use this weight.
     :type weight: LayerOutput.
+    :param top_k: the number k in the top-k error rate.
+    :type top_k: int
     """
     # inputs type assertions.
     assert classification_threshold is None or isinstance(
         classification_threshold, float)
     assert positive_label is None or isinstance(positive_label, int)
     assert num_results is None or isinstance(num_results, int)
+    assert top_k is None or isinstance(top_k, int)
 
     if not isinstance(input, list):
         input = [input]
@@ -130,6 +134,8 @@ def evaluator_base(
         dict_file=dict_file,
         result_file=result_file,
         delimited=delimited,
+        num_results=num_results,
+        top_k=top_k,
         excluded_chunk_types=excluded_chunk_types, )
 
 
@@ -139,6 +145,7 @@ def classification_error_evaluator(input,
                                    label,
                                    name=None,
                                    weight=None,
+                                   top_k=None,
                                    threshold=None):
     """
     Classification Error Evaluator. It will print error rate for classification.
@@ -167,6 +174,8 @@ def classification_error_evaluator(input,
                   then means not set weight. The larger weight it is, the more
                   important this sample is.
     :type weight: LayerOutput
+    :param top_k: the number k in the top-k error rate.
+    :type top_k: int
     :param threshold: The classification threshold.
     :type threshold: float
     :return: None.
@@ -178,6 +187,7 @@ def classification_error_evaluator(input,
         input=input,
         label=label,
         weight=weight,
+        top_k=top_k,
         classification_threshold=threshold, )
 
 
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
old mode 100644
new mode 100755
index 23c0e9174ff958cbca8963de12cb39bac0b2583d..b94f8f9a783552519ca73e7cfc0937b302d3445b
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -14,6 +14,7 @@
 
 import functools
 import collections
+import inspect
 
 from paddle.trainer.config_parser import *
 from .activations import LinearActivation, SigmoidActivation, TanhActivation, \
@@ -37,6 +38,7 @@ __all__ = [
     "dotmul_projection",
     "dotmul_operator",
     "repeat_layer",
+    "seq_reshape_layer",
     "table_projection",
     "mixed_layer",
     "data_layer",
@@ -59,6 +61,7 @@ __all__ = [
     'img_cmrnorm_layer',
     'addto_layer',
     'concat_layer',
+    'seq_concat_layer',
     'lstm_step_layer',
     'recurrent_group',
     'memory',
@@ -70,6 +73,7 @@ __all__ = [
     'interpolation_layer',
     'bilinear_interp_layer',
     'trans_layer',
+    'rotate_layer',
     'sum_to_one_norm_layer',
     'get_output_layer',
     'LayerType',
@@ -108,6 +112,9 @@ __all__ = [
     'print_layer',
     'priorbox_layer',
     'spp_layer',
+    'pad_layer',
+    'eos_layer',
+    'layer_support',
 ]
 
 
@@ -122,6 +129,7 @@ class LayerType(object):
     GRUMEMORY = "gated_recurrent"
     SEQUENCE_LAST_INSTANCE = "seqlastins"
     SEQUENCE_FIRST_INSTANCE = "seqfirstins"
+    SEQUENCE_RESHAPE = "seqreshape"
     POOLING_MAX = "max"
     POOLING_AVG = 'average'
     FC_LAYER = "fc"
@@ -142,6 +150,7 @@ class LayerType(object):
 
     CONCAT_LAYER = 'concat'
     CONCAT_PROJ_LAYER = 'concat2'
+    SEQUENCE_CONCAT_LAYER = 'seqconcat'
 
     LSTM_STEP_LAYER = 'lstm_step'
     GRU_STEP_LAYER = 'gru_step'
@@ -153,6 +162,7 @@ class LayerType(object):
     POWER_LAYER = 'power'
     SCALING_LAYER = 'scaling'
     TRANS_LAYER = 'trans'
+    ROTATE_LAYER = 'rotate'
     OUT_PROD_LAYER = 'out_prod'
     FEATURE_MAP_EXPAND_LAYER = 'featmap_expand'
 
@@ -170,6 +180,7 @@ class LayerType(object):
     BLOCK_EXPAND = "blockexpand"
     MAXOUT = "maxout"
     SPP_LAYER = "spp"
+    PAD_LAYER = "pad"
 
     PRINT_LAYER = "print"
     PRIORBOX_LAYER = "priorbox"
@@ -306,6 +317,11 @@ def layer_support(*attrs):
                     val.check(method.__name__)
             return method(*args, **kwargs)
 
+        # As in wrap_param_default, keep the decorated method's original
+        # argspec visible through the wrapper.
+        if hasattr(method, 'argspec'):
+            wrapper.argspec = method.argspec
+        else:
+            wrapper.argspec = inspect.getargspec(method)
+
         return wrapper
 
     return decorator
@@ -700,6 +716,7 @@ class MixedLayerType(LayerOutput):
         # update the size which might be computed inside MixedLayer
         # according to the operator's output size
         self.size = ml.config.size
+        self.finalized = True
 
 
 @wrap_name_default("mixed")
@@ -778,17 +795,16 @@ def data_layer(name, size, height=None, width=None, layer_attr=None):
 
     ..  code-block:: python
 
-        data = data_layer(name="input",
-                          size=1000)
+        data = data_layer(name="input", size=1000)
 
     :param name: Name of this data layer.
     :type name: basestring
     :param size: Size of this data layer.
     :type size: int
     :param height: Height of this data layer, used for image
-    :type size: int|None
+    :type height: int|None
     :param width: Width of this data layer, used for image
-    :type size: int|None
+    :type width: int|None
     :param layer_attr: Extra Layer Attribute.
     :type layer_attr: ExtraLayerAttribute.
     :return: LayerOutput object.
@@ -1279,6 +1295,12 @@ def last_seq(input,
     """
     Get Last Timestamp Activation of a sequence.
 
+    The simple usage is:
+
+    .. code-block:: python
+
+       seq = last_seq(input=layer)
+
     :param agg_level: Aggregated level
     :param name: Layer name.
     :type name: basestring
@@ -1317,6 +1339,12 @@ def first_seq(input,
     """
     Get First Timestamp Activation of a sequence.
 
+    The simple usage is:
+
+    .. code-block:: python
+
+       seq = first_seq(input=layer)
+
     :param agg_level: aggregation level
     :param name: Layer name.
     :type name: basestring
@@ -1417,7 +1445,7 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None):
 
     .. code-block:: python
 
-       expand = repeat_layer(layer, 4)
+       expand = repeat_layer(input=layer, num_repeats=4)
 
     :param input: Input layer
     :type input: LayerOutput
@@ -1444,6 +1472,61 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None):
         parents=[input])
 
 
+@wrap_name_default("seqreshape")
+@wrap_act_default(act=IdentityActivation())
+@wrap_bias_attr_default(has_bias=False)
+@layer_support()
+def seq_reshape_layer(input,
+                      reshape_size,
+                      act=None,
+                      name=None,
+                      layer_attr=None,
+                      bias_attr=None):
+    """
+    A layer for reshaping the sequence. Assume the input sequence has T
+    instances and the dimension of each instance is M. If reshape_size is N,
+    the output sequence has T*M/N instances, each of dimension N.
+
+    Note that T*M/N must be an integer.
+
+    The example usage is:
+
+    .. code-block:: python
+
+       reshape = seq_reshape_layer(input=layer, reshape_size=4)
+
+    :param input: Input layer.
+    :type input: LayerOutput
+    :param reshape_size: the size of reshaped sequence.
+    :type reshape_size: int
+    :param name: Layer name.
+    :type name: basestring
+    :param act: Activation type.
+    :type act: BaseActivation
+    :param layer_attr: extra layer attributes.
+    :type layer_attr: ExtraLayerAttribute.
+    :param bias_attr: The Bias Attribute. If no bias, then pass False or
+                      something not type of ParameterAttribute. None will get a
+                      default Bias.
+    :type bias_attr: ParameterAttribute or None or bool
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+
+    Layer(
+        inputs=[input.name],
+        name=name,
+        size=reshape_size,
+        type=LayerType.SEQUENCE_RESHAPE,
+        bias=ParamAttr.to_bias(bias_attr),
+        **ExtraAttr.to_kwargs(layer_attr))
+    return LayerOutput(
+        name=name,
+        size=reshape_size,
+        layer_type=LayerType.SEQUENCE_RESHAPE,
+        parents=[input])
+
+
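The shape arithmetic above, as a minimal numpy sketch (T=6, M=2, N=4 are
example values):

..  code-block:: python

    import numpy as np
    seq = np.arange(12).reshape(6, 2)  # T=6 instances of dimension M=2
    out = seq.reshape(-1, 4)           # T*M/N = 3 instances of dimension N=4
    assert out.shape == (3, 4)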
 @wrap_name_default()
 @layer_support()
 def interpolation_layer(input, weight, name=None, layer_attr=None):
@@ -1640,7 +1723,7 @@ def scaling_layer(input, weight, name=None, layer_attr=None):
 @layer_support()
 def trans_layer(input, name=None, layer_attr=None):
     """
-    A layer for transposition.
+    A layer for transposing a minibatch matrix.
 
     .. math::
        y = x^\mathrm{T}
@@ -1671,6 +1754,52 @@ def trans_layer(input, name=None, layer_attr=None):
         name, LayerType.TRANS_LAYER, parents=[input], size=input.size)
 
 
+@wrap_name_default()
+@layer_support()
+def rotate_layer(input, height, width, name=None, layer_attr=None):
+    """
+    A layer that rotates each feature channel by 90 degrees (clockwise),
+    usually used when the input sample is an image or a feature map.
+
+    .. math::
+       y(j,i,:) = x(M-i-1,j,:)
+
+    where :math:`x` is (M x N x C) input, and :math:`y` is (N x M x C) output.
+
+    The example usage is:
+
+    .. code-block:: python
+
+       rot = rotate_layer(input=layer,
+                          height=100,
+                          width=100)
+
+    :param input: Input layer.
+    :type input: LayerOutput
+    :param height: The height of the sample matrix.
+    :type height: int
+    :param width: The width of the sample matrix.
+    :type width: int
+    :param name: Layer name.
+    :type name: basestring
+    :param layer_attr: extra layer attributes.
+    :type layer_attr: ExtraLayerAttribute.
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    assert isinstance(input, LayerOutput)
+    l = Layer(
+        name=name,
+        height=height,
+        width=width,
+        type=LayerType.ROTATE_LAYER,
+        inputs=[input.name],
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name=name,
+        layer_type=LayerType.ROTATE_LAYER,
+        parents=[input],
+        size=l.config.size)
+
+
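A numpy sketch of the per-channel rotation defined by the formula above:

..  code-block:: python

    import numpy as np
    x = np.arange(6).reshape(2, 3)  # one channel, (M, N) = (2, 3)
    y = np.rot90(x, k=-1)           # clockwise 90 degrees -> shape (3, 2)
    assert y[0, 0] == x[1, 0]       # y[j, i] == x[M - i - 1, j]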
 @wrap_name_default()
 @layer_support()
 def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
@@ -1688,6 +1817,12 @@ def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
     Note that the above computation is for one sample. Multiple samples are
     processed in one batch.
 
+    The example usage is:
+
+    .. code-block:: python
+
+       cos = cos_sim(a=layer1, b=layer2, size=3)
+
     :param name: layer name
     :type name: basestring
     :param a: input layer a
@@ -1824,14 +1959,14 @@ def img_conv_layer(input,
                    trans=False,
                    layer_type=None):
     """
-    Convolution layer for image. Paddle can support both square and non-square 
+    Convolution layer for image. Paddle can support both square and non-square
     input currently.
 
     The details of convolution layer, please refer UFLDL's `convolution
     `_ .
 
-    Convolution Transpose (deconv) layer for image. Paddle can support both square 
+    Convolution Transpose (deconv) layer for image. Paddle can support both square
     and non-square input currently.
 
     The details of convolution transpose layer,
@@ -1849,6 +1984,16 @@ def img_conv_layer(input,
     pieces. First 256/4 = 64 channels will process by first 32 filters. The
     rest channels will be processed by rest group of filters.
 
+    The example usage is:
+
+    ..  code-block:: python
+
+        conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
+                              num_channels=8,
+                              num_filters=16, stride=1,
+                              bias_attr=False,
+                              act=ReluActivation())
+
     :param name: Layer name.
     :type name: basestring
     :param input: Layer Input.
@@ -1890,7 +2035,7 @@ def img_conv_layer(input,
     :param trans: true if it is a convTransLayer, false if it is a convLayer
     :type trans: bool
     :param layer_type: specify the layer_type, default is None. If trans=True,
-                       layer_type has to be "exconvt", otherwise layer_type 
+                       layer_type has to be "exconvt", otherwise layer_type
                        has to be either "exconv" or "cudnn_conv"
     :type layer_type: String
     :return: LayerOutput object.
@@ -1979,7 +2124,8 @@ def img_pool_layer(input,
                    layer_attr=None,
                    pool_size_y=None,
                    stride_y=None,
-                   padding_y=None):
+                   padding_y=None,
+                   ceil_mode=True):
     """
     Image pooling Layer.
 
@@ -1987,6 +2133,34 @@ def img_pool_layer(input,
 
     .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/
 
+    - ceil_mode=True:
+
+    ..  math::
+
+        w = 1 + int(ceil((input\_width + 2 * padding - pool\_size) / float(stride)))
+        h = 1 + int(ceil((input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)))
+
+    - ceil_mode=False:
+
+    ..  math::
+
+        w = 1 + int(floor((input\_width + 2 * padding - pool\_size) / float(stride)))
+        h = 1 + int(floor((input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)))
+
+    The example usage is:
+
+    ..  code-block:: python
+
+        maxpool = img_pool_layer(input=conv,
+                                 pool_size=3,
+                                 pool_size_y=5,
+                                 num_channels=8,
+                                 stride=1,
+                                 stride_y=2,
+                                 padding=1,
+                                 padding_y=2,
+                                 pool_type=MaxPooling())
+
     :param padding: pooling padding width.
     :type padding: int
     :param padding_y: pooling padding height. It's equal to padding by default.
@@ -2010,6 +2184,10 @@ def img_pool_layer(input,
     :type stride_y: int|None
     :param layer_attr: Extra Layer attribute.
     :type layer_attr: ExtraLayerAttribute
+    :param ceil_mode: Whether to use ceil mode to calculate the output height
+                      and width. Default is True. If set to False, floor mode
+                      is used instead.
+    :type ceil_mode: bool
     :return: LayerOutput object.
     :rtype: LayerOutput
     """
@@ -2047,6 +2225,7 @@ def img_pool_layer(input,
                     stride_y=stride_y,
                     padding_y=padding_y))
         ],
+        ceil_mode=ceil_mode,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
     return LayerOutput(
         name,
@@ -2069,6 +2248,15 @@ def spp_layer(input,
     The details please refer to
     `Kaiming He's paper `_.
 
+    The example usage is:
+
+    ..  code-block:: python
+
+        spp = spp_layer(input=data,
+                        pyramid_height=2,
+                        num_channels=16,
+                        pool_type=MaxPooling())
+
     :param name: layer name.
     :type name: basestring
     :param input: layer's input.
@@ -2157,6 +2345,12 @@ def img_cmrnorm_layer(input,
     The details please refer to
     `Alex's paper `_.
 
+    The example usage is:
+
+    ..  code-block:: python
+
+        norm = img_cmrnorm_layer(input=net, size=5)
+
     :param name: layer name.
     :type name: None|basestring
     :param input: layer's input.
@@ -2212,6 +2406,12 @@ def batch_norm_layer(input,
     The details of batch normalization please refer to this
     `paper `_.
 
+    The example usage is:
+
+    ..  code-block:: python
+
+        norm = batch_norm_layer(input=net, act=ReluActivation())
+
     :param name: layer name.
     :type name: basestring
     :param input: batch normalization input. Better be linear activation.
@@ -2501,6 +2701,63 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None):
         size=sz)
 
 
+@wrap_name_default("seqconcat")
+@wrap_act_default(act=IdentityActivation())
+@wrap_bias_attr_default(has_bias=False)
+@layer_support()
+def seq_concat_layer(a, b, act=None, name=None, layer_attr=None,
+                     bias_attr=None):
+    """
+    Concat sequence a with sequence b.
+
+    Inputs:
+      - a = [a1, a2, ..., an]
+      - b = [b1, b2, ..., bn]
+
+    Note that the lengths of a and b should be the same.
+
+    Output: [a1, b1, a2, b2, ..., an, bn]
+
+    The example usage is:
+
+    ..  code-block:: python
+
+        concat = seq_concat_layer(a=layer1, b=layer2)
+
+    :param name: Layer name.
+    :type name: basestring
+    :param a: input sequence layer
+    :type a: LayerOutput
+    :param b: input sequence layer
+    :type b: LayerOutput
+    :param act: Activation type.
+    :type act: BaseActivation
+    :param layer_attr: Extra Layer Attribute.
+    :type layer_attr: ExtraLayerAttribute
+    :param bias_attr: The Bias Attribute. If no bias, then pass False or
+                      something not type of ParameterAttribute. None will get a
+                      default Bias.
+    :type bias_attr: ParameterAttribute or None or bool
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput)
+    assert a.size == b.size
+    Layer(
+        name=name,
+        type=LayerType.SEQUENCE_CONCAT_LAYER,
+        inputs=[a.name, b.name],
+        active_type=act.name,
+        bias=ParamAttr.to_bias(bias_attr),
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+
+    return LayerOutput(
+        name,
+        layer_type=LayerType.SEQUENCE_CONCAT_LAYER,
+        parents=[a, b],
+        activation=act,
+        size=a.size)
+
+
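The interleaving above, sketched in plain Python on two equal-length lists
(each element standing in for one sequence):

..  code-block:: python

    a = ['a1', 'a2', 'a3']
    b = ['b1', 'b2', 'b3']
    out = [x for pair in zip(a, b) for x in pair]
    # -> ['a1', 'b1', 'a2', 'b2', 'a3', 'b3']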
 def memory(name,
            size,
            is_seq=False,
@@ -2661,6 +2918,7 @@ def lstm_step_layer(input,
 
 
 @wrap_bias_attr_default()
+@wrap_param_attr_default()
 @wrap_act_default(param_names=['gate_act'], act=SigmoidActivation())
 @wrap_act_default(act=TanhActivation())
 @wrap_name_default('gru_step')
@@ -2672,6 +2930,7 @@ def gru_step_layer(input,
                    name=None,
                    gate_act=None,
                    bias_attr=None,
+                   param_attr=None,
                    layer_attr=None):
     """
 
@@ -2683,6 +2942,8 @@ def gru_step_layer(input,
     :param name:
     :param gate_act:
     :param bias_attr:
+    :param param_attr: the parameter_attribute for transforming the output_mem
+                       from the previous step.
     :param layer_attr:
     :return: LayerOutput object.
     :rtype: LayerOutput
@@ -2693,7 +2954,12 @@ def gru_step_layer(input,
     Layer(
         name=name,
         type=LayerType.GRU_STEP_LAYER,
-        inputs=[input.name, output_mem.name],
+        # The parameter here is for transforming the output_mem. The input has
+        # already been transformed outside this module, so it does not need a
+        # parameter associated with it.
+        # The parameter is grouped with the input only for backward model
+        # compatibility.
+        inputs=[Input(input.name, **param_attr.attr), output_mem.name],
         bias=ParamAttr.to_bias(bias_attr),
         size=size,
         active_type=act.name,
@@ -3342,6 +3608,7 @@ def classification_cost(input,
                         label,
                         weight=None,
                         name=None,
+                        top_k=None,
                         evaluator=classification_error_evaluator,
                         layer_attr=None):
     """
@@ -3356,6 +3623,8 @@ def classification_cost(input,
     :param weight: The weight affects the cost, namely the scale of cost.
                    It is an optional argument.
     :type weight: LayerOutput
+    :param top_k: the number k in the top-k error rate.
+    :type top_k: int
     :param evaluator: Evaluator method.
     :param layer_attr: layer's extra attribute.
     :type layer_attr: ExtraLayerAttribute
@@ -3383,7 +3652,7 @@ def classification_cost(input,
         assert isinstance(e.for_classification, bool)
         assert e.for_classification
 
-        e(name=e.__name__, input=input, label=label, weight=weight)
+        e(name=e.__name__, input=input, label=label, weight=weight, top_k=top_k)
 
     if not isinstance(evaluator, collections.Sequence):
         evaluator = [evaluator]
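An assumed usage sketch of the new top_k argument (prob and lbl are
hypothetical layers):

..  code-block:: python

    cost = classification_cost(input=prob, label=lbl, top_k=5)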
@@ -3488,9 +3757,6 @@ def conv_projection(input,
                     groups=1,
                     param_attr=None):
     """
-    ConvProjection with a layer as input.
-    It performs element-wise multiplication with weight.
-
     Different from img_conv_layer and conv_op, conv_projection is an Projection,
     which can be used in mixed_layer and conat_layer. It use cudnn to implement
     conv and only support GPU mode.
@@ -3499,7 +3765,7 @@ def conv_projection(input,
 
     .. code-block:: python
 
-       proj = conv_projection(img=input1,
+       proj = conv_projection(input=input1,
                               filter_size=3,
                               num_filters=64,
                               num_channels=64)
@@ -3582,6 +3848,110 @@ def conv_projection(input,
     return proj
 
 
+@wrap_name_default("pad")
+@layer_support()
+def pad_layer(input,
+              pad_c=None,
+              pad_h=None,
+              pad_w=None,
+              name=None,
+              layer_attr=None):
+    """
+    This operation pads zeros to the input data according to pad_c, pad_h
+    and pad_w, which specify the dimension and amount of the padding. The
+    input data shape is NCHW.
+
+    For example, pad_c=[2,3] means padding 2 zeros before the input data
+    and 3 zeros after the input data in the channel dimension. pad_h means
+    padding zeros in the height dimension, and pad_w means padding zeros
+    in the width dimension.
+
+    For example,
+
+    .. code-block:: python
+
+       input(2,2,2,3)  = [
+                           [ [[1,2,3], [3,4,5]],
+                             [[2,3,5], [1,6,7]] ],
+                           [ [[4,3,1], [1,8,7]],
+                             [[3,8,9], [2,3,5]] ]
+                         ]
+
+       pad_c=[1,1], pad_h=[0,0], pad_w=[0,0]
+
+       output(2,4,2,3) = [
+                           [ [[0,0,0], [0,0,0]],
+                             [[1,2,3], [3,4,5]],
+                             [[2,3,5], [1,6,7]],
+                             [[0,0,0], [0,0,0]] ],
+                           [ [[0,0,0], [0,0,0]],
+                             [[4,3,1], [1,8,7]],
+                             [[3,8,9], [2,3,5]],
+                             [[0,0,0], [0,0,0]] ]
+                         ]
+
+    The simple usage is:
+
+    .. code-block:: python
+
+       pad = pad_layer(input=ipt,
+                       pad_c=[4,4],
+                       pad_h=[0,0],
+                       pad_w=[2,2])
+
+    :param input: layer's input.
+    :type input: LayerOutput
+    :param pad_c: padding size in channel dimension.
+    :type pad_c: list|None
+    :param pad_h: padding size in height dimension.
+    :type pad_h: list|None
+    :param pad_w: padding size in width dimension.
+    :type pad_w: list|None
+    :param layer_attr: Extra Layer Attribute.
+    :type layer_attr: ExtraLayerAttribute
+    :param name: layer name.
+    :type name: basestring
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    if pad_c is not None:
+        assert isinstance(pad_c, collections.Sequence) and len(pad_c) == 2
+    else:
+        pad_c = [0, 0]
+
+    if pad_h is not None:
+        assert isinstance(pad_h, collections.Sequence) and len(pad_h) == 2
+    else:
+        pad_h = [0, 0]
+
+    if pad_w is not None:
+        assert isinstance(pad_w, collections.Sequence) and len(pad_w) == 2
+    else:
+        pad_w = [0, 0]
+
+    assert input.num_filters is not None
+    in_ch = input.num_filters
+    out_ch = in_ch + pad_c[0] + pad_c[1]
+
+    l = Layer(
+        name=name,
+        type=LayerType.PAD_LAYER,
+        inputs=Input(
+            input.name,
+            pad=Pad(
+                channels=in_ch,
+                pad_c=pad_c,
+                pad_h=pad_h,
+                pad_w=pad_w, )),
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    return LayerOutput(
+        name,
+        layer_type=LayerType.PAD_LAYER,
+        parents=[input],
+        num_filters=out_ch,
+        size=l.config.size)
+
+
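The padding above corresponds to a plain numpy pad on NCHW data; a minimal
sketch of the docstring example's shapes:

..  code-block:: python

    import numpy as np
    x = np.random.rand(2, 2, 2, 3)  # NCHW
    y = np.pad(x, ((0, 0), (1, 1), (0, 0), (0, 0)), mode='constant')
    assert y.shape == (2, 4, 2, 3)  # pad_c=[1, 1], pad_h=[0, 0], pad_w=[0, 0]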
 @wrap_name_default()
 @layer_support()
 def conv_shift_layer(a, b, name=None, layer_attr=None):
@@ -3605,13 +3975,13 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
 
     .. code-block:: python
 
-       conv_shift = conv_shift_layer(input=[layer1, layer2])
+       conv_shift = conv_shift_layer(a=layer1, b=layer2)
 
     :param name: layer name
     :type name: basestring
     :param a: Input layer a.
     :type a: LayerOutput
-    :param b: input layer b
+    :param b: input layer b.
     :type b: LayerOutput
     :param layer_attr: layer's extra attribute.
     :type layer_attr: ExtraLayerAttribute
@@ -3703,8 +4073,8 @@ def tensor_layer(a,
 @wrap_act_default()
 @layer_support()
 def selective_fc_layer(input,
-                       select,
                        size,
+                       select=None,
                        act=None,
                        name=None,
                        pass_generation=False,
@@ -3731,6 +4101,7 @@ def selective_fc_layer(input,
     :type input: LayerOutput|list|tuple
     :param select: The select layer. The output of select layer should be a
                    sparse binary matrix, and treat as the mask of selective fc.
+                   If it is None, this layer acts exactly like fc_layer.
     :type select: LayerOutput
     :param size: The layer dimension.
     :type size: int
@@ -3959,7 +4330,7 @@ def block_expand_layer(input,
 
     .. code-block:: python
 
-       block_expand = block_expand_layer(input,
+       block_expand = block_expand_layer(input=layer,
                                          num_channels=128,
                                          stride_x=1,
                                          stride_y=1,
@@ -4013,13 +4384,7 @@ def block_expand_layer(input,
 
 @wrap_name_default()
 @layer_support()
-def maxout_layer(input,
-                 groups,
-                 num_channels=None,
-                 size_x=None,
-                 size_y=None,
-                 name=None,
-                 layer_attr=None):
+def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
     """
     A layer to do max out on conv layer output.
       - Input: output of a conv layer.
@@ -4049,12 +4414,6 @@ def maxout_layer(input,
     :type num_channels: int|None
     :param groups: The group number of input layer.
     :type groups: int
-    :param size_x: conv output width. If None will be set
-                   automatically from previous output.
-    :type size_x: int|None
-    :param size_y: conv output height. If None will be set
-                   automatically from previous output.
-    :type size_y: int|None
     :param name: The name of this layer, which can not specify.
     :type name: None|basestring.
     :param layer_attr: Extra Layer attribute.
@@ -4175,7 +4534,7 @@ def warp_ctc_layer(input,
         - You can set 'blank' to any value ranged in [0, num_classes], which
           should be consistent as that used in your labels.
         - As a native 'softmax' activation is interated to the warp-ctc library,
-         'linear' activation is expected instead in the 'input' layer.
+          'linear' activation is expected instead in the 'input' layer.
 
     The simple usage:
 
@@ -4308,6 +4667,13 @@ def crf_decoding_layer(input,
     this layer will also calculate error. output.value[i] is 1 for incorrect
     decoding or 0 for correct decoding.
 
+    The simple usage:
+
+    .. code-block:: python
+
+      crf_decoding = crf_decoding_layer(input=input,
+                                        size=label_dim)
+
     :param input: The first input layer.
     :type input: LayerOutput
     :param size: size of this layer.
@@ -4616,6 +4982,7 @@ def cross_entropy_with_selfnorm(input,
                                 layer_attr=None):
     """
     A loss layer for multi class entropy with selfnorm.
+    Input should be a vector of positive numbers, without normalization.
 
     .. code-block:: python
 
diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py
old mode 100644
new mode 100755
index 375bea34e8aa0ac2ea222531f313a627414495b0..cadde11ff81658cb309cd1bf7a44bac6374c1e44
--- a/python/paddle/trainer_config_helpers/networks.py
+++ b/python/paddle/trainer_config_helpers/networks.py
@@ -737,12 +737,12 @@ def lstmemory_group(input,
                     lstm_layer_attr=None,
                     get_output_layer_attr=None):
     """
-    lstm_group is a recurrent layer group version Long Short Term Memory. It
+    lstm_group is a recurrent layer group version of Long Short Term Memory. It
     does exactly the same calculation as the lstmemory layer (see lstmemory in
     layers.py for the maths) does. A promising benefit is that LSTM memory
-    cell states, or hidden states in every time step are accessible to for the
+    cell states, or hidden states in every time step are accessible to the
+    user. This is especially useful in attention models. If you do not need to
-    access to the internal states of the lstm, but merely use its outputs,
+    access the internal states of the lstm, but merely use its outputs,
     it is recommended to use the lstmemory, which is relatively faster than
     lstmemory_group.
 
@@ -822,6 +822,7 @@ def gru_unit(input,
              size=None,
              name=None,
              gru_bias_attr=None,
+             gru_param_attr=None,
              act=None,
              gate_act=None,
              gru_layer_attr=None):
@@ -862,6 +863,7 @@ def gru_unit(input,
         output_mem=out_mem,
         size=size,
         bias_attr=gru_bias_attr,
+        param_attr=gru_param_attr,
         act=act,
         gate_act=gate_act,
         layer_attr=gru_layer_attr)
@@ -874,15 +876,16 @@ def gru_group(input,
               name=None,
               reverse=False,
               gru_bias_attr=None,
+              gru_param_attr=None,
               act=None,
               gate_act=None,
               gru_layer_attr=None):
     """
-    gru_group is a recurrent layer group version Gated Recurrent Unit. It
+    gru_group is a recurrent layer group version of Gated Recurrent Unit. It
     does exactly the same calculation as the grumemory layer does. A promising
-    benefit is that gru hidden sates are accessible to for the user. This is
-    especially useful in attention model. If you do not need to access to
-    any internal state, but merely use the outputs of a GRU, it is recommanded
+    benefit is that gru hidden states are accessible to the user. This is
+    especially useful in attention models. If you do not need to access
+    any internal state, but merely use the outputs of a GRU, it is recommended
     to use the grumemory, which is relatively faster.
 
     Please see grumemory in layers.py for more detail about the maths.
@@ -922,6 +925,7 @@ def gru_group(input,
             name=name,
             size=size,
             gru_bias_attr=gru_bias_attr,
+            gru_param_attr=gru_param_attr,
             act=act,
             gate_act=gate_act,
             gru_layer_attr=gru_layer_attr)
@@ -942,6 +946,7 @@ def simple_gru(input,
                mixed_bias_param_attr=None,
                mixed_layer_attr=None,
                gru_bias_attr=None,
+               gru_param_attr=None,
                act=None,
                gate_act=None,
                gru_layer_attr=None):
@@ -952,22 +957,22 @@ def simple_gru(input,
     use one complete layer to implement rnn (including simple rnn, gru and lstm)
     with multiple time steps, such as recurrent_layer, lstmemory, grumemory. But,
     the multiplication operation :math:`W x_t` is not computed in these layers.
-    See details in their interfaces in layers.py. 
+    See details in their interfaces in layers.py.
     The other implementation is to use an recurrent group which can ensemble a
     series of layers to compute rnn step by step. This way is flexible for
     attenion mechanism or other complex connections.
 
     - gru_step_layer: only compute rnn by one step. It needs an memory as input
       and can be used in recurrent group.
-    - gru_unit: a wrapper of gru_step_layer with memory. 
+    - gru_unit: a wrapper of gru_step_layer with memory.
     - gru_group: a GRU cell implemented by a combination of multiple layers in
       recurrent group.
-      But :math:`W x_t` is not done in group.  
+      But :math:`W x_t` is not done in group.
     - gru_memory: a GRU cell implemented by one layer, which does same calculation
-      with gru_group and is faster than gru_group. 
-    - simple_gru: a complete GRU implementation inlcuding :math:`W x_t` and 
+      with gru_group and is faster than gru_group.
+    - simple_gru: a complete GRU implementation including :math:`W x_t` and
       gru_group. :math:`W` contains :math:`W_r`, :math:`W_z` and :math:`W`, see
-      formula in grumemory. 
+      formula in grumemory.
 
     The computational speed is that, grumemory is relatively better than
     gru_group, and gru_group is relatively better than simple_gru.
@@ -1010,6 +1015,7 @@ def simple_gru(input,
         input=m,
         reverse=reverse,
         gru_bias_attr=gru_bias_attr,
+        gru_param_attr=gru_param_attr,
         act=act,
         gate_act=gate_act,
         gru_layer_attr=gru_layer_attr)
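An assumed usage sketch of the new gru_param_attr, in the spirit of the
shared_gru test config added below: two GRUs share parameters by name.

..  code-block:: python

    gru_a = simple_gru(input=data_a, size=200,
                       gru_bias_attr=ParamAttr(name='gru_bias'),
                       gru_param_attr=ParamAttr(name='gru_param'))
    gru_b = simple_gru(input=data_b, size=200,
                       gru_bias_attr=ParamAttr(name='gru_bias'),
                       gru_param_attr=ParamAttr(name='gru_param'))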
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index 3f1d99701afe5425553feb129c7619b5e3e689fa..c9178e3c6a46a2d663ec368569e529e780b76a6f 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -3,7 +3,8 @@ export configs=(test_fc layer_activations projections test_print_layer
 test_sequence_pooling test_lstmemory_layer test_grumemory_layer
 last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
 img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
-test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight
-test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops)
+test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
+test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
+test_seq_concat_reshape)
 
 export whole_configs=(test_split_datasource)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
new file mode 100644
index 0000000000000000000000000000000000000000..b6905824f0cb090375a38ff67e39fc626df0b2f6
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
@@ -0,0 +1,295 @@
+type: "recurrent_nn"
+layers {
+  name: "data_a"
+  type: "data"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "data_b"
+  type: "data"
+  size: 100
+  active_type: ""
+}
+layers {
+  name: "__simple_gru_0___transform"
+  type: "mixed"
+  size: 600
+  active_type: ""
+  inputs {
+    input_layer_name: "data_a"
+    input_parameter_name: "mixed_param"
+    proj_conf {
+      type: "fc"
+      name: "___simple_gru_0___transform.w0"
+      input_size: 100
+      output_size: 600
+    }
+  }
+}
+layers {
+  name: "__simple_gru_0___recurrent_group"
+  type: "recurrent_layer_group"
+  active_type: ""
+}
+layers {
+  name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
+  type: "scatter_agent"
+  size: 600
+  active_type: ""
+}
+layers {
+  name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
+  type: "agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
+  type: "gru_step"
+  size: 200
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
+    input_parameter_name: "gru_param"
+  }
+  inputs {
+    input_layer_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
+  }
+  bias_parameter_name: "gru_bias"
+  active_gate_type: "sigmoid"
+}
+layers {
+  name: "__simple_gru_0__"
+  type: "gather_agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__simple_gru_1___transform"
+  type: "mixed"
+  size: 600
+  active_type: ""
+  inputs {
+    input_layer_name: "data_b"
+    input_parameter_name: "mixed_param"
+    proj_conf {
+      type: "fc"
+      name: "___simple_gru_1___transform.w0"
+      input_size: 100
+      output_size: 600
+    }
+  }
+}
+layers {
+  name: "__simple_gru_1___recurrent_group"
+  type: "recurrent_layer_group"
+  active_type: ""
+}
+layers {
+  name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
+  type: "scatter_agent"
+  size: 600
+  active_type: ""
+}
+layers {
+  name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
+  type: "agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
+  type: "gru_step"
+  size: 200
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
+    input_parameter_name: "gru_param"
+  }
+  inputs {
+    input_layer_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
+  }
+  bias_parameter_name: "gru_bias"
+  active_gate_type: "sigmoid"
+}
+layers {
+  name: "__simple_gru_1__"
+  type: "gather_agent"
+  size: 200
+  active_type: ""
+}
+layers {
+  name: "__last_seq_0__"
+  type: "seqlastins"
+  size: 200
+  active_type: "linear"
+  inputs {
+    input_layer_name: "__simple_gru_0__"
+  }
+  trans_type: "non-seq"
+}
+layers {
+  name: "__last_seq_1__"
+  type: "seqlastins"
+  size: 200
+  active_type: "linear"
+  inputs {
+    input_layer_name: "__simple_gru_1__"
+  }
+  trans_type: "non-seq"
+}
+layers {
+  name: "__fc_layer_0__"
+  type: "fc"
+  size: 10
+  active_type: "softmax"
+  inputs {
+    input_layer_name: "__last_seq_0__"
+    input_parameter_name: "softmax_param"
+  }
+  inputs {
+    input_layer_name: "__last_seq_1__"
+    input_parameter_name: "softmax_param"
+  }
+}
+layers {
+  name: "label"
+  type: "data"
+  size: 10
+  active_type: ""
+}
+layers {
+  name: "__cost_0__"
+  type: "multi-class-cross-entropy"
+  size: 1
+  active_type: ""
+  inputs {
+    input_layer_name: "__fc_layer_0__"
+  }
+  inputs {
+    input_layer_name: "label"
+  }
+  coeff: 1.0
+}
+parameters {
+  name: "mixed_param"
+  size: 60000
+  initial_mean: 0.0
+  initial_std: 0.1
+  dims: 100
+  dims: 600
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "gru_param"
+  size: 120000
+  initial_mean: 0.0
+  initial_std: 0.0707106781187
+  dims: 200
+  dims: 600
+  initial_strategy: 0
+  initial_smart: true
+}
+parameters {
+  name: "gru_bias"
+  size: 600
+  initial_mean: 0.0
+  initial_std: 0.0
+  dims: 1
+  dims: 600
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "softmax_param"
+  size: 2000
+  initial_mean: 0.0
+  initial_std: 0.0707106781187
+  dims: 200
+  dims: 10
+  initial_strategy: 0
+  initial_smart: true
+}
+input_layer_names: "data_a"
+input_layer_names: "data_b"
+input_layer_names: "label"
+output_layer_names: "__cost_0__"
+evaluators {
+  name: "classification_error_evaluator"
+  type: "classification_error"
+  input_layers: "__fc_layer_0__"
+  input_layers: "label"
+}
+sub_models {
+  name: "root"
+  layer_names: "data_a"
+  layer_names: "data_b"
+  layer_names: "__simple_gru_0___transform"
+  layer_names: "__simple_gru_0___recurrent_group"
+  layer_names: "__simple_gru_0__"
+  layer_names: "__simple_gru_1___transform"
+  layer_names: "__simple_gru_1___recurrent_group"
+  layer_names: "__simple_gru_1__"
+  layer_names: "__last_seq_0__"
+  layer_names: "__last_seq_1__"
+  layer_names: "__fc_layer_0__"
+  layer_names: "label"
+  layer_names: "__cost_0__"
+  input_layer_names: "data_a"
+  input_layer_names: "data_b"
+  input_layer_names: "label"
+  output_layer_names: "__cost_0__"
+  evaluator_names: "classification_error_evaluator"
+  is_recurrent_layer_group: false
+}
+sub_models {
+  name: "__simple_gru_0___recurrent_group"
+  layer_names: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
+  layer_names: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
+  layer_names: "__simple_gru_0__@__simple_gru_0___recurrent_group"
+  is_recurrent_layer_group: true
+  reversed: false
+  memories {
+    layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
+    link_name: "__simple_gru_0__+delay1@__simple_gru_0___recurrent_group"
+    is_sequence: false
+  }
+  in_links {
+    layer_name: "__simple_gru_0___transform"
+    link_name: "__simple_gru_0___transform@__simple_gru_0___recurrent_group"
+    has_subseq: false
+  }
+  out_links {
+    layer_name: "__simple_gru_0__@__simple_gru_0___recurrent_group"
+    link_name: "__simple_gru_0__"
+    has_subseq: false
+  }
+  target_inlinkid: -1
+}
+sub_models {
+  name: "__simple_gru_1___recurrent_group"
+  layer_names: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
+  layer_names: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
+  layer_names: "__simple_gru_1__@__simple_gru_1___recurrent_group"
+  is_recurrent_layer_group: true
+  reversed: false
+  memories {
+    layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
+    link_name: "__simple_gru_1__+delay1@__simple_gru_1___recurrent_group"
+    is_sequence: false
+  }
+  in_links {
+    layer_name: "__simple_gru_1___transform"
+    link_name: "__simple_gru_1___transform@__simple_gru_1___recurrent_group"
+    has_subseq: false
+  }
+  out_links {
+    layer_name: "__simple_gru_1__@__simple_gru_1___recurrent_group"
+    link_name: "__simple_gru_1__"
+    has_subseq: false
+  }
+  target_inlinkid: -1
+}
+
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
index 41d2e2f2671f5c05425f9bd2e91d8adc33129761..3e9d28416ed5066461e960f0a9f085e057c28346 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
@@ -465,11 +465,11 @@ parameters {
   name: "___gru_group_0__@__gru_group_0___recurrent_group.w0"
   size: 30000
   initial_mean: 0.0
-  initial_std: 0.01
+  initial_std: 0.1
   dims: 100
   dims: 300
   initial_strategy: 0
-  initial_smart: false
+  initial_smart: true
 }
 parameters {
   name: "___gru_group_0__@__gru_group_0___recurrent_group.wbias"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr
new file mode 100644
index 0000000000000000000000000000000000000000..91284b4fb32fcfdbf6b9e7384ffe080574b78821
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr
@@ -0,0 +1,51 @@
+type: "nn"
+layers {
+  name: "data1"
+  type: "data"
+  size: 30
+  active_type: ""
+}
+layers {
+  name: "data2"
+  type: "data"
+  size: 30
+  active_type: ""
+}
+layers {
+  name: "__seqconcat_0__"
+  type: "seqconcat"
+  size: 30
+  active_type: ""
+  inputs {
+    input_layer_name: "data1"
+  }
+  inputs {
+    input_layer_name: "data2"
+  }
+}
+layers {
+  name: "__seqreshape_0__"
+  type: "seqreshape"
+  size: 5
+  active_type: "linear"
+  inputs {
+    input_layer_name: "data1"
+  }
+}
+input_layer_names: "data1"
+input_layer_names: "data2"
+output_layer_names: "__seqconcat_0__"
+output_layer_names: "__seqreshape_0__"
+sub_models {
+  name: "root"
+  layer_names: "data1"
+  layer_names: "data2"
+  layer_names: "__seqconcat_0__"
+  layer_names: "__seqreshape_0__"
+  input_layer_names: "data1"
+  input_layer_names: "data2"
+  output_layer_names: "__seqconcat_0__"
+  output_layer_names: "__seqreshape_0__"
+  is_recurrent_layer_group: false
+}
+
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr
index 1cfb92255aa92fa3fbc16a816851a5c2f81c2b56..569b0b945a762e8b596e197adc06df64e33311af 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr
@@ -19,7 +19,7 @@ model_config {
 data_config {
   type: "py2"
   files: "train.list"
-  async_load_data: true
+  async_load_data: false
   for_test: false
   load_data_module: "a"
   load_data_object: "c"
@@ -58,7 +58,7 @@ opt_config {
 test_data_config {
   type: "py2"
   files: "test.list"
-  async_load_data: true
+  async_load_data: false
   for_test: true
   load_data_module: "b"
   load_data_object: "d"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py b/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py
new file mode 100644
index 0000000000000000000000000000000000000000..c19bb9685aa24c4d66e4f0bbbcb004507413dbe8
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py
@@ -0,0 +1,40 @@
+from paddle.trainer_config_helpers import *
+
+settings(learning_rate=1e-4, batch_size=1000)
+
+data_1 = data_layer(name='data_a', size=100)
+data_2 = data_layer(name='data_b', size=100)
+
+mixed_param = ParamAttr(name='mixed_param')
+
+gru_param = ParamAttr(name='gru_param')
+gru_bias = ParamAttr(name='gru_bias', initial_mean=0., initial_std=0.)
+
+gru1 = simple_gru(
+    input=data_1,
+    size=200,
+    mixed_param_attr=mixed_param,
+    mixed_bias_param_attr=False,
+    gru_bias_attr=gru_bias,
+    gru_param_attr=gru_param)
+
+gru2 = simple_gru(
+    input=data_2,
+    size=200,
+    mixed_param_attr=mixed_param,
+    mixed_bias_param_attr=False,
+    gru_bias_attr=gru_bias,
+    gru_param_attr=gru_param)
+
+softmax_param = ParamAttr(name='softmax_param')
+
+predict = fc_layer(
+    input=[last_seq(input=gru1), last_seq(input=gru2)],
+    size=10,
+    param_attr=[softmax_param, softmax_param],
+    bias_attr=False,
+    act=SoftmaxActivation())
+outputs(
+    classification_cost(
+        input=predict, label=data_layer(
+            name='label', size=10)))
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_pad.py b/python/paddle/trainer_config_helpers/tests/configs/test_pad.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb5f13410dbbbaeea9e28c271d33a15fb3000dcf
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_pad.py
@@ -0,0 +1,21 @@
+from paddle.trainer_config_helpers import *
+
+settings(batch_size=1000, learning_rate=1e-5)
+
+data = data_layer(name='data', size=2304, height=48, width=42)
+
+conv = img_conv_layer(
+    input=data,
+    filter_size=3,
+    num_channels=1,
+    num_filters=16,
+    padding=1,
+    act=LinearActivation(),
+    bias_attr=True)
+
+pool = img_pool_layer(
+    input=conv, num_channels=8, pool_size=2, stride=2, pool_type=MaxPooling())
+
+pad = pad_layer(input=pool, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1])
+
+outputs(pad)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c161ba805fb301e8feb8702ad61a8341df40e3f
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py
@@ -0,0 +1,12 @@
+from paddle.trainer_config_helpers import *
+
+settings(batch_size=1000, learning_rate=1e-5)
+
+din1 = data_layer(name='data1', size=30)
+din2 = data_layer(name='data2', size=30)
+
+opts = []
+opts.append(seq_concat_layer(a=din1, b=din2))
+opts.append(seq_reshape_layer(input=din1, reshape_size=5))
+
+outputs(opts)
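
seq_reshape_layer above maps a size-30 input onto size 5; the protostr for this config records only the new size, and the assumed semantics (consistent with the total element count staying fixed) is that the sequence's data buffer is re-read with a new row width. A hypothetical numpy sketch of that assumption:

    import numpy

    # A 4-step sequence of dim 30 holds 120 values; with reshape_size=5
    # the same buffer reads as a 24-step sequence of dim 5.
    seq = numpy.arange(4 * 30, dtype=numpy.float32).reshape(4, 30)
    reshaped = seq.reshape(-1, 5)
    assert reshaped.shape == (24, 5)
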
diff --git a/python/paddle/trainer_config_helpers/tests/layers_test_config.py b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
index ae275735aa2b852b3b226a4a0e5b2d4d000ba199..e6cd35ee761d1acd0b5c1943554c7ea1de6a13f5 100644
--- a/python/paddle/trainer_config_helpers/tests/layers_test_config.py
+++ b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
@@ -39,6 +39,7 @@ z1 = mixed_layer(
 assert z1.size > 0
 
 y2 = fc_layer(input=y, size=15)
+z2 = rotate_layer(input=y2, height=5, width=3)
 
 cos1 = cos_sim(a=x1, b=y1)
 cos3 = cos_sim(a=x1, b=y2, size=3)
@@ -46,7 +47,7 @@ cos3 = cos_sim(a=x1, b=y2, size=3)
 linear_comb = linear_comb_layer(weights=x1, vectors=y2, size=3)
 
 out = fc_layer(
-    input=[cos1, cos3, linear_comb, z, z1],
+    input=[cos1, cos3, linear_comb, z, z1, z2],
     size=num_classes,
     act=SoftmaxActivation())
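
The new rotate_layer call views y2 (size 15) as a height-5, width-3 map and rotates it, so z2 keeps the same flattened size and can join the fc_layer input list. A hypothetical numpy sketch of the assumed 90-degree rotation:

    import numpy

    v = numpy.arange(15, dtype=numpy.float32).reshape(5, 3)  # height=5, width=3
    rotated = numpy.rot90(v)                                 # assumed semantics
    assert rotated.shape == (3, 5) and rotated.size == 15    # size unchanged
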
 
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..25526bf409cf82f26979a84700ce948ac969df0c
--- /dev/null
+++ b/python/paddle/v2/__init__.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import optimizer
+import layer
+import activation
+import parameters
+import trainer
+import event
+import data_type
+import topology
+import data_feeder
+import networks
+from . import dataset
+from . import reader
+import attr
+import pooling
+import inference
+import py_paddle.swig_paddle as api
+import minibatch
+
+__all__ = [
+    'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
+    'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader',
+    'topology', 'networks', 'infer'
+]
+
+
+def init(**kwargs):
+    args = []
+    for key in kwargs.keys():
+        args.append('--%s=%s' % (key, str(kwargs[key])))
+
+    api.initPaddle(*args)
+
+
+infer = inference.infer
+batch = minibatch.batch
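
init() above just renders each keyword argument as a --key=value flag before calling api.initPaddle, so runtime configuration reads naturally in Python. A usage sketch (use_gpu and trainer_count are assumed to be ordinary PaddlePaddle runtime flags):

    import paddle.v2 as paddle

    # Equivalent to api.initPaddle('--use_gpu=False', '--trainer_count=1').
    paddle.init(use_gpu=False, trainer_count=1)
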
diff --git a/python/paddle/v2/activation.py b/python/paddle/v2/activation.py
new file mode 100644
index 0000000000000000000000000000000000000000..21261a178203b633ca6cf59a5fc89edc24a868b9
--- /dev/null
+++ b/python/paddle/v2/activation.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.trainer_config_helpers.activations
+import copy
+
+__all__ = []
+
+suffix = 'Activation'
+for act in paddle.trainer_config_helpers.activations.__all__:
+    new_name = act[:-len(suffix)]
+    globals()[new_name] = copy.copy(
+        getattr(paddle.trainer_config_helpers.activations, act))
+    globals()[new_name].__name__ = new_name
+    __all__.append(new_name)
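
The loop above copies every v1 activation class and strips the 'Activation' suffix from its name, so the v2 spelling is just the bare name. For example, with TanhActivation among the re-exported v1 helpers:

    import paddle.v2.activation as activation

    # v1's TanhActivation() is spelled Tanh() in the v2 namespace.
    act = activation.Tanh()
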
diff --git a/python/paddle/v2/attr.py b/python/paddle/v2/attr.py
new file mode 100644
index 0000000000000000000000000000000000000000..32f78614e7f8abe7cffdc7a50a9fa77f1fc1a780
--- /dev/null
+++ b/python/paddle/v2/attr.py
@@ -0,0 +1,27 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.trainer_config_helpers.attrs
+
+__all__ = [
+    "Param",
+    "Extra",
+]
+
+Param = paddle.trainer_config_helpers.attrs.ParameterAttribute
+Extra = paddle.trainer_config_helpers.attrs.ExtraLayerAttribute
+
+for each in paddle.trainer_config_helpers.attrs.__all__:
+    globals()[each] = getattr(paddle.trainer_config_helpers.attrs, each)
+    __all__.append(each)
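
Param and Extra are plain aliases, and the trailing loop re-exports every v1 attribute name unchanged, so both spellings name the same class:

    import paddle.v2.attr as attr

    a1 = attr.Param(name='w')
    a2 = attr.ParameterAttribute(name='w')  # identical class
    assert attr.Param is attr.ParameterAttribute
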
diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ec1d7bbdf912b940ca4b8e7b20eb11310f0e74f
--- /dev/null
+++ b/python/paddle/v2/config_base.py
@@ -0,0 +1,154 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+import re
+from paddle.trainer_config_helpers.default_decorators import wrap_name_default
+import paddle.trainer_config_helpers as conf_helps
+
+
+class LayerType(type):
+    def __new__(cls, name, bases, attrs):
+        method_name = attrs.get('METHOD_NAME', None)
+        if method_name is not None:
+            method = getattr(conf_helps, method_name)
+            if method.__doc__ is not None:
+                mapper = attrs.get("__map_docstr__", None)
+                if mapper is not None:
+                    attrs['__doc__'] = LayerType.__map_docstr__(
+                        mapper(method.__doc__),
+                        method_name=method_name,
+                        name=name)
+                else:
+                    attrs['__doc__'] = LayerType.__map_docstr__(
+                        method.__doc__, method_name=method_name, name=name)
+        return super(LayerType, cls).__new__(cls, name, bases, attrs)
+
+    @staticmethod
+    def __map_docstr__(doc, name, method_name):
+        assert isinstance(doc, basestring)
+
+        # replace LayerOutput to paddle.v2.config_base.Layer
+        doc = doc.replace("LayerOutput", "paddle.v2.config_base.Layer")
+
+        doc = doc.replace('ParameterAttribute',
+                          'paddle.v2.attr.ParameterAttribute')
+
+        doc = re.sub(r'ExtraLayerAttribute[^\s]?',
+                     'paddle.v2.attr.ExtraAttribute', doc)
+
+        # xxx_layer to xxx
+        doc = re.sub(r"(?P[a-z]+)_layer", r"\g", doc)
+
+        # XxxxActivation to paddle.v2.Activation.Xxxx
+        doc = re.sub(r"(?P[A-Z][a-zA-Z]+)Activation",
+                     r"paddle.v2.Activation.\g", doc)
+
+        # TODO(yuyang18): Add more rules if needed.
+        return doc
+
+
+class Layer(object):
+    __metaclass__ = LayerType
+
+    def __init__(self, name=None, parent_layers=None):
+        assert isinstance(parent_layers, dict)
+        self.name = name
+        self.__context__ = {}
+        self.__parent_layers__ = parent_layers
+
+    def to_proto(self, context):
+        """
+        Recursively convert this layer and its parents into protobuf config.
+        """
+        kwargs = dict()
+        for layer_name in self.__parent_layers__:
+            if not isinstance(self.__parent_layers__[layer_name],
+                              collections.Sequence):
+                v1_layer = self.__parent_layers__[layer_name].to_proto(
+                    context=context)
+            else:
+                v1_layer = map(lambda x: x.to_proto(context=context),
+                               self.__parent_layers__[layer_name])
+            kwargs[layer_name] = v1_layer
+
+        if self.context_name() is None:
+            return self.to_proto_impl(**kwargs)
+        elif self.context_name() not in context:
+            context[self.context_name()] = self.to_proto_impl(**kwargs)
+        self.__context__ = context
+        if self.use_context_name():
+            return context[self.context_name()]
+        else:
+            return context[self.name]
+
+    def to_proto_impl(self, **kwargs):
+        raise NotImplementedError()
+
+    def context_name(self):
+        """
+        The context name keys the context dict that stores each `to_proto_impl`
+        result. If multiple layers share the same context_name, their
+        `to_proto_impl` will be invoked only once.
+        """
+        return self.name
+
+    def use_context_name(self):
+        return False
+
+    def calculate_size(self):
+        """
+        Lazily calculate the size of this layer. It should only be called
+        after to_proto_impl of this layer has been invoked.
+        :return: the size of this layer.
+        """
+        return self.__context__[self.context_name()].size
+
+
+def __convert_to_v2__(method_name, parent_names, is_default_name=True):
+    if is_default_name:
+        wrapper = wrap_name_default(name_prefix=method_name)
+    else:
+        wrapper = None
+
+    class V2LayerImpl(Layer):
+        METHOD_NAME = method_name
+
+        def __init__(self, **kwargs):
+            parent_layers = dict()
+            other_kwargs = dict()
+            for pname in parent_names:
+                if pname in kwargs:
+                    parent_layers[pname] = kwargs[pname]
+
+            for key in kwargs.keys():
+                if key not in parent_names:
+                    other_kwargs[key] = kwargs[key]
+
+            name = kwargs.get('name', None)
+            super(V2LayerImpl, self).__init__(name, parent_layers)
+            self.__other_kwargs__ = other_kwargs
+
+        if wrapper is not None:
+            __init__ = wrapper(__init__)
+
+        def to_proto_impl(self, **kwargs):
+            args = dict()
+            for each in kwargs:
+                args[each] = kwargs[each]
+            for each in self.__other_kwargs__:
+                args[each] = self.__other_kwargs__[each]
+            return getattr(conf_helps, method_name)(**args)
+
+    return V2LayerImpl
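
The to_proto()/context_name() pair above is what lets two v2 layers share one underlying v1 layer: the first traversal that reaches a given context name builds the proto and caches it in the context dict, and every later visitor reads the cache. A stripped-down sketch of that memoization (toy class, not the real API):

    class FakeLayer(object):
        def __init__(self, name):
            self.name = name
            self.build_count = 0

        def context_name(self):
            return self.name

        def to_proto(self, context):
            # Build once per context, then reuse the cached result.
            if self.context_name() not in context:
                self.build_count += 1
                context[self.context_name()] = 'proto(%s)' % self.name
            return context[self.context_name()]

    shared = FakeLayer('fc')
    ctx = {}
    shared.to_proto(ctx)
    shared.to_proto(ctx)
    assert shared.build_count == 1  # the second call hit the cache
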
diff --git a/python/paddle/v2/data_feeder.py b/python/paddle/v2/data_feeder.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7465238be8138e47913b7bd3f1c669ed9653958
--- /dev/null
+++ b/python/paddle/v2/data_feeder.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from py_paddle import DataProviderConverter
+
+import data_type
+
+__all__ = ['DataFeeder']
+
+
+class DataFeeder(DataProviderConverter):
+    """
+    DataFeeder converts the data returned by paddle.reader into the Arguments
+    structure defined in the API. paddle.reader usually returns a list of
+    mini-batch data entries; each entry in the list is one sample, and each
+    sample is a list or a tuple with one or multiple features. DataFeeder
+    converts these mini-batch data entries into Arguments in order to feed
+    them to the C++ interface.
+
+    The example usage:
+
+
+    ..  code-block:: python
+
+        data_types = [('image', paddle.data_type.dense_vector(784)),
+                      ('label', paddle.data_type.integer_value(10))]
+        reader_dict = {'image':0, 'label':1}
+        feeder = DataFeeder(data_types=data_types, reader_dict=reader_dict)
+        minibatch_data = [
+                           ( [1.0,2.0,3.0,4.0], 5, [6,7,8] ),  # first sample
+                           ( [1.0,2.0,3.0,4.0], 5, [6,7,8] )   # second sample
+                         ]
+        # or minibatch_data = [
+        #                       [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ],  # first sample
+        #                       [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ]   # second sample
+        #                     ]
+        arg = feeder(minibatch_data)
+
+    ..  note::
+
+        This module is for internal use only. Users should use the `reader`
+        interface.
+
+
+
+    :param data_types: A list to specify data name and type. Each item is
+                       a tuple of (data_name, data_type).
+
+    :type data_types: list
+    :param reader_dict: A dictionary to specify the position of each data
+                        in the input data.
+    :type reader_dict: dict
+    """
+
+    def __init__(self, data_types, reader_dict):
+        self.input_names = []
+        input_types = []
+        self.reader_dict = reader_dict
+        for each in data_types:
+            self.input_names.append(each[0])
+            assert isinstance(each[1], data_type.InputType)
+            input_types.append(each[1])
+        DataProviderConverter.__init__(self, input_types)
+
+    def convert(self, dat, argument=None):
+        """
+        :param dat: A list of mini-batch data. Each sample is a list or tuple
+                    with one or multiple features.
+
+        :type dat: list
+        :param argument: An Arguments object that holds this mini-batch data
+                         with one or multiple features. The Arguments definition
+                         is in the API.
+        :type argument: py_paddle.swig_paddle.Arguments
+        """
+
+        def reorder_data(data):
+            retv = []
+            for each in data:
+                reorder = []
+                for name in self.input_names:
+                    reorder.append(each[self.reader_dict[name]])
+                retv.append(reorder)
+            return retv
+
+        return DataProviderConverter.convert(self, reorder_data(dat), argument)
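
The reorder_data step is what reader_dict buys: samples may carry their features in any positions, and DataFeeder permutes them into the declared data_types order. A self-contained illustration with a hypothetical reader that yields (label, image) pairs:

    input_names = ['image', 'label']        # declared order
    reader_dict = {'image': 1, 'label': 0}  # positions in each raw sample
    sample = (7, [0.1, 0.2])                # label first, image second
    reordered = [sample[reader_dict[name]] for name in input_names]
    assert reordered == [[0.1, 0.2], 7]     # image first, label second
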
diff --git a/python/paddle/v2/data_type.py b/python/paddle/v2/data_type.py
new file mode 100644
index 0000000000000000000000000000000000000000..d582f76ddf01ed3430a1d075624bbb8e0bf3f2a9
--- /dev/null
+++ b/python/paddle/v2/data_type.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.trainer.PyDataProvider2 as pydp2
+
+import_list = [
+    nm for nm in dir(pydp2)
+    if '_' in nm and nm[0] != '_' and ('value' in nm or 'vector' in nm)
+]
+import_list.extend(['InputType'])
+
+for nm in import_list:
+    globals()[nm] = getattr(pydp2, nm)
+
+__all__ = import_list
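
The introspection above pulls in every PyDataProvider2 name that looks like a value or vector type, which is how the dense_vector and integer_value names used in the DataFeeder docstring become available here:

    import paddle.v2.data_type as data_type

    image = data_type.dense_vector(784)  # 784-dim dense float vector
    label = data_type.integer_value(10)  # integer label in [0, 10)
    assert isinstance(image, data_type.InputType)
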
diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..80ff6295c34e853d8f69b9e78719af23a56d1fbb
--- /dev/null
+++ b/python/paddle/v2/dataset/__init__.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Dataset package.
+"""
+
+import mnist
+import imikolov
+import imdb
+import cifar
+import movielens
+import conll05
+import uci_housing
+import sentiment
+import wmt14
+
+__all__ = [
+    'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment',
+    'uci_housing', 'wmt14'
+]
diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..d9f7a830ee60a331b55a1e218923e690103e1c5b
--- /dev/null
+++ b/python/paddle/v2/dataset/cifar.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
+
+TODO(yuyang18): Complete the comments.
+"""
+
+import cPickle
+import itertools
+import numpy
+import paddle.v2.dataset.common
+import tarfile
+
+__all__ = ['train100', 'test100', 'train10', 'test10']
+
+URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
+CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz'
+CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a'
+CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz'
+CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85'
+
+
+def reader_creator(filename, sub_name):
+    def read_batch(batch):
+        data = batch['data']
+        labels = batch.get('labels', batch.get('fine_labels', None))
+        assert labels is not None
+        for sample, label in itertools.izip(data, labels):
+            yield (sample / 255.0).astype(numpy.float32), int(label)
+
+    def reader():
+        with tarfile.open(filename, mode='r') as f:
+            names = (each_item.name for each_item in f
+                     if sub_name in each_item.name)
+
+            for name in names:
+                batch = cPickle.load(f.extractfile(name))
+                for item in read_batch(batch):
+                    yield item
+
+    return reader
+
+
+def train100():
+    return reader_creator(
+        paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
+        'train')
+
+
+def test100():
+    return reader_creator(
+        paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
+        'test')
+
+
+def train10():
+    return reader_creator(
+        paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
+        'data_batch')
+
+
+def test10():
+    return reader_creator(
+        paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5),
+        'test_batch')
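
Each of the four functions above returns a reader (a callable yielding samples), so consuming the dataset takes two calls. A usage sketch (the first call downloads the archive into ~/.cache/paddle/dataset):

    import paddle.v2.dataset.cifar as cifar

    reader = cifar.train10()        # reader over the CIFAR-10 train batches
    image, label = reader().next()  # Python 2 generator access
    assert image.dtype == 'float32'
    assert 0.0 <= image.min() <= image.max() <= 1.0  # scaled by / 255.0
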
diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..3021b68ddb02ecaa874e21681796c0912ad4cc06
--- /dev/null
+++ b/python/paddle/v2/dataset/common.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import requests
+import hashlib
+import os
+import shutil
+import sys
+
+__all__ = ['DATA_HOME', 'download', 'md5file']
+
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
+
+if not os.path.exists(DATA_HOME):
+    os.makedirs(DATA_HOME)
+
+
+def md5file(fname):
+    hash_md5 = hashlib.md5()
+    f = open(fname, "rb")
+    for chunk in iter(lambda: f.read(4096), b""):
+        hash_md5.update(chunk)
+    f.close()
+    return hash_md5.hexdigest()
+
+
+def download(url, module_name, md5sum):
+    dirname = os.path.join(DATA_HOME, module_name)
+    if not os.path.exists(dirname):
+        os.makedirs(dirname)
+
+    filename = os.path.join(dirname, url.split('/')[-1])
+    if not (os.path.exists(filename) and md5file(filename) == md5sum):
+        print "Cache file %s not found, downloading %s" % (filename, url)
+        r = requests.get(url, stream=True)
+        total_length = r.headers.get('content-length')
+
+        if total_length is None:
+            with open(filename, 'w') as f:
+                shutil.copyfileobj(r.raw, f)
+        else:
+            with open(filename, 'w') as f:
+                dl = 0
+                total_length = int(total_length)
+                for data in r.iter_content(chunk_size=4096):
+                    dl += len(data)
+                    f.write(data)
+                    done = int(50 * dl / total_length)
+                    sys.stdout.write("\r[%s%s]" % ('=' * done,
+                                                   ' ' * (50 - done)))
+                    sys.stdout.flush()
+
+    return filename
+
+
+def dict_add(a_dict, ele):
+    if ele in a_dict:
+        a_dict[ele] += 1
+    else:
+        a_dict[ele] = 1
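
download() is cache-aware: it fetches only when the target file is missing or its checksum no longer matches. A usage sketch reusing the CIFAR-10 constants from cifar.py above:

    from paddle.v2.dataset.common import download, md5file

    url = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    md5 = 'c58f30108f718f92721af3b95e74349a'
    path = download(url, 'cifar', md5)  # lands under DATA_HOME/cifar/
    assert md5file(path) == md5
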
diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py
new file mode 100644
index 0000000000000000000000000000000000000000..9eab49ee39325c1c60fc511e0bd834e83aa987f0
--- /dev/null
+++ b/python/paddle/v2/dataset/conll05.py
@@ -0,0 +1,198 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tarfile
+import gzip
+import itertools
+from common import download
+"""
+Conll 2005 dataset. The Paddle semantic role labeling Book and demo use this
+dataset as an example. Because the full Conll 2005 corpus is not freely
+available, the default download URL points to its test set (which is public).
+Users can change the URL and MD5 to use their own Conll dataset.
+
+TODO(yuyang18): Complete comments.
+"""
+
+__all__ = ['test', 'get_dict', 'get_embedding']
+
+DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
+DATA_MD5 = '387719152ae52d60422c016e92a742fc'
+WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt'
+WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa'
+VERBDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt'
+VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c'
+TRGDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt'
+TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751'
+EMB_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb'
+EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7'
+
+UNK_IDX = 0
+
+
+def load_dict(filename):
+    d = dict()
+    with open(filename, 'r') as f:
+        for i, line in enumerate(f):
+            d[line.strip()] = i
+    return d
+
+
+def corpus_reader(data_path, words_name, props_name):
+    """
+    Read one corpus. It returns an iterator. Each element of
+    this iterator is a tuple of a sentence and its labels. The sentence
+    consists of a list of word IDs; the labels are a list of label IDs.
+    :return: an iterator of data.
+    :rtype: iterator
+    """
+
+    def reader():
+        tf = tarfile.open(data_path)
+        wf = tf.extractfile(words_name)
+        pf = tf.extractfile(props_name)
+        with gzip.GzipFile(fileobj=wf) as words_file, gzip.GzipFile(
+                fileobj=pf) as props_file:
+            sentences = []
+            labels = []
+            one_seg = []
+            for word, label in itertools.izip(words_file, props_file):
+                word = word.strip()
+                label = label.strip().split()
+
+                if len(label) == 0:  # end of sentence
+                    for i in xrange(len(one_seg[0])):
+                        a_kind_label = [x[i] for x in one_seg]
+                        labels.append(a_kind_label)
+
+                    if len(labels) >= 1:
+                        verb_list = []
+                        for x in labels[0]:
+                            if x != '-':
+                                verb_list.append(x)
+
+                        for i, lbl in enumerate(labels[1:]):
+                            cur_tag = 'O'
+                            is_in_bracket = False
+                            lbl_seq = []
+                            verb_word = ''
+                            for l in lbl:
+                                if l == '*' and is_in_bracket == False:
+                                    lbl_seq.append('O')
+                                elif l == '*' and is_in_bracket == True:
+                                    lbl_seq.append('I-' + cur_tag)
+                                elif l == '*)':
+                                    lbl_seq.append('I-' + cur_tag)
+                                    is_in_bracket = False
+                                elif l.find('(') != -1 and l.find(')') != -1:
+                                    cur_tag = l[1:l.find('*')]
+                                    lbl_seq.append('B-' + cur_tag)
+                                    is_in_bracket = False
+                                elif l.find('(') != -1 and l.find(')') == -1:
+                                    cur_tag = l[1:l.find('*')]
+                                    lbl_seq.append('B-' + cur_tag)
+                                    is_in_bracket = True
+                                else:
+                                    raise RuntimeError('Unexpected label: %s' %
+                                                       l)
+
+                            yield sentences, verb_list[i], lbl_seq
+
+                    sentences = []
+                    labels = []
+                    one_seg = []
+                else:
+                    sentences.append(word)
+                    one_seg.append(label)
+
+        pf.close()
+        wf.close()
+        tf.close()
+
+    return reader
+
+
+def reader_creator(corpus_reader,
+                   word_dict=None,
+                   predicate_dict=None,
+                   label_dict=None):
+    def reader():
+        for sentence, predicate, labels in corpus_reader():
+
+            sen_len = len(sentence)
+
+            verb_index = labels.index('B-V')
+            mark = [0] * len(labels)
+            if verb_index > 0:
+                mark[verb_index - 1] = 1
+                ctx_n1 = sentence[verb_index - 1]
+            else:
+                ctx_n1 = 'bos'
+
+            if verb_index > 1:
+                mark[verb_index - 2] = 1
+                ctx_n2 = sentence[verb_index - 2]
+            else:
+                ctx_n2 = 'bos'
+
+            mark[verb_index] = 1
+            ctx_0 = sentence[verb_index]
+
+            if verb_index < len(labels) - 1:
+                mark[verb_index + 1] = 1
+                ctx_p1 = sentence[verb_index + 1]
+            else:
+                ctx_p1 = 'eos'
+
+            if verb_index < len(labels) - 2:
+                mark[verb_index + 2] = 1
+                ctx_p2 = sentence[verb_index + 2]
+            else:
+                ctx_p2 = 'eos'
+
+            word_idx = [word_dict.get(w, UNK_IDX) for w in sentence]
+
+            ctx_n2_idx = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len
+            ctx_n1_idx = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len
+            ctx_0_idx = [word_dict.get(ctx_0, UNK_IDX)] * sen_len
+            ctx_p1_idx = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len
+            ctx_p2_idx = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len
+
+            pred_idx = [predicate_dict.get(predicate)] * sen_len
+            label_idx = [label_dict.get(w) for w in labels]
+
+            yield word_idx, ctx_n2_idx, ctx_n1_idx, \
+              ctx_0_idx, ctx_p1_idx, ctx_p2_idx, pred_idx, mark, label_idx
+
+    return reader
+
+
+def get_dict():
+    word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5))
+    verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5))
+    label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5))
+    return word_dict, verb_dict, label_dict
+
+
+def get_embedding():
+    return download(EMB_URL, 'conll05st', EMB_MD5)
+
+
+def test():
+    word_dict, verb_dict, label_dict = get_dict()
+    reader = corpus_reader(
+        download(DATA_URL, 'conll05st', DATA_MD5),
+        words_name='conll05st-release/test.wsj/words/test.wsj.words.gz',
+        props_name='conll05st-release/test.wsj/props/test.wsj.props.gz')
+    return reader_creator(reader, word_dict, verb_dict, label_dict)
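
reader_creator above turns each (sentence, predicate, labels) triple into window features around the verb: ctx_n2/ctx_n1 are the two words before it, ctx_p1/ctx_p2 the two after, and mark flags that five-token span. The marking logic in isolation, for a hypothetical sentence with the verb at index 2:

    sentence = ['the', 'cat', 'sat', 'on', 'the', 'mat', 'today']
    verb_index = 2  # 'sat'

    mark = [0] * len(sentence)
    for i in range(verb_index - 2, verb_index + 3):  # verb +/- 2 tokens
        mark[i] = 1
    assert mark == [1, 1, 1, 1, 1, 0, 0]
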
diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py
new file mode 100644
index 0000000000000000000000000000000000000000..76019d9f54020ff6f02c17eb6047cbd014a8ccf2
--- /dev/null
+++ b/python/paddle/v2/dataset/imdb.py
@@ -0,0 +1,125 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
+
+TODO(yuyang18): Complete comments.
+"""
+
+import paddle.v2.dataset.common
+import tarfile
+import Queue
+import re
+import string
+import threading
+
+__all__ = ['build_dict', 'train', 'test']
+
+URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz'
+MD5 = '7c2ac02c03563afcf9b574c7e56c153a'
+
+
+# Read files that match pattern.  Tokenize and yield each file.
+def tokenize(pattern):
+    with tarfile.open(paddle.v2.dataset.common.download(URL, 'imdb',
+                                                        MD5)) as tarf:
+        # Note that we should use tarfile.next(), which does
+        # sequential access of member files, rather than
+        # tarfile.extractfile, which does random access and can
+        # thrash hard disks.
+        tf = tarf.next()
+        while tf is not None:
+            if bool(pattern.match(tf.name)):
+                # strip newlines, remove punctuation, then tokenize ad hoc.
+                yield tarf.extractfile(tf).read().rstrip("\n\r").translate(
+                    None, string.punctuation).lower().split()
+            tf = tarf.next()
+
+
+def build_dict(pattern, cutoff):
+    word_freq = {}
+    for doc in tokenize(pattern):
+        for word in doc:
+            paddle.v2.dataset.common.dict_add(word_freq, word)
+
+    # Not sure if we should prune less-frequent words here.
+    word_freq = filter(lambda x: x[1] > cutoff, word_freq.items())
+
+    dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0]))
+    words, _ = list(zip(*dictionary))
+    word_idx = dict(zip(words, xrange(len(words))))
+    word_idx['<unk>'] = len(words)
+    return word_idx
+
+
+def reader_creator(pos_pattern, neg_pattern, word_idx, buffer_size):
+    UNK = word_idx['<unk>']
+
+    qs = [Queue.Queue(maxsize=buffer_size), Queue.Queue(maxsize=buffer_size)]
+
+    def load(pattern, queue):
+        for doc in tokenize(pattern):
+            queue.put(doc)
+        queue.put(None)
+
+    def reader():
+        # Create two threads that load positive and negative samples
+        # into qs.
+        t0 = threading.Thread(
+            target=load, args=(
+                pos_pattern,
+                qs[0], ))
+        t0.daemon = True
+        t0.start()
+
+        t1 = threading.Thread(
+            target=load, args=(
+                neg_pattern,
+                qs[1], ))
+        t1.daemon = True
+        t1.start()
+
+        # Read alternately from qs[0] and qs[1].
+        i = 0
+        doc = qs[i].get()
+        while doc is not None:
+            yield [word_idx.get(w, UNK) for w in doc], i % 2
+            i += 1
+            doc = qs[i % 2].get()
+
+        # Once one queue is exhausted, read the rest from the other queue.
+        i += 1
+        doc = qs[i % 2].get()
+        while doc is not None:
+            yield [word_idx.get(w, UNK) for w in doc], i % 2
+            doc = qs[i % 2].get()
+
+    return reader()
+
+
+def train(word_idx):
+    return reader_creator(
+        re.compile("aclImdb/train/pos/.*\.txt$"),
+        re.compile("aclImdb/train/neg/.*\.txt$"), word_idx, 1000)
+
+
+def test(word_idx):
+    return reader_creator(
+        re.compile("aclImdb/test/pos/.*\.txt$"),
+        re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000)
+
+
+def word_dict():
+    return build_dict(
+        re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py
new file mode 100644
index 0000000000000000000000000000000000000000..97c160f111d09d61eb860c7f02552e635f2400a7
--- /dev/null
+++ b/python/paddle/v2/dataset/imikolov.py
@@ -0,0 +1,91 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
+
+Complete comments.
+"""
+import paddle.v2.dataset.common
+import tarfile
+
+__all__ = ['train', 'test', 'build_dict']
+
+URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'
+MD5 = '30177ea32e27c525793142b6bf2c8e2d'
+
+
+def word_count(f, word_freq=None):
+    add = paddle.v2.dataset.common.dict_add
+    if word_freq is None:
+        word_freq = {}
+
+    for l in f:
+        for w in l.strip().split():
+            add(word_freq, w)
+        add(word_freq, '<s>')
+        add(word_freq, '<e>')
+
+    return word_freq
+
+
+def build_dict():
+    train_filename = './simple-examples/data/ptb.train.txt'
+    test_filename = './simple-examples/data/ptb.valid.txt'
+    with tarfile.open(
+            paddle.v2.dataset.common.download(
+                paddle.v2.dataset.imikolov.URL, 'imikolov',
+                paddle.v2.dataset.imikolov.MD5)) as tf:
+        trainf = tf.extractfile(train_filename)
+        testf = tf.extractfile(test_filename)
+        word_freq = word_count(testf, word_count(trainf))
+        if '<unk>' in word_freq:
+            # remove <unk> for now, since we will set it as last index
+            del word_freq['<unk>']
+
+        TYPO_FREQ = 50
+        word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items())
+
+        word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0]))
+        words, _ = list(zip(*word_freq_sorted))
+        word_idx = dict(zip(words, xrange(len(words))))
+        word_idx['<unk>'] = len(words)
+
+    return word_idx
+
+
+def reader_creator(filename, word_idx, n):
+    def reader():
+        with tarfile.open(
+                paddle.v2.dataset.common.download(
+                    paddle.v2.dataset.imikolov.URL, 'imikolov',
+                    paddle.v2.dataset.imikolov.MD5)) as tf:
+            f = tf.extractfile(filename)
+
+            UNK = word_idx['<unk>']
+            for l in f:
+                l = ['<s>'] + l.strip().split() + ['<e>']
+                if len(l) >= n:
+                    l = [word_idx.get(w, UNK) for w in l]
+                    for i in range(n, len(l) + 1):
+                        yield tuple(l[i - n:i])
+
+    return reader
+
+
+def train(word_idx, n):
+    return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n)
+
+
+def test(word_idx, n):
+    return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n)
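
reader_creator above pads each line with <s> and <e> and slides an n-token window over it, yielding one tuple per position. The same windowing in isolation, for a hypothetical two-word sentence and n=3:

    words = ['<s>', 'a', 'b', '<e>']  # sentence padded as in the reader
    n = 3
    ngrams = [tuple(words[i - n:i]) for i in range(n, len(words) + 1)]
    assert ngrams == [('<s>', 'a', 'b'), ('a', 'b', '<e>')]
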
diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py
new file mode 100644
index 0000000000000000000000000000000000000000..16f2fcb99de4cb1971a7375a97b5daa209ee95ef
--- /dev/null
+++ b/python/paddle/v2/dataset/mnist.py
@@ -0,0 +1,108 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+MNIST dataset.
+
+This module will download dataset from http://yann.lecun.com/exdb/mnist/ and
+parse train set and test set into paddle reader creators.
+"""
+import paddle.v2.dataset.common
+import subprocess
+import numpy
+import platform
+__all__ = ['train', 'test']
+
+URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/'
+TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz'
+TEST_IMAGE_MD5 = '9fb629c4189551a2d022fa330f9573f3'
+TEST_LABEL_URL = URL_PREFIX + 't10k-labels-idx1-ubyte.gz'
+TEST_LABEL_MD5 = 'ec29112dd5afa0611ce80d1b7f02629c'
+TRAIN_IMAGE_URL = URL_PREFIX + 'train-images-idx3-ubyte.gz'
+TRAIN_IMAGE_MD5 = 'f68b3c2dcbeaaa9fbdd348bbdeb94873'
+TRAIN_LABEL_URL = URL_PREFIX + 'train-labels-idx1-ubyte.gz'
+TRAIN_LABEL_MD5 = 'd53e105ee54ea40749a09fcbcd1e9432'
+
+
+def reader_creator(image_filename, label_filename, buffer_size):
+    def reader():
+        if platform.system() == 'Darwin':
+            zcat_cmd = 'gzcat'
+        elif platform.system() == 'Linux':
+            zcat_cmd = 'zcat'
+        else:
+            raise NotImplementedError()
+
+        # According to http://stackoverflow.com/a/38061619/724872, we
+        # cannot use standard package gzip here.
+        m = subprocess.Popen([zcat_cmd, image_filename], stdout=subprocess.PIPE)
+        m.stdout.read(16)  # skip some magic bytes
+
+        l = subprocess.Popen([zcat_cmd, label_filename], stdout=subprocess.PIPE)
+        l.stdout.read(8)  # skip some magic bytes
+
+        try:  # the consumer may break out of the reader early.
+            while True:
+                labels = numpy.fromfile(
+                    l.stdout, 'ubyte', count=buffer_size).astype("int")
+
+                if labels.size != buffer_size:
+                    break  # numpy.fromfile returns empty slice after EOF.
+
+                images = numpy.fromfile(
+                    m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape(
+                        (buffer_size, 28 * 28)).astype('float32')
+
+                images = images / 255.0 * 2.0 - 1.0
+
+                for i in xrange(buffer_size):
+                    yield images[i, :], int(labels[i])
+        finally:
+            m.terminate()
+            l.terminate()
+
+    return reader
+
+
+def train():
+    """
+    MNIST train set creator.
+
+    It returns a reader creator. Each sample in the reader is an image vector
+    with pixels scaled to [-1, 1] and a label in [0, 9].
+
+    :return: Train reader creator
+    :rtype: callable
+    """
+    return reader_creator(
+        paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist',
+                                          TRAIN_IMAGE_MD5),
+        paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist',
+                                          TRAIN_LABEL_MD5), 100)
+
+
+def test():
+    """
+    MNIST test set creator.
+
+    It returns a reader creator. Each sample in the reader is an image vector
+    with pixels scaled to [-1, 1] and a label in [0, 9].
+
+    :return: Test reader creator.
+    :rtype: callable
+    """
+    return reader_creator(
+        paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist',
+                                          TEST_IMAGE_MD5),
+        paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist',
+                                          TEST_LABEL_MD5), 100)
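
A usage sketch: train() returns a reader creator, so calling the result once more yields the generator, whose samples are 784-dim float32 vectors scaled to [-1, 1] plus integer labels:

    import paddle.v2.dataset.mnist as mnist

    image, label = mnist.train()().next()
    assert image.shape == (784,)
    assert -1.0 <= image.min() <= image.max() <= 1.0
    assert 0 <= label <= 9
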
diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc65e8f8b6f04b078a3449c622478095086cecbe
--- /dev/null
+++ b/python/paddle/v2/dataset/movielens.py
@@ -0,0 +1,139 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Movielens 1-M dataset.
+
+TODO(yuyang18): Complete comments.
+"""
+
+import zipfile
+from common import download
+import re
+import random
+import functools
+
+__all__ = ['train_creator', 'test_creator']
+
+
+class MovieInfo(object):
+    def __init__(self, index, categories, title):
+        self.index = int(index)
+        self.categories = categories
+        self.title = title
+
+    def value(self):
+        return [
+            self.index, [CATEGORIES_DICT[c] for c in self.categories],
+            [MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()]
+        ]
+
+
+class UserInfo(object):
+    def __init__(self, index, gender, age, job_id):
+        self.index = int(index)
+        self.is_male = gender == 'M'
+        self.age = [1, 18, 25, 35, 45, 50, 56].index(int(age))
+        self.job_id = int(job_id)
+
+    def value(self):
+        return [self.index, 0 if self.is_male else 1, self.age, self.job_id]
+
+
+MOVIE_INFO = None
+MOVIE_TITLE_DICT = None
+CATEGORIES_DICT = None
+USER_INFO = None
+
+
+def __initialize_meta_info__():
+    fn = download(
+        url='http://files.grouplens.org/datasets/movielens/ml-1m.zip',
+        md5='c4d9eecfca2ab87c1945afe126590906')
+    global MOVIE_INFO
+    if MOVIE_INFO is None:
+        pattern = re.compile(r'^(.*)\((\d+)\)$')
+        with zipfile.ZipFile(file=fn) as package:
+            MOVIE_INFO = dict()
+            title_word_set = set()
+            categories_set = set()
+            with package.open('ml-1m/movies.dat') as movie_file:
+                for line in movie_file:
+                    movie_id, title, categories = line.strip().split('::')
+                    categories = categories.split('|')
+                    for c in categories:
+                        categories_set.add(c)
+                    title = pattern.match(title).group(1)
+                    MOVIE_INFO[int(movie_id)] = MovieInfo(
+                        index=movie_id, categories=categories, title=title)
+                    for w in title.split():
+                        title_word_set.add(w.lower())
+
+            global MOVIE_TITLE_DICT
+            MOVIE_TITLE_DICT = dict()
+            for i, w in enumerate(title_word_set):
+                MOVIE_TITLE_DICT[w] = i
+
+            global CATEGORIES_DICT
+            CATEGORIES_DICT = dict()
+            for i, c in enumerate(categories_set):
+                CATEGORIES_DICT[c] = i
+
+            global USER_INFO
+            USER_INFO = dict()
+            with package.open('ml-1m/users.dat') as user_file:
+                for line in user_file:
+                    uid, gender, age, job, _ = line.strip().split("::")
+                    USER_INFO[int(uid)] = UserInfo(
+                        index=uid, gender=gender, age=age, job_id=job)
+    return fn
+
+
+def __reader__(rand_seed=0, test_ratio=0.1, is_test=False):
+    fn = __initialize_meta_info__()
+    rand = random.Random(x=rand_seed)
+    with zipfile.ZipFile(file=fn) as package:
+        with package.open('ml-1m/ratings.dat') as rating_file:
+            for line in rating_file:
+                if (rand.random() < test_ratio) == is_test:
+                    uid, mov_id, rating, _ = line.strip().split("::")
+                    uid = int(uid)
+                    mov_id = int(mov_id)
+                    rating = float(rating) * 2 - 5.0
+
+                    mov = MOVIE_INFO[mov_id]
+                    usr = USER_INFO[uid]
+                    yield usr.value() + mov.value() + [[rating]]
+
+
+def __reader_creator__(**kwargs):
+    return lambda: __reader__(**kwargs)
+
+
+train_creator = functools.partial(__reader_creator__, is_test=False)
+test_creator = functools.partial(__reader_creator__, is_test=True)
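+
+# A minimal usage sketch (assumes the MovieLens archive downloads cleanly):
+#
+#     for sample in train_creator()():
+#         # sample = user features + movie features + [[rating]]
+#         break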
+
+
+def unittest():
+    for train_count, _ in enumerate(train_creator()()):
+        pass
+    for test_count, _ in enumerate(test_creator()()):
+        pass
+
+    print train_count, test_count
+
+
+if __name__ == '__main__':
+    unittest()
diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py
new file mode 100644
index 0000000000000000000000000000000000000000..71689fd61b6b14a7b5072caff4e2fd48a7f74072
--- /dev/null
+++ b/python/paddle/v2/dataset/sentiment.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This script fetches and preprocesses the movie_reviews data set provided by NLTK.
+
+TODO(yuyang18): Complete dataset.
+"""
+
+import collections
+from itertools import chain
+
+import nltk
+from nltk.corpus import movie_reviews
+
+import common
+
+__all__ = ['train', 'test', 'get_word_dict']
+NUM_TRAINING_INSTANCES = 1600
+NUM_TOTAL_INSTANCES = 2000
+
+
+def download_data_if_not_yet():
+    """
+    Download the data set if it has not been downloaded yet.
+    """
+    try:
+        # make sure that nltk can find the data
+        if common.DATA_HOME not in nltk.data.path:
+            nltk.data.path.append(common.DATA_HOME)
+        movie_reviews.categories()
+    except LookupError:
+        print "Downloading movie_reviews data set, please wait....."
+        nltk.download('movie_reviews', download_dir=common.DATA_HOME)
+        print "Download data set success....."
+        print "Path is " + nltk.data.find('corpora/movie_reviews').path
+
+
+def get_word_dict():
+    """
+    Sort words by their frequency in the samples and pair each word with its rank.
+    :return:
+        words_freq_sorted
+    """
+    words_freq_sorted = list()
+    word_freq_dict = collections.defaultdict(int)
+    download_data_if_not_yet()
+
+    for category in movie_reviews.categories():
+        for field in movie_reviews.fileids(category):
+            for words in movie_reviews.words(field):
+                word_freq_dict[words] += 1
+    words_sort_list = word_freq_dict.items()
+    words_sort_list.sort(cmp=lambda a, b: b[1] - a[1])
+    for index, word in enumerate(words_sort_list):
+        words_freq_sorted.append((word[0], index))
+    return words_freq_sorted
+
+
+def sort_files():
+    """
+    Interleave negative and positive sample files so that consecutive
+    reads alternate between the two classes.
+    :return:
+        files_list
+    """
+    neg_file_list = movie_reviews.fileids('neg')
+    pos_file_list = movie_reviews.fileids('pos')
+    files_list = list(chain.from_iterable(zip(neg_file_list, pos_file_list)))
+    return files_list
+
+
+def load_sentiment_data():
+    """
+    Load the data set
+    :return:
+        data_set
+    """
+    data_set = list()
+    download_data_if_not_yet()
+    words_ids = dict(get_word_dict())
+    for sample_file in sort_files():
+        words_list = list()
+        category = 0 if 'neg' in sample_file else 1
+        for word in movie_reviews.words(sample_file):
+            words_list.append(words_ids[word.lower()])
+        data_set.append((words_list, category))
+    return data_set
+
+
+def reader_creator(data):
+    """
+    Generate (word_ids, label) samples from the given data set one at a time.
+    :param data:
+        train data set or test data set
+    """
+    for each in data:
+        yield each[0], each[1]
+
+
+def train():
+    """
+    Default train set reader creator
+    """
+    data_set = load_sentiment_data()
+    return reader_creator(data_set[0:NUM_TRAINING_INSTANCES])
+
+
+def test():
+    """
+    Default test set reader creator
+    """
+    data_set = load_sentiment_data()
+    return reader_creator(data_set[NUM_TRAINING_INSTANCES:])
diff --git a/python/paddle/v2/dataset/tests/cifar_test.py b/python/paddle/v2/dataset/tests/cifar_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0e18229da7818be5752ee592e094a00da286ad9
--- /dev/null
+++ b/python/paddle/v2/dataset/tests/cifar_test.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.v2.dataset.cifar
+import unittest
+
+
+class TestCIFAR(unittest.TestCase):
+    def check_reader(self, reader):
+        sum = 0
+        label = 0
+        for l in reader():
+            self.assertEqual(l[0].size, 3072)
+            if l[1] > label:
+                label = l[1]
+            sum += 1
+        return sum, label
+
+    def test_test10(self):
+        instances, max_label_value = self.check_reader(
+            paddle.v2.dataset.cifar.test10())
+        self.assertEqual(instances, 10000)
+        self.assertEqual(max_label_value, 9)
+
+    def test_train10(self):
+        instances, max_label_value = self.check_reader(
+            paddle.v2.dataset.cifar.train10())
+        self.assertEqual(instances, 50000)
+        self.assertEqual(max_label_value, 9)
+
+    def test_test100(self):
+        instances, max_label_value = self.check_reader(
+            paddle.v2.dataset.cifar.test100())
+        self.assertEqual(instances, 10000)
+        self.assertEqual(max_label_value, 99)
+
+    def test_train100(self):
+        instances, max_label_value = self.check_reader(
+            paddle.v2.dataset.cifar.train100())
+        self.assertEqual(instances, 50000)
+        self.assertEqual(max_label_value, 99)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/common_test.py b/python/paddle/v2/dataset/tests/common_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..5babcef0eb4345d243904877d323c37d4889a643
--- /dev/null
+++ b/python/paddle/v2/dataset/tests/common_test.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.v2.dataset.common
+import unittest
+import tempfile
+
+
+class TestCommon(unittest.TestCase):
+    def test_md5file(self):
+        _, temp_path = tempfile.mkstemp()
+        with open(temp_path, 'w') as f:
+            f.write("Hello\n")
+        self.assertEqual('09f7e02f1290be211da707a266f153b3',
+                         paddle.v2.dataset.common.md5file(temp_path))
+
+    def test_download(self):
+        yi_avatar = 'https://avatars0.githubusercontent.com/u/1548775?v=3&s=460'
+        self.assertEqual(
+            paddle.v2.dataset.common.DATA_HOME + '/test/1548775?v=3&s=460',
+            paddle.v2.dataset.common.download(
+                yi_avatar, 'test', 'f75287202d6622414c706c36c16f8e0d'))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/imdb_test.py b/python/paddle/v2/dataset/tests/imdb_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4d82f26895d77d05c6e936bd636b1239e1a0cd8
--- /dev/null
+++ b/python/paddle/v2/dataset/tests/imdb_test.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.v2.dataset.imdb
+import unittest
+import re
+
+TRAIN_POS_PATTERN = re.compile("aclImdb/train/pos/.*\.txt$")
+TRAIN_NEG_PATTERN = re.compile("aclImdb/train/neg/.*\.txt$")
+TRAIN_PATTERN = re.compile("aclImdb/train/.*\.txt$")
+
+TEST_POS_PATTERN = re.compile("aclImdb/test/pos/.*\.txt$")
+TEST_NEG_PATTERN = re.compile("aclImdb/test/neg/.*\.txt$")
+TEST_PATTERN = re.compile("aclImdb/test/.*\.txt$")
+
+
+class TestIMDB(unittest.TestCase):
+    word_idx = None
+
+    def test_build_dict(self):
+        if self.word_idx is None:
+            self.word_idx = paddle.v2.dataset.imdb.build_dict(TRAIN_PATTERN,
+                                                              150)
+
+        self.assertEqual(len(self.word_idx), 7036)
+
+    def check_dataset(self, dataset, expected_size):
+        if self.word_idx is None:
+            self.word_idx = paddle.v2.dataset.imdb.build_dict(TRAIN_PATTERN,
+                                                              150)
+
+        sum = 0
+        for l in dataset(self.word_idx):
+            self.assertEqual(l[1], sum % 2)
+            sum += 1
+        self.assertEqual(sum, expected_size)
+
+    def test_train(self):
+        self.check_dataset(paddle.v2.dataset.imdb.train, 25000)
+
+    def test_test(self):
+        self.check_dataset(paddle.v2.dataset.imdb.test, 25000)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/imikolov_test.py b/python/paddle/v2/dataset/tests/imikolov_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..009e55243a594e5e235c36fb0223ec70754d17f3
--- /dev/null
+++ b/python/paddle/v2/dataset/tests/imikolov_test.py
@@ -0,0 +1,26 @@
+import paddle.v2.dataset.imikolov
+import unittest
+
+WORD_DICT = paddle.v2.dataset.imikolov.build_dict()
+
+
+class TestMikolov(unittest.TestCase):
+    def check_reader(self, reader, n):
+        for l in reader():
+            self.assertEqual(len(l), n)
+
+    def test_train(self):
+        n = 5
+        self.check_reader(paddle.v2.dataset.imikolov.train(WORD_DICT, n), n)
+
+    def test_test(self):
+        n = 5
+        self.check_reader(paddle.v2.dataset.imikolov.test(WORD_DICT, n), n)
+
+    def test_total(self):
+        _, idx = zip(*WORD_DICT.items())
+        self.assertEqual(sorted(idx)[-1], len(WORD_DICT) - 1)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/mnist_test.py b/python/paddle/v2/dataset/tests/mnist_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d344cac3e7483a351033570fbec75a4d19f4a55
--- /dev/null
+++ b/python/paddle/v2/dataset/tests/mnist_test.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.v2.dataset.mnist
+import unittest
+
+
+class TestMNIST(unittest.TestCase):
+    def check_reader(self, reader):
+        sum = 0
+        label = 0
+        for l in reader():
+            self.assertEqual(l[0].size, 784)
+            if l[1] > label:
+                label = l[1]
+            sum += 1
+        return sum, label
+
+    def test_train(self):
+        instances, max_label_value = self.check_reader(
+            paddle.v2.dataset.mnist.train())
+        self.assertEqual(instances, 60000)
+        self.assertEqual(max_label_value, 9)
+
+    def test_test(self):
+        instances, max_label_value = self.check_reader(
+            paddle.v2.dataset.mnist.test())
+        self.assertEqual(instances, 10000)
+        self.assertEqual(max_label_value, 9)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/dataset/tests/test_sentiment.py b/python/paddle/v2/dataset/tests/test_sentiment.py
new file mode 100644
index 0000000000000000000000000000000000000000..407405290734609059c1767600748d530e8a13a6
--- /dev/null
+++ b/python/paddle/v2/dataset/tests/test_sentiment.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import nltk
+import paddle.v2.dataset.sentiment as st
+from nltk.corpus import movie_reviews
+
+
+class TestSentimentMethods(unittest.TestCase):
+    def test_get_word_dict(self):
+        word_dict = st.get_word_dict()[0:10]
+        test_word_list = [(u',', 0), (u'the', 1), (u'.', 2), (u'a', 3),
+                          (u'and', 4), (u'of', 5), (u'to', 6), (u"'", 7),
+                          (u'is', 8), (u'in', 9)]
+        for idx, each in enumerate(word_dict):
+            self.assertEqual(each, test_word_list[idx])
+        self.assertTrue("/root/.cache/paddle/dataset" in nltk.data.path)
+
+    def test_sort_files(self):
+        last_label = ''
+        for sample_file in st.sort_files():
+            current_label = sample_file.split("/")[0]
+            self.assertNotEqual(current_label, last_label)
+            last_label = current_label
+
+    def test_data_set(self):
+        data_set = st.load_sentiment_data()
+        last_label = -1
+        for each in st.test():
+            self.assertNotEqual(each[1], last_label)
+            last_label = each[1]
+        self.assertEqual(len(data_set), st.NUM_TOTAL_INSTANCES)
+        self.assertEqual(len(list(st.train())), st.NUM_TRAINING_INSTANCES)
+        self.assertEqual(
+            len(list(st.test())),
+            (st.NUM_TOTAL_INSTANCES - st.NUM_TRAINING_INSTANCES))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py
new file mode 100644
index 0000000000000000000000000000000000000000..27f454b137e3a40febd19cf085e2f4034cc16b24
--- /dev/null
+++ b/python/paddle/v2/dataset/uci_housing.py
@@ -0,0 +1,91 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+UCI Housing dataset.
+
+TODO(yuyang18): Complete comments.
+"""
+
+import numpy as np
+import os
+from common import download
+
+__all__ = ['train', 'test']
+
+URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'
+MD5 = 'd4accdce7a25600298819f8e28e8d593'
+feature_names = [
+    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
+    'PTRATIO', 'B', 'LSTAT'
+]
+
+UCI_TRAIN_DATA = None
+UCI_TEST_DATA = None
+
+
+def feature_range(maximums, minimums):
+    import matplotlib
+    matplotlib.use('Agg')
+    import matplotlib.pyplot as plt
+    fig, ax = plt.subplots()
+    feature_num = len(maximums)
+    ax.bar(range(feature_num), maximums - minimums, color='r', align='center')
+    ax.set_title('feature scale')
+    plt.xticks(range(feature_num), feature_names)
+    plt.xlim([-1, feature_num])
+    fig.set_figheight(6)
+    fig.set_figwidth(10)
+    if not os.path.exists('./image'):
+        os.makedirs('./image')
+    fig.savefig('image/ranges.png', dpi=48)
+    plt.close(fig)
+
+
+def load_data(filename, feature_num=14, ratio=0.8):
+    global UCI_TRAIN_DATA, UCI_TEST_DATA
+    if UCI_TRAIN_DATA is not None and UCI_TEST_DATA is not None:
+        return
+
+    data = np.fromfile(filename, sep=' ')
+    data = data.reshape(data.shape[0] / feature_num, feature_num)
+    maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum(
+        axis=0) / data.shape[0]
+    feature_range(maximums[:-1], minimums[:-1])
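+    # normalize each feature: subtract its mean and divide by its value
+    # range, so every column ends up within [-1, 1]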
+    for i in xrange(feature_num - 1):
+        data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])
+    offset = int(data.shape[0] * ratio)
+    UCI_TRAIN_DATA = data[:offset]
+    UCI_TEST_DATA = data[offset:]
+
+
+def train():
+    global UCI_TRAIN_DATA
+    load_data(download(URL, 'uci_housing', MD5))
+
+    def reader():
+        for d in UCI_TRAIN_DATA:
+            yield d[:-1], d[-1:]
+
+    return reader
+
+
+def test():
+    global UCI_TEST_DATA
+    load_data(download(URL, 'uci_housing', MD5))
+
+    def reader():
+        for d in UCI_TEST_DATA:
+            yield d[:-1], d[-1:]
+
+    return reader
diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py
new file mode 100644
index 0000000000000000000000000000000000000000..9904848b5d3ef95dc331fc0ba1a98f29f8b1dfeb
--- /dev/null
+++ b/python/paddle/v2/dataset/wmt14.py
@@ -0,0 +1,142 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+WMT14 dataset.
+"""
+import paddle.v2.dataset.common
+import tarfile
+import os.path
+import itertools
+
+__all__ = ['train', 'test', 'build_dict']
+
+URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
+MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
+URL_TRAIN = 'http://localhost:8000/train.tgz'
+MD5_TRAIN = '72de99da2830ea5a3a2c4eb36092bbc7'
+
+
+def word_count(f, word_freq=None):
+    add = paddle.v2.dataset.common.dict_add
+    if word_freq is None:
+        word_freq = {}
+
+    for l in f:
+        for w in l.strip().split():
+            add(word_freq, w)
+        add(word_freq, '<s>')
+        add(word_freq, '<e>')
+
+    return word_freq
+
+
+def get_word_idx(word_freq):
+    TYPO_FREQ = 50
+    word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items())
+    word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0]))
+    words, _ = list(zip(*word_freq_sorted))
+    word_idx = dict(zip(words, xrange(len(words))))
+    word_idx['<unk>'] = len(words)
+    return word_idx
+
+
+def get_word_freq(train, dev):
+    word_freq = word_count(train, word_count(dev))
+    if '<unk>' in word_freq:
+        # remove <unk> for now, since we will set it as the last index
+        del word_freq['<unk>']
+    return word_freq
+
+
+def build_dict():
+    base_dir = './wmt14-data'
+    train_en_filename = base_dir + '/train/train.en'
+    train_fr_filename = base_dir + '/train/train.fr'
+    dev_en_filename = base_dir + '/dev/ntst1213.en'
+    dev_fr_filename = base_dir + '/dev/ntst1213.fr'
+
+    if not os.path.exists(train_en_filename) or not os.path.exists(
+            train_fr_filename):
+        with tarfile.open(
+                paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14',
+                                                  MD5_TRAIN)) as tf:
+            tf.extractall(base_dir)
+
+    if not os.path.exists(dev_en_filename) or not os.path.exists(
+            dev_fr_filename):
+        with tarfile.open(
+                paddle.v2.dataset.common.download(URL_DEV_TEST, 'wmt14',
+                                                  MD5_DEV_TEST)) as tf:
+            tf.extractall(base_dir)
+
+    f_en = open(train_en_filename)
+    f_fr = open(train_fr_filename)
+    f_en_dev = open(dev_en_filename)
+    f_fr_dev = open(dev_fr_filename)
+
+    word_freq_en = get_word_freq(f_en, f_en_dev)
+    word_freq_fr = get_word_freq(f_fr, f_fr_dev)
+
+    f_en.close()
+    f_fr.close()
+    f_en_dev.close()
+    f_fr_dev.close()
+
+    return get_word_idx(word_freq_en), get_word_idx(word_freq_fr)
+
+
+def reader_creator(directory, path_en, path_fr, URL, MD5, dict_en, dict_fr):
+    def reader():
+        if not os.path.exists(path_en) or not os.path.exists(path_fr):
+            with tarfile.open(
+                    paddle.v2.dataset.common.download(URL, 'wmt14', MD5)) as tf:
+                tf.extractall(directory)
+
+        f_en = open(path_en)
+        f_fr = open(path_fr)
+        UNK_en = dict_en['<unk>']
+        UNK_fr = dict_fr['<unk>']
+
+        for en, fr in itertools.izip(f_en, f_fr):
+            src_ids = [dict_en.get(w, UNK_en) for w in en.strip().split()]
+            tar_ids = [
+                dict_fr.get(w, UNK_fr)
+                for w in ['<s>'] + fr.strip().split() + ['<e>']
+            ]
+
+            # skip empty sequences and sequences longer than 80 tokens
+            if len(src_ids) == 0 or len(tar_ids) <= 1 or len(
+                    src_ids) > 80 or len(tar_ids) > 80:
+                continue
+
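+            # yield (source ids, target input ids beginning with <s>,
+            #        target label ids ending with <e>)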
+            yield src_ids, tar_ids[:-1], tar_ids[1:]
+
+        f_en.close()
+        f_fr.close()
+
+    return reader
+
+
+def train(dict_en, dict_fr):
+    directory = './wmt14-data'
+    return reader_creator(directory, directory + '/train/train.en',
+                          directory + '/train/train.fr', URL_TRAIN, MD5_TRAIN,
+                          dict_en, dict_fr)
+
+
+def test(dict_en, dict_fr):
+    directory = './wmt14-data'
+    return reader_creator(directory, directory + '/dev/ntst1213.en',
+                          directory + '/dev/ntst1213.fr', URL_DEV_TEST,
+                          MD5_DEV_TEST, dict_en, dict_fr)
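+
+
+# A minimal usage sketch: build both dictionaries once, then create readers.
+# The unpacked names below are illustrative only.
+#
+#     dict_en, dict_fr = build_dict()
+#     for src_ids, trg_ids, trg_ids_next in train(dict_en, dict_fr)():
+#         break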
diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ad52b8baa411269d29732685871a875df5185cc
--- /dev/null
+++ b/python/paddle/v2/event.py
@@ -0,0 +1,84 @@
+"""
+All training events.
+
+There are:
+
+* BeginIteration
+* EndIteration
+* BeginPass
+* EndPass
+
+TODO(yuyang18): Complete it!
+"""
+import py_paddle.swig_paddle as api
+
+__all__ = [
+    'EndIteration', 'BeginIteration', 'BeginPass', 'EndPass', 'TestResult'
+]
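+
+# A typical event handler sketch (the handler is user-supplied; the name is
+# illustrative):
+#
+#     def event_handler(event):
+#         if isinstance(event, EndIteration):
+#             print "pass %d, batch %d, cost %f" % (
+#                 event.pass_id, event.batch_id, event.cost)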
+
+
+class WithMetric(object):
+    def __init__(self, evaluator):
+        if not isinstance(evaluator, api.Evaluator):
+            raise TypeError("Evaluator should be api.Evaluator type")
+        self.__evaluator__ = evaluator
+
+    @property
+    def metrics(self):
+        names = self.__evaluator__.getNames()
+        retv = dict()
+        for each_name in names:
+            val = self.__evaluator__.getValue(each_name)
+            retv[each_name] = val
+        return retv
+
+
+class TestResult(WithMetric):
+    """
+    Result that trainer.test returns.
+    """
+
+    def __init__(self, evaluator, cost):
+        super(TestResult, self).__init__(evaluator)
+        self.cost = cost
+
+
+class BeginPass(object):
+    """
+    Event fired at the beginning of one training pass.
+    """
+
+    def __init__(self, pass_id):
+        self.pass_id = pass_id
+
+
+class EndPass(WithMetric):
+    """
+    Event fired at the end of one training pass.
+    """
+
+    def __init__(self, pass_id, evaluator):
+        self.pass_id = pass_id
+        WithMetric.__init__(self, evaluator)
+
+
+class BeginIteration(object):
+    """
+    Event fired at the beginning of one training batch.
+    """
+
+    def __init__(self, pass_id, batch_id):
+        self.pass_id = pass_id
+        self.batch_id = batch_id
+
+
+class EndIteration(WithMetric):
+    """
+    Event fired at the end of one training batch.
+    """
+
+    def __init__(self, pass_id, batch_id, cost, evaluator):
+        self.pass_id = pass_id
+        self.batch_id = batch_id
+        self.cost = cost
+        WithMetric.__init__(self, evaluator)
diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..476fd3fa4523a77709f68c73c73e6851e04064aa
--- /dev/null
+++ b/python/paddle/v2/inference.py
@@ -0,0 +1,59 @@
+import py_paddle.swig_paddle as api
+
+import topology
+from data_feeder import DataFeeder
+import itertools
+import numpy
+
+__all__ = ['infer']
+
+
+class Inference(object):
+    def __init__(self, output, parameters):
+        topo = topology.Topology(output)
+        gm = api.GradientMachine.createFromConfigProto(
+            topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE])
+        for param in gm.getParameters():
+            val = param.getBuf(api.PARAMETER_VALUE)
+            name = param.getName()
+            assert isinstance(val, api.Vector)
+            val.copyFromNumpyArray(parameters.get(name).flatten())
+        self.__gradient_machine__ = gm
+        self.__data_types__ = topo.data_type()
+
+    def iter_infer(self, reader, reader_dict=None):
+        if reader_dict is None:
+            reader_dict = self.default_reader_dict()
+        feeder = DataFeeder(self.__data_types__, reader_dict)
+        self.__gradient_machine__.start()
+        for data_batch in reader():
+            yield self.__gradient_machine__.forwardTest(feeder(data_batch))
+        self.__gradient_machine__.finish()
+
+    def iter_infer_field(self, field, **kwargs):
+        for result in self.iter_infer(**kwargs):
+            yield [each_result[field] for each_result in result]
+
+    def infer(self, field='value', **kwargs):
+        retv = None
+        for result in self.iter_infer_field(field=field, **kwargs):
+            if retv is None:
+                # one independent list per output; [[]] * n would alias
+                # the same list n times
+                retv = [[] for _ in xrange(len(result))]
+            for i, item in enumerate(result):
+                retv[i].append(item)
+        retv = [numpy.concatenate(out) for out in retv]
+        if len(retv) == 1:
+            return retv[0]
+        else:
+            return retv
+
+    def default_reader_dict(self):
+        reader_dict = dict()
+        for i, tp in enumerate(self.__data_types__):
+            reader_dict[tp[0]] = i
+        return reader_dict
+
+
+def infer(output, parameters, reader, reader_dict=None, field='value'):
+    inferer = Inference(output=output, parameters=parameters)
+    return inferer.infer(field=field, reader=reader, reader_dict=reader_dict)
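+
+
+# A minimal usage sketch (assumes `prediction`, `parameters` and a batched
+# `test_reader` were created elsewhere):
+#
+#     probs = infer(output=prediction, parameters=parameters,
+#                   reader=test_reader)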
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e4efedde363f20fde168941adcb6e8a594b533a
--- /dev/null
+++ b/python/paddle/v2/layer.py
@@ -0,0 +1,498 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+`paddle.v2.layer` is part of the model config packages in paddle.v2. In API v2,
+we want to make Paddle a plain Python package. The model config package defines
+how to configure a neural network topology in Paddle Python code.
+
+The primary usage is shown below.
+
+..  code-block:: python
+
+    import paddle.v2 as paddle
+
+    img = paddle.layer.data(name='img', type=paddle.data_type.dense_vector(784))
+    hidden = paddle.layer.fc(input=img, size=200)
+    prediction = paddle.layer.fc(input=hidden, size=10,
+                                 act=paddle.activation.Softmax())
+
+    # use prediction instance where needed.
+    parameters = paddle.parameters.create(prediction)
+"""
+
+import collections
+import inspect
+from config_base import Layer, __convert_to_v2__
+import paddle.trainer_config_helpers as conf_helps
+from paddle.trainer_config_helpers.config_parser_utils import \
+    parse_network_config as __parse__
+from paddle.trainer_config_helpers.default_decorators import wrap_act_default
+from paddle.trainer_config_helpers.default_decorators import \
+    wrap_bias_attr_default
+from paddle.trainer_config_helpers.default_decorators import wrap_name_default
+from paddle.trainer_config_helpers.layers import layer_support
+from paddle.trainer.config_parser import \
+    RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \
+    RecurrentLayerGroupEnd, model_type
+
+import activation
+import re
+import data_type
+
+__all__ = ['parse_network', 'data']
+
+
+def parse_network(*outputs):
+    """
+    Parse all output layers and then generate a ModelConfig object.
+
+    ..  note::
+
+        This function is used internally in paddle.v2 module. User should never
+        invoke this method.
+
+    :param outputs: Output layers.
+    :type outputs: Layer
+    :return: A ModelConfig object instance.
+    :rtype: ModelConfig
+    """
+
+    def __real_func__():
+        """
+        __real_func__ is the function that config_parser.parse invoked. It is
+        the plain old paddle configuration function.
+        """
+        context = dict()
+        real_output = [each.to_proto(context=context) for each in outputs]
+        conf_helps.outputs(real_output)
+
+    return __parse__(__real_func__)
+
+
+"""
+Some layers need special configuration and cannot be converted by
+__convert_to_v2__, so we implement them as dedicated LayerV2 classes.
+"""
+
+
+class DataLayerV2(Layer):
+    METHOD_NAME = 'data_layer'
+
+    def __init__(self, name, type, **kwargs):
+        assert isinstance(type, data_type.InputType)
+
+        self.type = type
+        self.__method_name__ = 'data_layer'
+        self.__kwargs__ = kwargs
+
+        super(DataLayerV2, self).__init__(name=name, parent_layers=dict())
+
+    def to_proto_impl(self, **kwargs):
+        args = dict()
+        args['size'] = self.type.dim
+        for each in kwargs:
+            args[each] = kwargs[each]
+        for each in self.__kwargs__:
+            args[each] = self.__kwargs__[each]
+        return getattr(conf_helps, self.__method_name__)(name=self.name, **args)
+
+    def __map_docstr__(doc):
+        doc = re.sub(r'(data = [^\)]+)\).*',
+                     "data = paddle.layer.data(name=\"input\", "
+                     "type=paddle.data_type.dense_vector(1000))", doc)
+
+        doc = re.sub(r':param size:.*',
+                     ':param type: Data type of this data layer', doc)
+        doc = re.sub(r':type size:.*',
+                     ":type size: paddle.v2.data_type.InputType", doc)
+        return doc
+
+
+class WithExtraParent(Layer):
+    def extra_parent(self):
+        return self.__extra_parent__
+
+    def __init__(self, name=None, parent_layers=None):
+        self.__extra_parent__ = []
+        super(WithExtraParent, self).__init__(
+            name=name, parent_layers=parent_layers)
+
+    def append_extra_parent(self, parent):
+        self.__extra_parent__.append(parent)
+
+    def to_proto(self, context):
+        """
+        Generate the v1 protobuf representation of this layer and its parents.
+        """
+        kwargs = dict()
+        for p in self.__extra_parent__:
+            p.to_proto(context=context)
+
+        for layer_name in self.__parent_layers__:
+            if not isinstance(self.__parent_layers__[layer_name],
+                              collections.Sequence):
+                v1_layer = self.__parent_layers__[layer_name].to_proto(
+                    context=context)
+            else:
+                v1_layer = map(lambda x: x.to_proto(context=context),
+                               self.__parent_layers__[layer_name])
+            kwargs[layer_name] = v1_layer
+
+        if self.context_name() is None:
+            return self.to_proto_impl(context=context, **kwargs)
+        elif self.context_name() not in context:
+            context[self.context_name()] = self.to_proto_impl(
+                context=context, **kwargs)
+
+        if self.use_context_name():
+            return context[self.context_name()]
+        else:
+            return context[self.name]
+
+
+class MemoryV2(WithExtraParent):
+    def __init__(self, name, **kwargs):
+        self.name = name
+        super(MemoryV2, self).__init__(name=name, parent_layers=dict())
+        self.__kwargs__ = kwargs
+        self.__boot_layer_name__ = None
+        if 'boot_layer' in kwargs:
+            begin_of_current_rnn = []
+            # TODO(yuyang18): Fix inspect, it could be wrong when user invoke a
+            # function inside step.
+            st = inspect.stack()
+            for i in xrange(len(st)):
+                locs = inspect.stack()[i][0].f_locals
+                keys = locs.keys()
+                for key in keys:
+                    val = locs[key]
+                    if isinstance(val, RecurrentLayerInput):
+                        begin_of_current_rnn.append(val)
+                    elif isinstance(val, collections.Sequence):
+                        for v in val:
+                            if isinstance(v, RecurrentLayerInput):
+                                begin_of_current_rnn.append(v)
+
+                if begin_of_current_rnn:
+                    break
+            assert begin_of_current_rnn  # boot_layer is only valid inside a recurrent group
+            for extra in begin_of_current_rnn:
+                self.append_extra_parent(extra)
+                assert isinstance(extra, WithExtraParent)
+                extra.append_extra_parent(kwargs['boot_layer'])
+                self.__boot_layer_name__ = kwargs['boot_layer'].name
+
+    def to_proto_impl(self, context, **kwargs):
+        args = dict()
+        for each in kwargs:
+            args[each] = kwargs[each]
+        for each in self.__kwargs__:
+            args[each] = self.__kwargs__[each]
+
+        if self.__boot_layer_name__ is not None:
+            args['boot_layer'] = context[self.__boot_layer_name__]
+
+        size = args.get('size', None)
+        if size is not None:
+            if callable(size):
+                real_size = size()
+            else:
+                real_size = size
+            args['size'] = real_size
+        return conf_helps.memory(name=self.name, **args)
+
+    def context_name(self):
+        return self.name + "#memory"
+
+    def use_context_name(self):
+        """
+        A memory layer shares its name with the layer whose output it
+        remembers.
+        :return: True
+        """
+        return True
+
+
+class LayerOutputV2(Layer):
+    """
+    LayerOutputV2 is used to store the result of LayerOutput in v1 api.
+    It will not store its parents because layer_output has been parsed already.
+    """
+
+    def __init__(self, layer_output):
+        assert isinstance(layer_output, conf_helps.LayerOutput)
+        self.layer_output = layer_output
+        super(LayerOutputV2, self).__init__(
+            name=layer_output.name, parent_layers=dict())
+
+    def to_proto_impl(self):
+        return self.layer_output
+
+
+class StaticInputV2(object):
+    def __init__(self, input, is_seq=False, size=None):
+        assert isinstance(input, LayerV2)
+        self.name = input.name
+        self.input = input
+        self.is_seq = is_seq
+        self.size = size
+        # TODO(add size check)
+        # assert input.size is not None or size is not None
+
+
+class MixedLayerV2(Layer):
+    """
+    This class is used to support the `with` statement. Without it, the
+    following code would be enough to convert mixed_layer:
+
+        mixed = __convert_to_v2__(
+            'mixed_layer', name_prefix='mixed', parent_names=['input'])
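+
+    A typical `with` usage sketch (`layer1` and `layer2` are illustrative):
+
+        with mixed(size=256) as m:
+            m += full_matrix_projection(input=layer1)
+            m += full_matrix_projection(input=layer2)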
+    """
+
+    class AddToSealedMixedLayerExceptionV2(Exception):
+        pass
+
+    def __init__(self,
+                 size=0,
+                 input=None,
+                 name=None,
+                 act=None,
+                 bias_attr=None,
+                 layer_attr=None):
+        self.__method_name__ = 'mixed_layer'
+        self.finalized = False
+        self.__inputs__ = []
+        if input is not None:
+            self.__inputs__ = input
+
+        other_kwargs = dict()
+        other_kwargs['name'] = name
+        other_kwargs['size'] = size
+        other_kwargs['act'] = act
+        other_kwargs['bias_attr'] = bias_attr
+        other_kwargs['layer_attr'] = layer_attr
+        parent_layers = {"input": self.__inputs__}
+        super(MixedLayerV2, self).__init__(name, parent_layers)
+        self.__other_kwargs__ = other_kwargs
+
+    def __iadd__(self, other):
+        if not self.finalized:
+            self.__inputs__.append(other)
+            return self
+        else:
+            raise MixedLayerV2.AddToSealedMixedLayerExceptionV2()
+
+    def __enter__(self):
+        assert len(self.__inputs__) == 0
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        self.finalized = True
+
+    def to_proto_impl(self, **kwargs):
+        args = dict()
+        for each in kwargs:
+            args[each] = kwargs[each]
+        for each in self.__other_kwargs__:
+            args[each] = self.__other_kwargs__[each]
+        size = args.get('size', None)
+        if size is not None:
+            if callable(size):
+                real_size = size()
+            else:
+                real_size = size
+            args['size'] = real_size
+        return getattr(conf_helps, self.__method_name__)(**args)
+
+
+@wrap_name_default("mixed")
+@wrap_act_default(act=activation.Linear())
+@wrap_bias_attr_default(has_bias=False)
+@layer_support(conf_helps.layers.ERROR_CLIPPING, conf_helps.layers.DROPOUT)
+def mixed(size=0,
+          name=None,
+          input=None,
+          act=None,
+          bias_attr=False,
+          layer_attr=None):
+    return MixedLayerV2(size, input, name, act, bias_attr, layer_attr)
+
+
+class RecurrentLayerInput(WithExtraParent):
+    def __init__(self, recurrent_name, index, parent_layers):
+        assert len(parent_layers) == 1
+        self.__parents__ = parent_layers.values()[0]
+        super(RecurrentLayerInput, self).__init__(
+            name=self.__parents__[index].name, parent_layers=parent_layers)
+        self.__recurrent_name__ = recurrent_name
+
+    def context_name(self):
+        return self.__recurrent_name__ + ".begin"
+
+    def to_proto_impl(self, context, **kwargs):
+        model_type('recurrent_nn')
+        RecurrentLayerGroupWithoutOutLinksBegin(
+            name=self.__recurrent_name__,
+            in_links=map(lambda x: x.name, self.__parents__))
+        return self
+
+
+class RecurrentLayerOutput(Layer):
+    def __init__(self, recurrent_name, index, parent_layers):
+        assert len(parent_layers) == 1
+        self.__parents__ = parent_layers.values()[0]
+        super(RecurrentLayerOutput, self).__init__(
+            name=self.__parents__[index].name, parent_layers=parent_layers)
+        self.__recurrent_name__ = recurrent_name
+
+    def context_name(self):
+        return self.__recurrent_name__ + ".end"
+
+    def to_proto_impl(self, **kwargs):
+        for l in self.__parents__:
+            RecurrentLayerGroupSetOutLink(l.name)
+        RecurrentLayerGroupEnd(name=self.__recurrent_name__)
+
+
+LayerV2 = Layer
+data = DataLayerV2
+data.__name__ = 'data'
+AggregateLevel = conf_helps.layers.AggregateLevel
+ExpandLevel = conf_helps.layers.ExpandLevel
+memory = MemoryV2
+
+
+def __layer_name_mapping__(inname):
+    if inname in ['data_layer', 'memory', 'mixed_layer', 'recurrent_group']:
+        # Do Not handle these layers
+        return
+    elif inname == 'maxid_layer':
+        return 'max_id'
+    elif inname.endswith('memory') or inname.endswith(
+            '_seq') or inname.endswith('_sim') or inname == 'hsigmoid':
+        return inname
+    elif inname in [
+            'cross_entropy', 'multi_binary_label_cross_entropy',
+            'cross_entropy_with_selfnorm'
+    ]:
+        return inname + "_cost"
+    elif inname.endswith('_cost'):
+        return inname
+    elif inname.endswith("_layer"):
+        return inname[:-len("_layer")]
+
+
+def __layer_name_mapping_parent_names__(inname):
+    all_args = getattr(conf_helps, inname).argspec.args
+    return filter(
+        lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b',
+                        'expand_as',
+                        'weights', 'vectors', 'weight', 'score', 'left',
+                        'right', 'output_mem'],
+        all_args)
+
+
+def __convert_layer__(_new_name_, _old_name_, _parent_names_):
+    global __all__
+    __all__.append(_new_name_)
+    globals()[_new_name_] = __convert_to_v2__(_old_name_, _parent_names_)
+    globals()[_new_name_].__name__ = _new_name_
+
+
+for each_layer_name in dir(conf_helps):
+    new_name = __layer_name_mapping__(each_layer_name)
+    if new_name is not None:
+        parent_names = __layer_name_mapping_parent_names__(each_layer_name)
+        assert len(parent_names) != 0, each_layer_name
+        __convert_layer__(new_name, each_layer_name, parent_names)
+
+del parent_names
+del new_name
+del each_layer_name
+
+
+@wrap_name_default()
+def recurrent_group(step, input, name=None):
+    if not isinstance(input, collections.Sequence):
+        input = [input]
+
+    non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2),
+                               input)
+    actual_input = [
+        RecurrentLayerInput(
+            recurrent_name=name,
+            index=i,
+            parent_layers={'recurrent_inputs': non_static_inputs})
+        for i in xrange(len(non_static_inputs))
+    ]
+
+    def __real_step__(*args):
+        rnn_input = list(args)
+        static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
+        for static_input in static_inputs:
+            mem_name = "__%s_memory__" % static_input.input.name
+            mem = memory(
+                name=mem_name,
+                is_seq=static_input.is_seq,
+                size=static_input.input.calculate_size,
+                boot_layer=static_input.input)
+            with mixed(
+                    name=mem_name,
+                    size=static_input.input.calculate_size,
+                    act=activation.Identity()) as mix:
+                mix += identity_projection(input=mem)
+            rnn_input.insert(input.index(static_input), mix)
+        return step(*rnn_input)
+
+    actual_output = __real_step__(*actual_input)
+
+    if not isinstance(actual_output, collections.Sequence):
+        actual_output = [actual_output]
+
+    retv = [
+        RecurrentLayerOutput(
+            recurrent_name=name,
+            index=i,
+            parent_layers={'recurrent_outputs': actual_output})
+        for i in xrange(len(actual_output))
+    ]
+    if len(retv) == 1:
+        return retv[0]
+    else:
+        return retv
+
+
+__projection_names__ = filter(lambda x: x.endswith('_projection'),
+                              dir(conf_helps))
+
+__all__ += __projection_names__
+
+__operator_names__ = filter(lambda x: x.endswith('_operator'), dir(conf_helps))
+__all__ += __operator_names__
+
+# convert projection
+for prj in __projection_names__:
+    globals()[prj] = __convert_to_v2__(
+        prj, parent_names=['input'], is_default_name=False)
+    globals()[prj].__name__ = prj
+
+# convert operator
+operator_list = [
+    # [V1_method_name, parent_names],
+    ['dotmul_operator', ['a', 'b']],
+    ['conv_operator', ['img', 'filter']]
+]
+for op in operator_list:
+    globals()[op[0]] = __convert_to_v2__(
+        op[0], parent_names=op[1], is_default_name=False)
+    globals()[op[0]].__name__ = op[0]
diff --git a/python/paddle/v2/minibatch.py b/python/paddle/v2/minibatch.py
new file mode 100644
index 0000000000000000000000000000000000000000..317cf037c69f8639e3760fbfce20565127794fcb
--- /dev/null
+++ b/python/paddle/v2/minibatch.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = ['batch']
+
+
+def batch(reader, batch_size):
+    """
+    Create a batched reader.
+
+    :param reader: the data reader to read from.
+    :type reader: callable
+    :param batch_size: size of each mini-batch
+    :type batch_size: int
+    :return: the batched reader.
+    :rtype: callable
+    """
+
+    def batch_reader():
+        r = reader()
+        b = []
+        for instance in r:
+            b.append(instance)
+            if len(b) == batch_size:
+                yield b
+                b = []
+        if b:
+            yield b
+
+    return batch_reader
diff --git a/python/paddle/v2/networks.py b/python/paddle/v2/networks.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e6644196c8242cc3fed7a4fb1503697e5b59ffb
--- /dev/null
+++ b/python/paddle/v2/networks.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.trainer_config_helpers.networks as conf_nw
+import inspect
+from config_base import __convert_to_v2__
+
+__all__ = []
+
+
+def __initialize__():
+    for each_subnetwork in conf_nw.__all__:
+        if each_subnetwork in ['inputs', 'outputs']:
+            continue
+        func = getattr(conf_nw, each_subnetwork)
+        if hasattr(func, 'argspec'):
+            argspec = func.argspec
+        else:
+            argspec = inspect.getargspec(func)
+        if each_subnetwork == 'simple_attention':
+            parents = ['encoded_sequence', 'encoded_proj', 'decoder_state']
+        else:
+            parents = filter(lambda x: x.startswith('input'), argspec.args)
+        assert len(parents) != 0, each_subnetwork
+        v2_subnet = __convert_to_v2__(
+            each_subnetwork,
+            parent_names=parents,
+            is_default_name='name' in argspec.args)
+        globals()[each_subnetwork] = v2_subnet
+        globals()[each_subnetwork].__name__ = each_subnetwork
+        global __all__
+        __all__.append(each_subnetwork)
+
+
+__initialize__()
diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a01d95c205c0626374e1814a170ce2d58f23a60
--- /dev/null
+++ b/python/paddle/v2/optimizer.py
@@ -0,0 +1,112 @@
+import py_paddle.swig_paddle as swig_api
+
+import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
+import paddle.trainer_config_helpers.optimizers as v1_optimizers
+"""
+Optimizers (update equations) for SGD-based training.
+
+TODO(yuyang18): Complete comments.
+"""
+
+__all__ = [
+    'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta',
+    'RMSProp', 'ModelAverage', 'L2Regularization'
+]
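+
+# A typical configuration sketch (the extra keyword arguments are forwarded
+# to paddle.trainer_config_helpers.optimizers.settings):
+#
+#     optimizer = Momentum(momentum=0.9, learning_rate=1e-3)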
+
+
+class Optimizer(object):
+    def __init__(self, **kwargs):
+        if 'batch_size' in kwargs:
+            del kwargs['batch_size']  # not important for python library.
+
+        def __impl__():
+            v1_optimizers.settings(batch_size=1, **kwargs)
+
+        self.__opt_conf_proto__ = config_parser_utils.parse_optimizer_config(
+            __impl__)
+        self.__opt_conf__ = swig_api.OptimizationConfig.createFromProto(
+            self.__opt_conf_proto__)
+
+    def enable_types(self):
+        """
+        Get the parameter buffer types this optimizer needs, e.g.
+        enable_types = [value, gradient, momentum]. Different optimizers
+        (SGD, Adam, ...) require the GradientMachine to enable different
+        buffers.
+        """
+        tmp = swig_api.ParameterOptimizer.create(self.__opt_conf__)
+        assert isinstance(tmp, swig_api.ParameterOptimizer)
+        return tmp.getParameterTypes()
+
+    def create_local_updater(self):
+        return swig_api.ParameterUpdater.createLocalUpdater(self.__opt_conf__)
+
+    def create_remote_updater(self, pass_num):
+        return swig_api.ParameterUpdater.createRemoteUpdater(self.__opt_conf__,
+                                                             pass_num)
+
+
+class Momentum(Optimizer):
+    def __init__(self, momentum=None, sparse=False, **kwargs):
+        learning_method = v1_optimizers.MomentumOptimizer(
+            momentum=momentum, sparse=sparse)
+        super(Momentum, self).__init__(
+            learning_method=learning_method, **kwargs)
+
+
+class Adam(Optimizer):
+    def __init__(self, beta1=0.9, beta2=0.999, epsilon=1e-8, **kwargs):
+        learning_method = v1_optimizers.AdamOptimizer(
+            beta1=beta1, beta2=beta2, epsilon=epsilon)
+        super(Adam, self).__init__(learning_method=learning_method, **kwargs)
+
+
+class Adamax(Optimizer):
+    def __init__(self, beta1=0.9, beta2=0.999, **kwargs):
+        learning_method = v1_optimizers.AdamaxOptimizer(
+            beta1=beta1, beta2=beta2)
+        super(Adamax, self).__init__(learning_method=learning_method, **kwargs)
+
+
+class AdaGrad(Optimizer):
+    def __init__(self, **kwargs):
+        learning_method = v1_optimizers.AdaGradOptimizer()
+        super(AdaGrad, self).__init__(learning_method=learning_method, **kwargs)
+
+
+class DecayedAdaGrad(Optimizer):
+    def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
+        learning_method = v1_optimizers.DecayedAdaGradOptimizer(
+            rho=rho, epsilon=epsilon)
+        super(DecayedAdaGrad, self).__init__(
+            learning_method=learning_method, **kwargs)
+
+
+class AdaDelta(Optimizer):
+    def __init__(self, rho=0.95, epsilon=1e-06, **kwargs):
+        learning_method = v1_optimizers.AdaDeltaOptimizer(
+            rho=rho, epsilon=epsilon)
+        super(AdaDelta, self).__init__(
+            learning_method=learning_method, **kwargs)
+
+
+class RMSProp(Optimizer):
+    def __init__(self, rho=0.95, epsilon=1e-6, **kwargs):
+        learning_method = v1_optimizers.RMSPropOptimizer(
+            rho=rho, epsilon=epsilon)
+        super(RMSProp, self).__init__(learning_method=learning_method, **kwargs)
+
+
+ModelAverage = v1_optimizers.ModelAverage
+L2Regularization = v1_optimizers.L2Regularization
+
+if __name__ == '__main__':
+    swig_api.initPaddle('--use_gpu=false')
+    for opt in [
+            Momentum(), Adam(), Adamax(), AdaGrad(), DecayedAdaGrad(),
+            AdaDelta(), RMSProp(), Adam(
+                model_average=ModelAverage(average_window=0.5),
+                regularization=L2Regularization(rate=0.5),
+                gradient_clipping_threshold=25)
+    ]:
+        print opt, opt.enable_types()
diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py
new file mode 100644
index 0000000000000000000000000000000000000000..05dc5c68dd97b00fb15b74564a32313430c45345
--- /dev/null
+++ b/python/paddle/v2/parameters.py
@@ -0,0 +1,336 @@
+import numpy as np
+import py_paddle.swig_paddle as api
+from paddle.proto.ParameterConfig_pb2 import ParameterConfig
+import struct
+import tarfile
+import cStringIO
+from topology import Topology
+
+__all__ = ['Parameters', 'create']
+
+
+def create(layers):
+    """
+    Create a parameter pool from the network topology.
+
+    :param layers: output layers that define the network topology.
+    :return: parameter pool of the given topology.
+    :rtype: Parameters
+    """
+    topology = Topology(layers)
+    pool = Parameters()
+    for param in topology.proto().parameters:
+        pool.__append_config__(param)
+    return pool
+
+
+class Parameters(object):
+    """
+    Parameters is a dictionary that contains Paddle's parameters. The key of
+    Parameters is the parameter name, and the value is a plain
+    :code:`numpy.ndarray` .
+
+    Basic usage is
+
+    ..  code-block:: python
+
+        data = paddle.layers.data(...)
+        ...
+        out = paddle.layers.fc(...)
+
+        parameters = paddle.parameters.create(out)
+
+        parameter_names = parameters.names()
+        fc_mat = parameters.get('fc')
+        print fc_mat
+    """
+
+    def __init__(self):
+        self.__param_conf__ = dict()
+        self.__gradient_machines__ = []
+        self.__tmp_params__ = []
+
+    def __append_config__(self, param_conf):
+        """
+        Append a parameter configuration. It is used to initialize Parameters
+        and should be invoked only in paddle.parameters.create.
+
+        :param param_conf: The parameter configuration in protobuf
+        :type param_conf: ParameterConfig
+        :return: Nothing
+        """
+
+        if not isinstance(param_conf, ParameterConfig):
+            raise ValueError("param_conf must be paddle.proto.ParameterConfig")
+
+        if param_conf.name in self.__param_conf__:
+            raise ValueError("duplicated parameter %s" % param_conf.name)
+
+        self.__param_conf__[param_conf.name] = param_conf
+
+    def keys(self):
+        """
+        Return the names of all parameters.
+
+        :return: list of parameter name
+        :rtype: list
+        """
+        return self.__param_conf__.keys()
+
+    def names(self):
+        """
+        Return the names of all parameters; identical to :code:`keys`.
+
+        :return: list of parameter name
+        :rtype: list
+        """
+        return self.keys()
+
+    def has_key(self, key):
+        """
+        Return True if there is a parameter whose name equals key.
+
+        :param key: Parameter name
+        :type key: basestring
+        :return: True if contains such key
+        """
+        return key in self.__param_conf__.keys()
+
+    def __iter__(self):
+        """
+        Return an iterator over parameter names. It supports the `for` loop
+        and the `in` operator.
+
+        ..  code-block:: python
+
+            parameters = paddle.parameters.create(...)
+            if "fc_param" in parameters:
+                print 'OK'
+
+        :return: an iterator over parameter names
+        :rtype: iterator
+        """
+        return iter(self.__param_conf__)
+
+    def __getitem__(self, key):
+        """
+        Get parameter by parameter name. It uses Python dict syntax.
+
+        :note: It will always copy the parameter from C++ side.
+        :param key: Parameter name
+        :type key: basestring
+        :return: parameter value
+        :rtype: np.ndarray
+        """
+        shape = self.get_shape(key)
+
+        if len(self.__gradient_machines__) == 0:
+            # no gradient machine attached yet; look up the python-side cache.
+            if len(self.__tmp_params__) != 0:
+                ret_list = [
+                    mat for name, mat in self.__tmp_params__ if name == key
+                ]
+                if len(ret_list) == 1:
+                    return ret_list[0]
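+            # not cached either: return a fresh, *uninitialized* array of the
+            # right shape (np.ndarray does not zero its memory).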
+            return np.ndarray(shape=shape, dtype=np.float32)
+        else:
+            for each_gradient_machine in self.__gradient_machines__:
+                param = __get_parameter_in_gradient_machine__(
+                    each_gradient_machine, key)
+                # to simplify the implementation for now, we always copy from C++
+                assert isinstance(param, api.Parameter)
+                val = param.getBuf(api.PARAMETER_VALUE)
+                assert isinstance(val, api.Vector)
+                val = val.copyToNumpyArray()
+                return val
+
+            raise RuntimeError("Unexpected branch")
+
+    def get_shape(self, key):
+        """
+        Get the shape of the parameter.
+
+        :param key: parameter name
+        :type key: basestring
+        :return: parameter's shape
+        :rtype: tuple
+        """
+        if not isinstance(key, basestring):
+            raise ValueError("parameter name should be string")
+        if not self.has_key(key):
+            raise ValueError("No such parameter %s" % key)
+        conf = self.__param_conf__[key]
+        return tuple(map(int, conf.dims))
+
+    def __setitem__(self, key, value):
+        """
+        Set parameter by parameter name & value. It uses Python dict syntax.
+
+        :note: It will always copy the parameter to C++ side.
+        :param key: Parameter name
+        :type key: basestring
+        :param value: Parameter matrix.
+        :type value: np.ndarray
+        :return: Nothing
+        """
+
+        if not isinstance(value, np.ndarray):
+            raise ValueError("Must return ndarray")
+        value = value.astype(dtype=np.float32)
+        shape = self.get_shape(key)
+        if value.shape != shape:
+            raise ValueError("Value shape mismatch, expect %s, should %s" %
+                             (shape, value.shape))
+
+        if len(self.__gradient_machines__) == 0:
+            self.__tmp_params__.append((key, value))
+        else:
+            for each_gradient_machine in self.__gradient_machines__:
+                __copy_parameter_to_gradient_machine__(each_gradient_machine,
+                                                       key, value)
+
+    def get(self, parameter_name):
+        """
+        Get parameter by parameter name.
+
+        :note: It will always copy the parameter from C++ side.
+        :param parameter_name: parameter name
+        :type parameter_name: basestring
+        :return: The parameter matrix.
+        :rtype: np.ndarray
+        """
+        return self.__getitem__(key=parameter_name)
+
+    def set(self, parameter_name, value):
+        """
+        Set parameter by parameter name & matrix.
+
+        :param parameter_name: parameter name
+        :type parameter_name: basestring
+        :param value: parameter matrix
+        :type value: np.ndarray
+        :return: Nothing.
+        """
+        self.__setitem__(key=parameter_name, value=value)
+
+    def append_gradient_machine(self, gradient_machine):
+        """
+        Append a gradient machine to the parameters. This method is used
+        internally by Trainer.train.
+
+        :param gradient_machine: Paddle C++ GradientMachine object.
+        :type gradient_machine: api.GradientMachine
+        :return: Nothing.
+        """
+
+        if not isinstance(gradient_machine, api.GradientMachine):
+            raise ValueError("gradient_machine should be api.GradientMachine")
+
+        if len(self.__tmp_params__) != 0:
+            for name, val in self.__tmp_params__:
+                try:
+                    __copy_parameter_to_gradient_machine__(gradient_machine,
+                                                           name, val)
+                except ValueError:
+                    # If no such parameter in gradient machine, then don't copy
+                    pass
+
+        self.__gradient_machines__.append(gradient_machine)
+
+    def serialize(self, name, f):
+        """
+        Serialize a single parameter into a binary stream: a 16-byte header
+        followed by the raw float32 values.
+
+        :param name: parameter name.
+        :type name: basestring
+        :param f: file-like object to write to.
+        :type f: file
+        :return: Nothing.
+        """
+        param = self.get(name)
+        size = reduce(lambda a, b: a * b, param.shape)
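+        # header: version (uint32, 0), size of each value in bytes (uint32, 4
+        # for float32), and the number of elements (uint64).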
+        f.write(struct.pack("IIQ", 0, 4, size))
+        param = param.astype(np.float32)
+        f.write(param.tobytes())
+
+    def deserialize(self, name, f):
+        """
+        Restore a single parameter from a binary stream written by
+        :code:`serialize`.
+
+        :param name: parameter name.
+        :type name: basestring
+        :param f: file-like object to read from.
+        :type f: file
+        :return: Nothing.
+        """
+        f.read(16)  # skip the 16-byte header written by serialize
+        arr = np.frombuffer(f.read(), dtype=np.float32)
+        self.set(name, arr.reshape(self.get_shape(name)))
+
+    def to_tar(self, f):
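+        """
+        Save all parameters to a tar archive. For each parameter, two entries
+        are written: the raw value under the parameter name, and the
+        serialized ParameterConfig protobuf under "<name>.protobuf".
+
+        :param f: file-like object to write the tar archive to.
+        :type f: file
+        """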
+        tar = tarfile.TarFile(fileobj=f, mode='w')
+        for nm in self.names():
+            buf = cStringIO.StringIO()
+            self.serialize(nm, buf)
+            tarinfo = tarfile.TarInfo(name=nm)
+            buf.seek(0)
+            tarinfo.size = len(buf.getvalue())
+            tar.addfile(tarinfo, buf)
+
+            conf = self.__param_conf__[nm]
+            confStr = conf.SerializeToString()
+            tarinfo = tarfile.TarInfo(name="%s.protobuf" % nm)
+            tarinfo.size = len(confStr)
+            buf = cStringIO.StringIO(confStr)
+            buf.seek(0)
+            tar.addfile(tarinfo, fileobj=buf)
+
+    @staticmethod
+    def from_tar(f):
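+        """
+        Load parameters from a tar archive written by :code:`to_tar`.
+
+        :param f: file-like object to read the tar archive from.
+        :type f: file
+        :return: the restored parameters.
+        :rtype: Parameters
+        """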
+        params = Parameters()
+        tar = tarfile.TarFile(fileobj=f, mode='r')
+        for finfo in tar:
+            assert isinstance(finfo, tarfile.TarInfo)
+            if finfo.name.endswith('.protobuf'):
+                f = tar.extractfile(finfo)
+                conf = ParameterConfig()
+                conf.ParseFromString(f.read())
+                params.__append_config__(conf)
+
+        for param_name in params.names():
+            f = tar.extractfile(param_name)
+            params.deserialize(param_name, f)
+        return params
+
+
+def __get_parameter_in_gradient_machine__(gradient_machine, name):
+    """
+    Find a parameter in a gradient machine by name.
+
+    :param gradient_machine: the gradient machine to search.
+    :type gradient_machine: api.GradientMachine
+    :param name: parameter name.
+    :return: the matching parameter.
+    :rtype: api.Parameter
+    """
+    params = filter(lambda p: p.getName() == name,
+                    gradient_machine.getParameters())
+
+    if len(params) == 0:
+        raise ValueError("No such parameter %s" % name)
+    elif len(params) > 1:
+        raise ValueError("More than one parameter named %s" % name)
+    else:
+        return params[0]
+
+
+def __copy_parameter_to_gradient_machine__(gradient_machine, name, arr):
+    """
+    Copy a python ndarray into the gradient machine.
+
+    :param gradient_machine: the gradient machine to copy into.
+    :type gradient_machine: api.GradientMachine
+    :param name: parameter name.
+    :param arr: parameter value.
+    :type arr: np.ndarray
+    :return: Nothing.
+    """
+    param = __get_parameter_in_gradient_machine__(gradient_machine, name)
+    vec = param.getBuf(api.PARAMETER_VALUE)
+    assert isinstance(vec, api.Vector)
+    vec.copyFromNumpyArray(arr.flatten())
diff --git a/python/paddle/v2/pooling.py b/python/paddle/v2/pooling.py
new file mode 100644
index 0000000000000000000000000000000000000000..4881c27d1d6d3d926f12aab096f377164debf1ef
--- /dev/null
+++ b/python/paddle/v2/pooling.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.trainer_config_helpers.poolings
+import copy
+
+__all__ = []
+suffix = 'Pooling'
+
+for name in paddle.trainer_config_helpers.poolings.__all__:
+    new_name = name[:-len(suffix)]
+    globals()[new_name] = copy.copy(
+        getattr(paddle.trainer_config_helpers.poolings, name))
+    globals()[new_name].__name__ = new_name
+    __all__.append(new_name)
diff --git a/python/paddle/v2/reader/__init__.py b/python/paddle/v2/reader/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b059735a924d58714cd88a761eb83143f1192d6
--- /dev/null
+++ b/python/paddle/v2/reader/__init__.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+At training and testing time, PaddlePaddle programs need to read data. To ease
+the users' work to write data reading code, we define that
+
+- A *reader* is a function that reads data (from file, network, random number
+  generator, etc) and yields data items.
+- A *reader creator* is a function that returns a reader function.
+- A *reader decorator* is a function, which accepts one or more readers, and
+  returns a reader.
+- A *batch reader* is a function that reads data (from *reader*, file, network,
+  random number generator, etc) and yields a batch of data items.
+
+#####################
+Data Reader Interface
+#####################
+
+Indeed, *data reader* doesn't have to be a function that reads and yields data
+items. It can be any function with no parameter that creates an iterable
+(anything that can be used in :code:`for x in iterable`)\:
+
+..  code-block:: python
+
+    iterable = data_reader()
+
+Each element produced by the iterable should be a **single** entry of data,
+**not** a mini batch. That entry of data could be a single item, or a tuple of
+items. The item should be of a supported type (e.g., a numpy 1d array of
+float32, an int, or a list of ints).
+
+An example implementation for single item data reader creator:
+
+..  code-block:: python
+
+    def reader_creator_random_image(width, height):
+        def reader():
+            while True:
+                yield numpy.random.uniform(-1, 1, size=width*height)
+        return reader
+
+An example implementation for multiple item data reader creator:
+
+..  code-block:: python
+
+    def reader_creator_random_image_and_label(width, height, label):
+        def reader():
+            while True:
+                yield numpy.random.uniform(-1, 1, size=width*height), label
+        return reader
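+
+An example implementation for a batch reader creator (a sketch; it groups
+single entries yielded by a reader into lists of size batch_size, dropping
+the final partial batch):
+
+..  code-block:: python
+
+    def batch_reader_creator(reader, batch_size):
+        def batch_reader():
+            batch = []
+            for item in reader():
+                batch.append(item)
+                if len(batch) == batch_size:
+                    yield batch
+                    batch = []
+        return batch_reader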
+
+
+TODO(yuyang18): Should we add whole design doc here?
+"""
+
+import decorator
+from decorator import *
+
+import creator
+
+__all__ = decorator.__all__ + ['creator']
diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py
new file mode 100644
index 0000000000000000000000000000000000000000..07142056f872db5113acdd296b17c52b343c1be6
--- /dev/null
+++ b/python/paddle/v2/reader/creator.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+The creator package contains some simple reader creators, which can be used in
+user programs.
+"""
+
+__all__ = ['np_array', 'text_file']
+
+
+def np_array(x):
+    """
+    Creates a reader that yields the elements of x if it is a numpy vector,
+    the rows of x if it is a numpy matrix, or, in general, the slices of x
+    along its first dimension.
+
+    :param x: the numpy array to create reader from.
+    :returns: data reader created from x.
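+
+    Example (a sketch):
+
+    ..  code-block:: python
+
+        mat = numpy.array([[1, 2], [3, 4]])
+        for row in np_array(mat)():
+            print row  # prints [1 2], then [3 4]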
+    """
+
+    def reader():
+        if x.ndim < 1:
+            yield x
+            return  # a 0-d array is not iterable; stop here
+
+        for e in x:
+            yield e
+
+    return reader
+
+
+def text_file(path):
+    """
+    Creates a data reader that outputs text line by line from given text file.
+    Trailing new line ('\\\\n') of each line will be removed.
+
+    :param path: path of the text file.
+    :returns: data reader of text file
+    """
+
+    def reader():
+        f = open(path, "r")
+        for l in f:
+            yield l.rstrip('\n')
+        f.close()
+
+    return reader
diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/v2/reader/decorator.py
new file mode 100644
index 0000000000000000000000000000000000000000..104ce9a0411413bb8fc65eedf5821f98d6acdba3
--- /dev/null
+++ b/python/paddle/v2/reader/decorator.py
@@ -0,0 +1,226 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__all__ = [
+    'map_readers', 'buffered', 'compose', 'chain', 'shuffle',
+    'ComposeNotAligned', 'firstn'
+]
+
+import itertools
+import random
+from Queue import Queue
+from threading import Thread
+
+
+def map_readers(func, *readers):
+    """
+    Creates a data reader that outputs the return value of a function, using
+    the outputs of the given data readers as the function's arguments.
+
+    :param func: function to use. The type of func should be (Sample) => Sample
+    :type func: callable
+    :param readers: readers whose outputs will be used as arguments of func.
+    :return: the created data reader.
+    :rtype: callable
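+
+    Example (a sketch; doubles every entry yielded by a reader):
+
+    ..  code-block:: python
+
+        doubled = map_readers(lambda x: 2 * x, reader)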
+    """
+
+    def reader():
+        rs = []
+        for r in readers:
+            rs.append(r())
+        for e in itertools.imap(func, *rs):
+            yield e
+
+    return reader
+
+
+def shuffle(reader, buf_size):
+    """
+    Creates a data reader whose data output is shuffled.
+
+    Output from the iterator created by the original reader will be buffered
+    into a shuffle buffer and then shuffled. The size of the shuffle buffer is
+    determined by the argument buf_size.
+
+    :param reader: the original reader whose output will be shuffled.
+    :type reader: callable
+    :param buf_size: shuffle buffer size.
+    :type buf_size: int
+
+    :return: the new reader whose output is shuffled.
+    :rtype: callable
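+
+    Example (a sketch; shuffles each consecutive block of 128 samples):
+
+    ..  code-block:: python
+
+        shuffled = shuffle(reader, buf_size=128)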
+    """
+
+    def data_reader():
+        buf = []
+        for e in reader():
+            buf.append(e)
+            if len(buf) >= buf_size:
+                random.shuffle(buf)
+                for b in buf:
+                    yield b
+                buf = []
+
+        if len(buf) > 0:
+            random.shuffle(buf)
+            for b in buf:
+                yield b
+
+    return data_reader
+
+
+def chain(*readers):
+    """
+    Creates a data reader whose output is the outputs of input data
+    readers chained together.
+
+    If the input readers output the following data entries:
+    [0, 0, 0]
+    [1, 1, 1]
+    [2, 2, 2]
+    then the chained reader will output:
+    [0, 0, 0, 1, 1, 1, 2, 2, 2]
+
+    :param readers: input readers.
+    :return: the new data reader.
+    :rtype: callable
+    """
+
+    def reader():
+        rs = []
+        for r in readers:
+            rs.append(r())
+
+        for e in itertools.chain(*rs):
+            yield e
+
+    return reader
+
+
+class ComposeNotAligned(ValueError):
+    pass
+
+
+def compose(*readers, **kwargs):
+    """
+    Creates a data reader whose output is the combination of input readers.
+
+    If the input readers output the following data entries:
+    (1, 2)    3    (4, 5)
+    then the composed reader will output:
+    (1, 2, 3, 4, 5)
+
+    :param readers: readers that will be composed together.
+    :param check_alignment: if True, will check if input readers are aligned
+        correctly. If False, will not check alignment and trailing outputs
+        will be discarded. Defaults to True.
+    :type check_alignment: bool
+
+    :return: the new data reader.
+
+    :raises ComposeNotAligned: outputs of readers are not aligned.
+        Will not raise when check_alignment is set to False.
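+
+    Example (a sketch; r1, r2 and r3 are readers yielding the entries above):
+
+    ..  code-block:: python
+
+        reader = compose(r1, r2, r3)
+        # each entry yielded by reader() is one flattened tuple,
+        # e.g. (1, 2, 3, 4, 5)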
+    """
+    check_alignment = kwargs.pop('check_alignment', True)
+
+    def make_tuple(x):
+        if isinstance(x, tuple):
+            return x
+        else:
+            return (x, )
+
+    def reader():
+        rs = []
+        for r in readers:
+            rs.append(r())
+        if not check_alignment:
+            for outputs in itertools.izip(*rs):
+                yield sum(map(make_tuple, outputs), ())
+        else:
+            for outputs in itertools.izip_longest(*rs):
+                for o in outputs:
+                    if o is None:
+                        # None will not be present if compose is aligned
+                        raise ComposeNotAligned(
+                            "outputs of readers are not aligned.")
+                yield sum(map(make_tuple, outputs), ())
+
+    return reader
+
+
+def buffered(reader, size):
+    """
+    Creates a buffered data reader.
+
+    The buffered data reader will read and save data entries into a buffer
+    from a background thread. Reading from the buffered data reader will
+    proceed as long as the buffer is not empty.
+
+    :param reader: the data reader to read from.
+    :type reader: callable
+    :param size: max buffer size.
+    :type size: int
+
+    :returns: the buffered data reader.
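+
+    Example (a sketch; the background worker keeps the buffer filled while
+    the consumer iterates):
+
+    ..  code-block:: python
+
+        fast_reader = buffered(reader, 100)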
+    """
+
+    class EndSignal():
+        pass
+
+    end = EndSignal()
+
+    def read_worker(r, q):
+        for d in r:
+            q.put(d)
+        q.put(end)
+
+    def data_reader():
+        r = reader()
+        q = Queue(maxsize=size)
+        t = Thread(
+            target=read_worker, args=(
+                r,
+                q, ))
+        t.daemon = True
+        t.start()
+        e = q.get()
+        while e != end:
+            yield e
+            e = q.get()
+
+    return data_reader
+
+
+def firstn(reader, n):
+    """
+    Limit the max number of samples that reader could return.
+
+    :param reader: the data reader to read from.
+    :type reader: callable
+    :param n: the max number of samples to return.
+    :type n: int
+    :return: the decorated reader.
+    :rtype: callable
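+
+    Example (a sketch):
+
+    ..  code-block:: python
+
+        first_ten = firstn(reader, 10)  # yields at most 10 samples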
+    """
+
+    # TODO(yuyang18): Check whether simply dropping the reader cleans up the
+    # opened resources or not.
+
+    def firstn_reader():
+        for i, item in enumerate(reader()):
+            if i == n:
+                break
+            yield item
+
+    return firstn_reader
diff --git a/python/paddle/v2/reader/tests/CMakeLists.txt b/python/paddle/v2/reader/tests/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a51f700406b48f8186e45f1ced94765e343a8b5e
--- /dev/null
+++ b/python/paddle/v2/reader/tests/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_test(NAME reader_tests
+  COMMAND bash ${PROJ_ROOT}/python/paddle/v2/reader/tests/run_tests.sh
+  ${PYTHON_EXECUTABLE})
diff --git a/python/paddle/v2/reader/tests/__init__.py b/python/paddle/v2/reader/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/v2/reader/tests/creator_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f8d7133b8694aae5541eff9576eaba8a31e77dc
--- /dev/null
+++ b/python/paddle/v2/reader/tests/creator_test.py
@@ -0,0 +1,40 @@
+# Copyright PaddlePaddle contributors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import unittest
+
+import numpy as np
+
+import paddle.v2.reader.creator
+
+
+class TestNumpyArray(unittest.TestCase):
+    def test_numpy_array(self):
+        l = [[1, 2, 3], [4, 5, 6]]
+        x = np.array(l, np.int32)
+        reader = paddle.v2.reader.creator.np_array(x)
+        for idx, e in enumerate(reader()):
+            self.assertItemsEqual(e, l[idx])
+
+
+class TestTextFile(unittest.TestCase):
+    def test_text_file(self):
+        path = os.path.join(os.path.dirname(__file__), "test_data_creator.txt")
+        reader = paddle.v2.reader.creator.text_file(path)
+        for idx, e in enumerate(reader()):
+            self.assertEqual(e, str(idx * 2) + " " + str(idx * 2 + 1))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/reader/tests/decorator_test.py b/python/paddle/v2/reader/tests/decorator_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..734154b9790a4dc118d11992343648364c907305
--- /dev/null
+++ b/python/paddle/v2/reader/tests/decorator_test.py
@@ -0,0 +1,125 @@
+# Copyright PaddlePaddle contributors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import time
+import unittest
+
+import paddle.v2.reader
+
+
+def reader_creator_10(dur):
+    def reader():
+        for i in range(10):
+            # this sleep helps to test paddle.v2.reader.buffered
+            time.sleep(dur)
+            yield i
+
+    return reader
+
+
+class TestMap(unittest.TestCase):
+    def test_map(self):
+        d = {"h": 0, "i": 1}
+
+        def tokenize(x):
+            return d[x]
+
+        def read():
+            yield "h"
+            yield "i"
+
+        r = paddle.v2.reader.map_readers(tokenize, read)
+        for i, e in enumerate(r()):
+            self.assertEqual(e, i)
+
+
+class TestBuffered(unittest.TestCase):
+    def test_read(self):
+        for size in range(20):
+            b = paddle.v2.reader.buffered(reader_creator_10(0), size)
+            c = 0
+            for i in b():
+                self.assertEqual(i, c)
+                c += 1
+            self.assertEqual(c, 10)
+
+    def test_buffering(self):
+        # each read has a 30ms delay.
+        b = paddle.v2.reader.buffered(reader_creator_10(0.03), 10)
+        last_time = time.time()
+        for idx, i in enumerate(b()):
+            elapsed_time = time.time() - last_time
+            if i == 0:
+                time.sleep(0.3)
+            else:
+                # the read should be fast because the data is already buffered.
+                self.assertLess(elapsed_time, 0.05)
+            last_time = time.time()
+
+
+class TestCompose(unittest.TestCase):
+    def test_compose(self):
+        reader = paddle.v2.reader.compose(
+            reader_creator_10(0), reader_creator_10(0))
+        for idx, e in enumerate(reader()):
+            self.assertEqual(e, (idx, idx))
+
+    def test_compose_not_aligned(self):
+        total = 0
+        reader = paddle.v2.reader.compose(
+            paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)),
+            reader_creator_10(0))
+        with self.assertRaises(paddle.v2.reader.ComposeNotAligned):
+            for e in reader():
+                total += 1
+        # expecting 10, not 20
+        self.assertEqual(total, 10)
+
+    def test_compose_not_aligned_no_check(self):
+        total = 0
+        reader = paddle.v2.reader.compose(
+            paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0)),
+            reader_creator_10(0),
+            check_alignment=False)
+        for e in reader():
+            total += 1
+        # expecting 10, not 20
+        self.assertEqual(total, 10)
+
+
+class TestChain(unittest.TestCase):
+    def test_chain(self):
+        c = paddle.v2.reader.chain(reader_creator_10(0), reader_creator_10(0))
+        idx = 0
+        for e in c():
+            self.assertEqual(e, idx % 10)
+            idx += 1
+        self.assertEqual(idx, 20)
+
+
+class TestShuffle(unittest.TestCase):
+    def test_shuffle(self):
+        case = [(0, True), (1, True), (10, False), (100, False)]
+        a = reader_creator_10(0)
+        for size, checkEq in case:
+            s = paddle.v2.reader.shuffle(a, size)
+            total = 0
+            for idx, e in enumerate(s()):
+                if checkEq:
+                    self.assertEqual(idx, e)
+                total += 1
+            self.assertEqual(total, 10)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/reader/tests/run_tests.sh b/python/paddle/v2/reader/tests/run_tests.sh
new file mode 100755
index 0000000000000000000000000000000000000000..a544a5636021bcf8bd9a35966c91ae343c149d14
--- /dev/null
+++ b/python/paddle/v2/reader/tests/run_tests.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+pushd `dirname $0` > /dev/null
+SCRIPTPATH=$PWD
+popd > /dev/null
+
+cd $SCRIPTPATH
+$1 -m pip install ../../../../../paddle/dist/*.whl
+
+test_list="creator_test.py decorator_test.py"
+
+export PYTHONPATH=$PWD/../../../../../python/
+
+for fn in $test_list
+do
+  echo "test $fn"
+  $1 $fn
+  if [ $? -ne 0 ]; then
+    exit 1
+  fi
+done
diff --git a/python/paddle/v2/reader/tests/test_data_creator.txt b/python/paddle/v2/reader/tests/test_data_creator.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a2a8d47d43868d369083808497697da79e620e31
--- /dev/null
+++ b/python/paddle/v2/reader/tests/test_data_creator.txt
@@ -0,0 +1,3 @@
+0 1
+2 3
+4 5
diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..572deaff356712cac23cd7911cdf289db100564c
--- /dev/null
+++ b/python/paddle/v2/tests/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_test(NAME test_v2_api
+        COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE})
+
+add_test(NAME test_v2_layer
+        COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
+        ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py
+        WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
+
+add_test(NAME test_v2_rnn_layer
+        COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
+        ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py)
+
+add_test(NAME test_topology
+        COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
+        ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_topology.py
+        WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
diff --git a/python/paddle/v2/tests/run_tests.sh b/python/paddle/v2/tests/run_tests.sh
new file mode 100755
index 0000000000000000000000000000000000000000..dda1b1bd222a9f226db1a4bd730e9637ab882196
--- /dev/null
+++ b/python/paddle/v2/tests/run_tests.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+pushd `dirname $0` > /dev/null
+SCRIPTPATH=$PWD
+popd > /dev/null
+
+cd $SCRIPTPATH
+
+$1 -m pip install ../../../../paddle/dist/*.whl
+
+test_list="test_data_feeder.py test_parameters.py"
+
+export PYTHONPATH=$PWD/../../../../python/
+
+for fn in $test_list
+do
+  echo "test $fn"
+  $1 $fn
+  if [ $? -ne 0 ]; then
+    exit 1
+  fi
+done
diff --git a/python/paddle/v2/tests/test_data_feeder.py b/python/paddle/v2/tests/test_data_feeder.py
new file mode 100644
index 0000000000000000000000000000000000000000..71eb3bf31425c22b47accc11c9550042e077ef12
--- /dev/null
+++ b/python/paddle/v2/tests/test_data_feeder.py
@@ -0,0 +1,243 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import py_paddle.swig_paddle as api
+import numpy as np
+
+from paddle.v2 import data_type
+from paddle.v2.data_feeder import DataFeeder
+
+
+class DataFeederTest(unittest.TestCase):
+    def dense_reader(self, size):
+        data = np.random.random(size)
+        return data
+
+    def sparse_binary_reader(self, high, size_limit, non_empty=False):
+        num = np.random.randint(size_limit)  # num could be 0
+        while non_empty and num == 0:
+            num = np.random.randint(size_limit)
+        return np.random.randint(high, size=num).tolist()
+
+    def test_dense(self):
+        def compare(input):
+            feeder = DataFeeder([('image', data_type.dense_vector(784))],
+                                {'image': 0})
+            arg = feeder(input)
+            output = arg.getSlotValue(0).copyToNumpyMat()
+            input = np.array(input, dtype='float32')
+            self.assertAlmostEqual(input.all(), output.all())
+
+        # test numpy array
+        batch_size = 32
+        dim = 784
+        data = []
+        for i in xrange(batch_size):
+            each_sample = []
+            each_sample.append(self.dense_reader(dim))
+            data.append(each_sample)
+        compare(data)
+
+        # each feature is a list
+        data = []
+        for i in xrange(batch_size):
+            each_sample = []
+            each_sample.append(self.dense_reader(dim).tolist())
+            data.append(each_sample)
+        compare(data)
+
+        # test tuple
+        data = []
+        for i in xrange(batch_size):
+            each_sample = (self.dense_reader(dim).tolist(), )
+            data.append(each_sample)
+        compare(data)
+
+    def test_sparse_binary(self):
+        dim = 10000
+        batch_size = 32
+        data = []
+        for i in xrange(batch_size):
+            each_sample = []
+            each_sample.append(self.sparse_binary_reader(dim, 50))
+            data.append(each_sample)
+        feeder = DataFeeder([('input', data_type.sparse_binary_vector(dim))],
+                            {'input': 0})
+        arg = feeder(data)
+        output = arg.getSlotValue(0)
+        assert isinstance(output, api.Matrix)
+        for i in xrange(batch_size):
+            self.assertEqual(output.getSparseRowCols(i), data[i][0])
+
+    def test_sparse(self):
+        dim = 10000
+        batch_size = 32
+        v = []
+        w = []
+        data = []
+        for dat in xrange(batch_size):
+            each_sample = []
+            a = self.sparse_binary_reader(dim, 40, non_empty=True)
+            b = self.dense_reader(len(a)).tolist()
+            v.append(a)
+            w.append(np.array(b, dtype="float32"))
+            each_sample.append(zip(a, b))
+            data.append(each_sample)
+
+        feeder = DataFeeder([('input', data_type.sparse_vector(dim))],
+                            {'input': 0})
+        arg = feeder(data)
+        output = arg.getSlotValue(0)
+        assert isinstance(output, api.Matrix)
+        for i in xrange(batch_size):
+            self.assertEqual(output.getSparseRowCols(i), v[i])
+            cols_value = output.getSparseRowColsVal(i)
+            value = [val[1] for val in cols_value]
+            value = np.array(value, dtype="float32")
+            self.assertAlmostEqual(value.all(), w[i].all())
+
+    def test_integer(self):
+        value_range = 100
+        batch_size = 32
+        index = []
+        for i in xrange(batch_size):
+            each_sample = []
+            each_sample.append(np.random.randint(value_range))
+            index.append(each_sample)
+        feeder = DataFeeder([('input', data_type.integer_value(value_range))],
+                            {'input': 0})
+        arg = feeder(index)
+        output = arg.getSlotIds(0).copyToNumpyArray()
+        index = np.array(index, dtype='int')
+        self.assertEqual(output.all(), index.flatten().all())
+
+    def test_integer_sequence(self):
+        value_range = 10000
+        batch_size = 32
+        start = [0]
+        data = []
+        for i in xrange(batch_size):
+            each_sample = []
+            each_sample.append(
+                self.sparse_binary_reader(
+                    value_range, 30, non_empty=True))
+            data.append(each_sample)
+            start.append(len(each_sample[0]) + start[-1])
+        feeder = DataFeeder(
+            [('input', data_type.integer_value_sequence(value_range))],
+            {'input': 0})
+        arg = feeder(data)
+        output_data = arg.getSlotIds(0).copyToNumpyArray()
+        output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray()
+
+        index = []
+        for dat in data:
+            index.extend(x for x in dat[0])  # only one feature, so dat[0]
+        index = np.array(index, dtype='int')
+        start = np.array(start, dtype='int')
+        self.assertEqual(output_data.all(), index.all())
+        self.assertEqual(output_start.all(), start.all())
+
+    def test_multiple_features(self):
+        batch_size = 2
+        data = []
+        for i in xrange(batch_size):
+            each_sample = []
+            each_sample.append(np.random.randint(10))
+            each_sample.append(
+                self.sparse_binary_reader(
+                    20000, 40, non_empty=True))
+            each_sample.append(self.dense_reader(100))
+            data.append(each_sample)
+
+        # test multiple features
+        data_types = [('fea0', data_type.dense_vector(100)),
+                      ('fea1', data_type.sparse_binary_vector(20000)),
+                      ('fea2', data_type.integer_value(10))]
+        feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0})
+        arg = feeder(data)
+        output_dense = arg.getSlotValue(0).copyToNumpyMat()
+        output_sparse = arg.getSlotValue(1)
+        output_index = arg.getSlotIds(2).copyToNumpyArray()
+        for i in xrange(batch_size):
+            self.assertEqual(output_dense[i].all(), data[i][2].all())
+            self.assertEqual(output_sparse.getSparseRowCols(i), data[i][1])
+            self.assertEqual(output_index[i], data[i][0])
+
+        # reader returns 3 features, but only 2 of them are used
+        data_types = [('fea0', data_type.dense_vector(100)),
+                      ('fea2', data_type.integer_value(10))]
+        feeder = DataFeeder(data_types, {'fea0': 2, 'fea2': 0})
+        arg = feeder(data)
+        output_dense = arg.getSlotValue(0).copyToNumpyMat()
+        output_index = arg.getSlotIds(1).copyToNumpyArray()
+        for i in xrange(batch_size):
+            self.assertEqual(output_dense[i].all(), data[i][2].all())
+            self.assertEqual(output_index[i], data[i][0])
+
+        # reader returns 3 features, one of which is duplicate data
+        data_types = [('fea0', data_type.dense_vector(100)),
+                      ('fea1', data_type.sparse_binary_vector(20000)),
+                      ('fea2', data_type.integer_value(10)),
+                      ('fea3', data_type.dense_vector(100))]
+        feeder = DataFeeder(data_types,
+                            {'fea0': 2,
+                             'fea1': 1,
+                             'fea2': 0,
+                             'fea3': 2})
+        arg = feeder(data)
+        fea0 = arg.getSlotValue(0).copyToNumpyMat()
+        fea1 = arg.getSlotValue(1)
+        fea2 = arg.getSlotIds(2).copyToNumpyArray()
+        fea3 = arg.getSlotValue(3).copyToNumpyMat()
+        for i in xrange(batch_size):
+            self.assertEqual(fea0[i].all(), data[i][2].all())
+            self.assertEqual(fea1.getSparseRowCols(i), data[i][1])
+            self.assertEqual(fea2[i], data[i][0])
+            self.assertEqual(fea3[i].all(), data[i][2].all())
+
+    def test_multiple_features_tuple(self):
+        batch_size = 2
+        data = []
+        for i in xrange(batch_size):
+            a = np.random.randint(10)
+            b = self.sparse_binary_reader(20000, 40, non_empty=True)
+            c = self.dense_reader(100)
+            each_sample = (a, b, c)
+            data.append(each_sample)
+
+        # test multiple features
+        data_types = [('fea0', data_type.dense_vector(100)),
+                      ('fea1', data_type.sparse_binary_vector(20000)),
+                      ('fea2', data_type.integer_value(10))]
+        feeder = DataFeeder(data_types, {'fea0': 2, 'fea1': 1, 'fea2': 0})
+        arg = feeder(data)
+        out_dense = arg.getSlotValue(0).copyToNumpyMat()
+        out_sparse = arg.getSlotValue(1)
+        out_index = arg.getSlotIds(2).copyToNumpyArray()
+        for i in xrange(batch_size):
+            self.assertEqual(out_dense[i].all(), data[i][2].all())
+            self.assertEqual(out_sparse.getSparseRowCols(i), data[i][1])
+            self.assertEqual(out_index[i], data[i][0])
+
+
+if __name__ == '__main__':
+    api.initPaddle("--use_gpu=0")
+    suite = unittest.TestLoader().loadTestsFromTestCase(DataFeederTest)
+    unittest.TextTestRunner().run(suite)
+    if api.isGpuVersion():
+        api.setUseGpu(True)
+        unittest.main()
diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0055679a91801a2f9b6432797665ec17caf3beb1
--- /dev/null
+++ b/python/paddle/v2/tests/test_layer.py
@@ -0,0 +1,264 @@
+# Copyright PaddlePaddle contributors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+
+import paddle.v2.activation as activation
+import paddle.v2.attr as attr
+import paddle.v2.data_type as data_type
+import paddle.v2.layer as layer
+import paddle.v2.pooling as pooling
+import paddle.v2.networks as networks
+
+pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
+label = layer.data(name='label', type=data_type.integer_value(10))
+weight = layer.data(name='weight', type=data_type.dense_vector(10))
+score = layer.data(name='score', type=data_type.dense_vector(1))
+
+hidden = layer.fc(input=pixel,
+                  size=100,
+                  act=activation.Sigmoid(),
+                  param_attr=attr.Param(name='hidden'))
+inference = layer.fc(input=hidden, size=10, act=activation.Softmax())
+conv = layer.img_conv(
+    input=pixel,
+    filter_size=1,
+    filter_size_y=1,
+    num_channels=8,
+    num_filters=16,
+    act=activation.Linear())
+
+
+class ImageLayerTest(unittest.TestCase):
+    def test_conv_layer(self):
+        conv_shift = layer.conv_shift(a=pixel, b=score)
+        print layer.parse_network(conv, conv_shift)
+
+    def test_pooling_layer(self):
+        maxpool = layer.img_pool(
+            input=conv,
+            pool_size=2,
+            num_channels=16,
+            padding=1,
+            pool_type=pooling.Max())
+        spp = layer.spp(input=conv,
+                        pyramid_height=2,
+                        num_channels=16,
+                        pool_type=pooling.Max())
+        maxout = layer.maxout(input=conv, num_channels=16, groups=4)
+        print layer.parse_network(maxpool, spp, maxout)
+
+    def test_norm_layer(self):
+        norm1 = layer.img_cmrnorm(input=conv, size=5)
+        norm2 = layer.batch_norm(input=conv)
+        norm3 = layer.sum_to_one_norm(input=conv)
+        print layer.parse_network(norm1, norm2, norm3)
+
+
+class AggregateLayerTest(unittest.TestCase):
+    def test_aggregate_layer(self):
+        pool = layer.pooling(
+            input=pixel,
+            pooling_type=pooling.Avg(),
+            agg_level=layer.AggregateLevel.EACH_SEQUENCE)
+        last_seq = layer.last_seq(input=pixel)
+        first_seq = layer.first_seq(input=pixel)
+        concat = layer.concat(input=[last_seq, first_seq])
+        seq_concat = layer.seq_concat(a=last_seq, b=first_seq)
+        print layer.parse_network(pool, last_seq, first_seq, concat, seq_concat)
+
+
+class MathLayerTest(unittest.TestCase):
+    def test_math_layer(self):
+        addto = layer.addto(input=[pixel, pixel])
+        linear_comb = layer.linear_comb(weights=weight, vectors=hidden, size=10)
+        interpolation = layer.interpolation(
+            input=[hidden, hidden], weight=score)
+        bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)
+        power = layer.power(input=pixel, weight=score)
+        scaling = layer.scaling(input=pixel, weight=score)
+        slope = layer.slope_intercept(input=pixel)
+        tensor = layer.tensor(a=pixel, b=pixel, size=1000)
+        cos_sim = layer.cos_sim(a=pixel, b=pixel)
+        trans = layer.trans(input=tensor)
+        print layer.parse_network(addto, linear_comb, interpolation, power,
+                                  scaling, slope, tensor, cos_sim, trans)
+
+
+class ReshapeLayerTest(unittest.TestCase):
+    def test_reshape_layer(self):
+        block_expand = layer.block_expand(
+            input=conv, num_channels=4, stride_x=1, block_x=1)
+        expand = layer.expand(
+            input=weight,
+            expand_as=pixel,
+            expand_level=layer.ExpandLevel.FROM_TIMESTEP)
+        repeat = layer.repeat(input=pixel, num_repeats=4)
+        reshape = layer.seq_reshape(input=pixel, reshape_size=4)
+        rotate = layer.rotate(input=pixel, height=16, width=49)
+        print layer.parse_network(block_expand, expand, repeat, reshape, rotate)
+
+
+class RecurrentLayerTest(unittest.TestCase):
+    def test_recurrent_layer(self):
+        word = layer.data(name='word', type=data_type.integer_value(12))
+        recurrent = layer.recurrent(input=word)
+        lstm = layer.lstmemory(input=word)
+        gru = layer.grumemory(input=word)
+        print layer.parse_network(recurrent, lstm, gru)
+
+
+class CostLayerTest(unittest.TestCase):
+    def test_cost_layer(self):
+        cost1 = layer.classification_cost(input=inference, label=label)
+        cost2 = layer.classification_cost(
+            input=inference, label=label, weight=weight)
+        cost3 = layer.cross_entropy_cost(input=inference, label=label)
+        cost4 = layer.cross_entropy_with_selfnorm_cost(
+            input=inference, label=label)
+        cost5 = layer.regression_cost(input=inference, label=label)
+        cost6 = layer.regression_cost(
+            input=inference, label=label, weight=weight)
+        cost7 = layer.multi_binary_label_cross_entropy_cost(
+            input=inference, label=label)
+        cost8 = layer.rank_cost(left=score, right=score, label=score)
+        cost9 = layer.lambda_cost(input=inference, score=score)
+        cost10 = layer.sum_cost(input=inference)
+        cost11 = layer.huber_cost(input=score, label=label)
+
+        print layer.parse_network(cost1, cost2)
+        print layer.parse_network(cost3, cost4)
+        print layer.parse_network(cost5, cost6)
+        print layer.parse_network(cost7, cost8, cost9, cost10, cost11)
+
+        crf = layer.crf(input=inference, label=label)
+        crf_decoding = layer.crf_decoding(input=inference, size=3)
+        ctc = layer.ctc(input=inference, label=label)
+        warp_ctc = layer.warp_ctc(input=pixel, label=label)
+        nce = layer.nce(input=inference, label=label, num_classes=3)
+        hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3)
+
+        print layer.parse_network(crf, crf_decoding, ctc, warp_ctc, nce,
+                                  hsigmoid)
+
+
+class OtherLayerTest(unittest.TestCase):
+    def test_sampling_layer(self):
+        maxid = layer.max_id(input=inference)
+        sampling_id = layer.sampling_id(input=inference)
+        eos = layer.eos(input=maxid, eos_id=5)
+        print layer.parse_network(maxid, sampling_id, eos)
+
+    def test_slicing_joining_layer(self):
+        pad = layer.pad(input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1])
+        print layer.parse_network(pad)
+
+
+class ProjOpTest(unittest.TestCase):
+    def test_projection(self):
+        input = layer.data(name='data', type=data_type.dense_vector(784))
+        word = layer.data(
+            name='word', type=data_type.integer_value_sequence(10000))
+        fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid())
+        fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid())
+        mixed0 = layer.mixed(
+            size=256,
+            input=[
+                layer.full_matrix_projection(input=fc0),
+                layer.full_matrix_projection(input=fc1)
+            ])
+        with layer.mixed(size=200) as mixed1:
+            mixed1 += layer.full_matrix_projection(input=fc0)
+            mixed1 += layer.identity_projection(input=fc1)
+
+        table = layer.table_projection(input=word)
+        emb0 = layer.mixed(size=512, input=table)
+        with layer.mixed(size=512) as emb1:
+            emb1 += table
+
+        scale = layer.scaling_projection(input=fc0)
+        scale0 = layer.mixed(size=100, input=scale)
+        with layer.mixed(size=100) as scale1:
+            scale1 += scale
+
+        dotmul = layer.dotmul_projection(input=fc0)
+        dotmul0 = layer.mixed(size=100, input=dotmul)
+        with layer.mixed(size=100) as dotmul1:
+            dotmul1 += dotmul
+
+        context = layer.context_projection(input=fc0, context_len=5)
+        context0 = layer.mixed(size=100, input=context)
+        with layer.mixed(size=100) as context1:
+            context1 += context
+
+        conv = layer.conv_projection(
+            input=input,
+            filter_size=1,
+            num_channels=1,
+            num_filters=128,
+            stride=1,
+            padding=0)
+        conv0 = layer.mixed(input=conv, bias_attr=True)
+        with layer.mixed(bias_attr=True) as conv1:
+            conv1 += conv
+
+        print layer.parse_network(mixed0)
+        print layer.parse_network(mixed1)
+        print layer.parse_network(emb0)
+        print layer.parse_network(emb1)
+        print layer.parse_network(scale0)
+        print layer.parse_network(scale1)
+        print layer.parse_network(dotmul0)
+        print layer.parse_network(dotmul1)
+        print layer.parse_network(conv0)
+        print layer.parse_network(conv1)
+
+    def test_operator(self):
+        ipt0 = layer.data(name='data', type=data_type.dense_vector(784))
+        ipt1 = layer.data(name='word', type=data_type.dense_vector(128))
+        fc0 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())
+        fc1 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())
+
+        dotmul_op = layer.dotmul_operator(a=fc0, b=fc1)
+        dotmul0 = layer.mixed(input=dotmul_op)
+        with layer.mixed() as dotmul1:
+            dotmul1 += dotmul_op
+
+        conv = layer.conv_operator(
+            img=ipt0,
+            filter=ipt1,
+            filter_size=1,
+            num_channels=1,
+            num_filters=128,
+            stride=1,
+            padding=0)
+        conv0 = layer.mixed(input=conv)
+        with layer.mixed() as conv1:
+            conv1 += conv
+
+        print layer.parse_network(dotmul0)
+        print layer.parse_network(dotmul1)
+        print layer.parse_network(conv0)
+        print layer.parse_network(conv1)
+
+
+class NetworkTests(unittest.TestCase):
+    def test_vgg(self):
+        img = layer.data(name='pixel', type=data_type.dense_vector(784))
+        vgg_out = networks.small_vgg(
+            input_image=img, num_channels=1, num_classes=2)
+        print layer.parse_network(vgg_out)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/tests/test_parameters.py b/python/paddle/v2/tests/test_parameters.py
new file mode 100644
index 0000000000000000000000000000000000000000..ebb182caab6430862a8e4da2ae4ea6b1e72f726c
--- /dev/null
+++ b/python/paddle/v2/tests/test_parameters.py
@@ -0,0 +1,60 @@
+import unittest
+import sys
+
+try:
+    import py_paddle
+
+    del py_paddle
+except ImportError:
+    print >> sys.stderr, "It seems the SWIG bindings of Paddle are not " \
+                         "installed, so this unittest will not be run."
+    sys.exit(0)
+
+import paddle.v2.parameters as parameters
+from paddle.proto.ParameterConfig_pb2 import ParameterConfig
+import random
+import cStringIO
+import numpy
+
+
+def __rand_param_config__(name):
+    conf = ParameterConfig()
+    conf.name = name
+    size = 1
+    for i in xrange(2):
+        dim = random.randint(1, 1000)
+        conf.dims.append(dim)
+        size *= dim
+    conf.size = size
+    assert conf.IsInitialized()
+    return conf
+
+
+class TestParameters(unittest.TestCase):
+    def test_serialization(self):
+        params = parameters.Parameters()
+        params.__append_config__(__rand_param_config__("param_0"))
+        params.__append_config__(__rand_param_config__("param_1"))
+
+        for name in params.names():
+            param = params.get(name)
+            param[:] = numpy.random.uniform(
+                -1.0, 1.0, size=params.get_shape(name))
+            params.set(name, param)
+
+        tmp_file = cStringIO.StringIO()
+        params.to_tar(tmp_file)
+        tmp_file.seek(0)
+        params_dup = parameters.Parameters.from_tar(tmp_file)
+
+        self.assertEqual(params_dup.names(), params.names())
+
+        for name in params.names():
+            self.assertEqual(params.get_shape(name), params_dup.get_shape(name))
+            p0 = params.get(name)
+            p1 = params_dup.get(name)
+            self.assertTrue(numpy.isclose(p0, p1).all())
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5fbbd20eb76bb9daab2bcf98c4adad989106a377
--- /dev/null
+++ b/python/paddle/v2/tests/test_rnn_layer.py
@@ -0,0 +1,155 @@
+# Copyright PaddlePaddle contributors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import difflib
+import unittest
+
+import paddle.trainer_config_helpers as conf_helps
+import paddle.v2.activation as activation
+import paddle.v2.data_type as data_type
+import paddle.v2.layer as layer
+from paddle.trainer_config_helpers.config_parser_utils import \
+    parse_network_config as parse_network
+
+
+class RNNTest(unittest.TestCase):
+    def test_simple_rnn(self):
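+        # Build the same RNN with the old trainer_config_helpers API and the
+        # new v2 layer API, then diff the two generated protobuf configs.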
+        dict_dim = 10
+        word_dim = 8
+        hidden_dim = 8
+
+        def parse_old_rnn():
+            def step(y):
+                mem = conf_helps.memory(name="rnn_state", size=hidden_dim)
+                out = conf_helps.fc_layer(
+                    input=[y, mem],
+                    size=hidden_dim,
+                    act=activation.Tanh(),
+                    bias_attr=True,
+                    name="rnn_state")
+                return out
+
+            def test():
+                data = conf_helps.data_layer(name="word", size=dict_dim)
+                embd = conf_helps.embedding_layer(input=data, size=word_dim)
+                conf_helps.recurrent_group(name="rnn", step=step, input=embd)
+
+            return str(parse_network(test))
+
+        def parse_new_rnn():
+            def new_step(y):
+                mem = layer.memory(name="rnn_state", size=hidden_dim)
+                out = layer.fc(input=[y, mem],
+                               size=hidden_dim,
+                               act=activation.Tanh(),
+                               bias_attr=True,
+                               name="rnn_state")
+                return out
+
+            data = layer.data(
+                name="word", type=data_type.integer_value(dict_dim))
+            embd = layer.embedding(input=data, size=word_dim)
+            rnn_layer = layer.recurrent_group(
+                name="rnn", step=new_step, input=embd)
+            return str(layer.parse_network(rnn_layer))
+
+        diff = difflib.unified_diff(parse_old_rnn().splitlines(True),
+                                    parse_new_rnn().splitlines(True))
+        print ''.join(diff)
+
+    def test_sequence_rnn_multi_input(self):
+        dict_dim = 10
+        word_dim = 8
+        hidden_dim = 8
+        label_dim = 3
+
+        def parse_old_rnn():
+            def test():
+                data = conf_helps.data_layer(name="word", size=dict_dim)
+                label = conf_helps.data_layer(name="label", size=label_dim)
+                emb = conf_helps.embedding_layer(input=data, size=word_dim)
+                boot_layer = conf_helps.data_layer(name="boot", size=10)
+                boot_layer = conf_helps.fc_layer(
+                    name='boot_fc', input=boot_layer, size=10)
+
+                def step(y, wid):
+                    z = conf_helps.embedding_layer(input=wid, size=word_dim)
+                    mem = conf_helps.memory(
+                        name="rnn_state",
+                        size=hidden_dim,
+                        boot_layer=boot_layer)
+                    out = conf_helps.fc_layer(
+                        input=[y, z, mem],
+                        size=hidden_dim,
+                        act=conf_helps.TanhActivation(),
+                        bias_attr=True,
+                        name="rnn_state")
+                    return out
+
+                out = conf_helps.recurrent_group(
+                    name="rnn", step=step, input=[emb, data])
+
+                rep = conf_helps.last_seq(input=out)
+                prob = conf_helps.fc_layer(
+                    size=label_dim,
+                    input=rep,
+                    act=conf_helps.SoftmaxActivation(),
+                    bias_attr=True)
+
+                conf_helps.outputs(
+                    conf_helps.classification_cost(
+                        input=prob, label=label))
+
+            return str(parse_network(test))
+
+        def parse_new_rnn():
+            data = layer.data(
+                name="word", type=data_type.dense_vector(dict_dim))
+            label = layer.data(
+                name="label", type=data_type.dense_vector(label_dim))
+            emb = layer.embedding(input=data, size=word_dim)
+            boot_layer = layer.data(
+                name="boot", type=data_type.dense_vector(10))
+            boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10)
+
+            def step(y, wid):
+                z = layer.embedding(input=wid, size=word_dim)
+                mem = layer.memory(
+                    name="rnn_state", size=hidden_dim, boot_layer=boot_layer)
+                out = layer.fc(input=[y, z, mem],
+                               size=hidden_dim,
+                               act=activation.Tanh(),
+                               bias_attr=True,
+                               name="rnn_state")
+                return out
+
+            out = layer.recurrent_group(
+                name="rnn", step=step, input=[emb, data])
+
+            rep = layer.last_seq(input=out)
+            prob = layer.fc(size=label_dim,
+                            input=rep,
+                            act=activation.Softmax(),
+                            bias_attr=True)
+
+            cost = layer.classification_cost(input=prob, label=label)
+
+            return str(layer.parse_network(cost))
+
+        diff = difflib.unified_diff(parse_old_rnn().splitlines(True),
+                                    parse_new_rnn().splitlines(True))
+        print ''.join(diff)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/tests/test_topology.py b/python/paddle/v2/tests/test_topology.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c6dbcdb4f49b960fb8b71aecbad4f013d2cd283
--- /dev/null
+++ b/python/paddle/v2/tests/test_topology.py
@@ -0,0 +1,84 @@
+# Copyright PaddlePaddle contributors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+import paddle.v2.layer as layer
+import paddle.v2.topology as topology
+import paddle.v2.data_type as data_type
+import paddle.trainer_config_helpers as conf_helps
+import paddle.trainer.PyDataProvider2 as pydp2
+
+
+class TestTopology(unittest.TestCase):
+    def test_data_type(self):
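+        # A tiny MNIST-style classifier: 784-d dense input, 10-way softmax.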
+        pixel = layer.data(name='pixel', type=data_type.dense_vector(784))
+        label = layer.data(name='label', type=data_type.integer_value(10))
+        hidden = layer.fc(input=pixel,
+                          size=100,
+                          act=conf_helps.SigmoidActivation())
+        inference = layer.fc(input=hidden,
+                             size=10,
+                             act=conf_helps.SoftmaxActivation())
+        cost = layer.classification_cost(input=inference, label=label)
+        topo = topology.Topology(cost)
+        data_types = topo.data_type()
+        self.assertEqual(len(data_types), 2)
+        pixel_data_type = filter(lambda type: type[0] == "pixel", data_types)
+        self.assertEqual(len(pixel_data_type), 1)
+        pixel_data_type = pixel_data_type[0]
+        self.assertEqual(pixel_data_type[1].type, pydp2.DataType.Dense)
+        self.assertEqual(pixel_data_type[1].dim, 784)
+
+        label_data_type = filter(lambda type: type[0] == "label", data_types)
+        self.assertEqual(len(label_data_type), 1)
+        label_data_type = label_data_type[0]
+        self.assertEqual(label_data_type[1].type, pydp2.DataType.Index)
+        self.assertEqual(label_data_type[1].dim, 10)
+
+    def test_get_layer(self):
+        pixel = layer.data(name='pixel', type=data_type.dense_vector(784))
+        label = layer.data(name='label', type=data_type.integer_value(10))
+        hidden = layer.fc(input=pixel,
+                          size=100,
+                          act=conf_helps.SigmoidActivation())
+        inference = layer.fc(input=hidden,
+                             size=10,
+                             act=conf_helps.SoftmaxActivation())
+        cost = layer.classification_cost(input=inference, label=label)
+        topo = topology.Topology(cost)
+        pixel_layer = topo.get_layer("pixel")
+        label_layer = topo.get_layer("label")
+        self.assertEqual(pixel_layer, pixel)
+        self.assertEqual(label_layer, label)
+
+    def test_parse(self):
+        pixel = layer.data(name='pixel', type=data_type.dense_vector(784))
+        label = layer.data(name='label', type=data_type.integer_value(10))
+        hidden = layer.fc(input=pixel,
+                          size=100,
+                          act=conf_helps.SigmoidActivation())
+        inference = layer.fc(input=hidden,
+                             size=10,
+                             act=conf_helps.SoftmaxActivation())
+        maxid = layer.max_id(input=inference)
+        cost1 = layer.classification_cost(input=inference, label=label)
+        cost2 = layer.cross_entropy_cost(input=inference, label=label)
+
+        topology.Topology(cost2).proto()
+        topology.Topology([cost1]).proto()
+        topology.Topology([cost1, cost2]).proto()
+        topology.Topology([inference, maxid]).proto()
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/topology.py b/python/paddle/v2/topology.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0679c5675b0c0f24f28f3df22efd4eb51ccbb3a
--- /dev/null
+++ b/python/paddle/v2/topology.py
@@ -0,0 +1,114 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import collections
+
+from paddle.proto.ModelConfig_pb2 import ModelConfig
+
+import layer as v2_layer
+from layer import WithExtraParent
+
+__all__ = ['Topology']
+
+
+def __flatten__(lis):
+    """
+    Given a list, possibly nested to any level, return it flattened.
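+    For example, __flatten__([1, [2, [3, 4]]]) returns [1, 2, 3, 4].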
+    """
+    new_lis = []
+    for item in lis:
+        if isinstance(item, collections.Sequence):
+            new_lis.extend(__flatten__(item))
+        else:
+            new_lis.append(item)
+    return new_lis
+
+
+def __bfs_travel__(callback, *layers):
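+    """
+    Walk the layer graph from the given layers towards their parent
+    layers, invoking ``callback`` on every layer visited.  A callback
+    returning True stops further traversal along the current branch.
+    """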
+    layers = __flatten__(layers)
+    for each_layer in layers:
+        __break__ = callback(each_layer)
+        if __break__:
+            return
+        __layers__ = each_layer.__parent_layers__.values()
+        if isinstance(each_layer, WithExtraParent):
+            __layers__ = __layers__ + each_layer.extra_parent()
+        __bfs_travel__(callback, *__layers__)
+
+
+class Topology(object):
+    """
+    Topology stores the information about all layers in a network and the
+    parsed network configuration.
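+
+    A minimal usage sketch (assuming ``cost`` is an output layer built with
+    paddle.v2.layer, as in the unit tests above):
+
+    .. code-block:: python
+
+        topo = Topology(cost)
+        input_types = topo.data_type()  # e.g. [('image', dense_vector(784))]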
+    """
+
+    def __init__(self, layers):
+        if not isinstance(layers, collections.Sequence):
+            __check_layer_type__(layers)
+            layers = [layers]
+        for layer in layers:
+            __check_layer_type__(layer)
+        self.layers = layers
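+        # parse the v2 layer graph into a protobuf ModelConfig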
+        self.__model_config__ = v2_layer.parse_network(*layers)
+        assert isinstance(self.__model_config__, ModelConfig)
+
+    def proto(self):
+        return self.__model_config__
+
+    def get_layer(self, name):
+        """
+        Get the v2.Layer instance in this topology by layer name.
+
+        :param name: the name of the layer to look up
+        :return: the matching layer; raises ValueError if no layer with
+                 that name exists
+        """
+        result_layer = [None]
+
+        def __impl__(l):
+            if l.name == name:
+                result_layer[0] = l
+                return True  # break
+            return False
+
+        __bfs_travel__(__impl__, *self.layers)
+        if result_layer[0] is None:
+            raise ValueError("No such layer %s" % name)
+        return result_layer[0]
+
+    def data_layers(self):
+        """
+        Get all data layers in this topology.
+
+        :return: a dict mapping layer name to data layer instance
+        """
+        data_layers = dict()
+
+        def __impl__(l):
+            if isinstance(l, v2_layer.DataLayerV2):
+                data_layers[l.name] = l
+
+        __bfs_travel__(__impl__, *self.layers)
+        return data_layers
+
+    def data_type(self):
+        """
+        Get the data types of all input layers, ordered as in the parsed
+        proto, such as:
+        [('image', dense_vector(768)), ('label', integer_value(10))]
+        """
+        data_layers = self.data_layers()
+        return [(nm, data_layers[nm].type)
+                for nm in self.proto().input_layer_names]
+
+
+def __check_layer_type__(layer):
+    if not isinstance(layer, v2_layer.LayerV2):
+        raise ValueError('layer should have type paddle.layer.Layer')
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..187abaf9a34f7b7901c5ff71b15a6db05756c7c4
--- /dev/null
+++ b/python/paddle/v2/trainer.py
@@ -0,0 +1,164 @@
+import collections
+
+import py_paddle.swig_paddle as api
+
+from data_feeder import DataFeeder
+from topology import Topology
+from . import event as v2_event
+from . import optimizer as v2_optimizer
+from . import parameters as v2_parameters
+
+__all__ = ['SGD']
+"""
+Trainer package
+TODO(yuyang18): Complete comments.
+"""
+
+
+def default_event_handler(event):
+    """
+    Default event handler. It will print logs and save the model.
+
+    TODO(yuyang18): Complete it!
+    :param event:
+    :return:
+    """
+    pass
+
+
+class SGD(object):
+    """
+    Simple SGD Trainer.
+    TODO(yuyang18): Complete comments
+
+    :param update_equation: The optimizer object.
+    :type update_equation: paddle.v2.optimizer.Optimizer
+    :param cost: The cost layer that the neural network will optimize.
+    :type cost: paddle.v2.config_base.Layer
+    :param parameters: The parameters dictionary.
+    :type parameters: paddle.v2.parameters.Parameters
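+
+    A minimal usage sketch (assuming ``cost``, ``parameters`` and a data
+    reader ``train_reader`` are defined elsewhere):
+
+    .. code-block:: python
+
+        optimizer = paddle.v2.optimizer.Momentum(learning_rate=1e-3)
+        trainer = SGD(cost=cost, parameters=parameters,
+                      update_equation=optimizer)
+        trainer.train(reader=train_reader, num_passes=10)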
+    """
+
+    def __init__(self, cost, parameters, update_equation):
+
+        if not isinstance(parameters, v2_parameters.Parameters):
+            raise TypeError('parameters should be '
+                            'paddle.v2.parameters.Parameters')
+
+        if not isinstance(update_equation, v2_optimizer.Optimizer):
+            raise TypeError("update equation parameter must be "
+                            "paddle.v2.optimizer.Optimizer")
+        topology = Topology(cost)
+        self.__optimizer__ = update_equation
+        self.__topology__ = topology
+        self.__parameters__ = parameters
+        self.__topology_in_proto__ = topology.proto()
+        self.__data_types__ = topology.data_type()
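+        # build a GradientMachine from the parsed topology; the optimizer
+        # decides which parameter buffer types the machine must enable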
+        gm = api.GradientMachine.createFromConfigProto(
+            self.__topology_in_proto__, api.CREATE_MODE_NORMAL,
+            self.__optimizer__.enable_types())
+        assert isinstance(gm, api.GradientMachine)
+        self.__gradient_machine__ = gm
+        self.__gradient_machine__.randParameters()
+        parameters.append_gradient_machine(gm)
+
+    def train(self, reader, num_passes=1, event_handler=None, reader_dict=None):
+        """
+        Training method. Trains ``num_passes`` passes over the input data.
+
+        :param reader: A reader that yields batches of training data.
+        :param num_passes: The total number of training passes.
+        :param event_handler: Event handler. A method will be invoked when an
+                              event occurs.
+        :type event_handler: (BaseEvent) => None
+        :param reader_dict: A dict mapping each input layer name to its
+                            column position in a data row; defaults to the
+                            order of the topology's data layers.
+        :return:
+        """
+        if event_handler is None:
+            event_handler = default_event_handler
+
+        if reader_dict is None:
+            reader_dict = self.default_reader_dict()
+
+        __check_train_args__(**locals())
+
+        updater = self.__optimizer__.create_local_updater()
+        updater.init(self.__gradient_machine__)
+
+        self.__gradient_machine__.start()
+        batch_evaluator = self.__gradient_machine__.makeEvaluator()
+        assert isinstance(batch_evaluator, api.Evaluator)
+        pass_evaluator = self.__gradient_machine__.makeEvaluator()
+        assert isinstance(pass_evaluator, api.Evaluator)
+        out_args = api.Arguments.createArguments(0)
+
+        feeder = DataFeeder(self.__data_types__, reader_dict)
+
+        for pass_id in xrange(num_passes):
+            event_handler(v2_event.BeginPass(pass_id))
+            pass_evaluator.start()
+            updater.startPass()
+            for batch_id, data_batch in enumerate(reader()):
+                batch_evaluator.start()
+                event_handler(
+                    v2_event.BeginIteration(
+                        pass_id=pass_id, batch_id=batch_id))
+                pass_type = updater.startBatch(len(data_batch))
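+                # one SGD step: forward/backward over the batch, then let the
+                # updater apply gradients to every non-static parameter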
+                self.__gradient_machine__.forwardBackward(
+                    feeder(data_batch), out_args, pass_type)
+                self.__gradient_machine__.eval(pass_evaluator)
+                self.__gradient_machine__.eval(batch_evaluator)
+                for each_param in self.__gradient_machine__.getNonStaticParameters(
+                ):
+                    updater.update(each_param)
+                cost_sum = out_args.sum()
+                cost = cost_sum / len(data_batch)
+                updater.finishBatch(cost)
+                batch_evaluator.finish()
+                event_handler(
+                    v2_event.EndIteration(
+                        pass_id=pass_id,
+                        batch_id=batch_id,
+                        cost=cost,
+                        evaluator=batch_evaluator))
+
+            updater.finishPass()
+            pass_evaluator.finish()
+            event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
+        self.__gradient_machine__.finish()
+
+    def default_reader_dict(self):
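+        # map each input layer name to its column index in a data row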
+        reader_dict = dict()
+        for i, tp in enumerate(self.__data_types__):
+            reader_dict[tp[0]] = i
+        return reader_dict
+
+    def test(self, reader, reader_dict=None):
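+        """
+        Evaluate the model on the data yielded by ``reader``; returns a
+        TestResult carrying the evaluator and the average cost per sample.
+        """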
+        if reader_dict is None:
+            reader_dict = self.default_reader_dict()
+
+        feeder = DataFeeder(self.__data_types__, reader_dict)
+        evaluator = self.__gradient_machine__.makeEvaluator()
+        out_args = api.Arguments.createArguments(0)
+        evaluator.start()
+        total_cost = 0
+        num_samples = 0.0
+        for data_batch in reader():
+            num_samples += len(data_batch)
+            self.__gradient_machine__.forward(
+                feeder(data_batch), out_args, api.PASS_TEST)
+            total_cost += out_args.sum()
+            self.__gradient_machine__.eval(evaluator)
+
+        evaluator.finish()
+        return v2_event.TestResult(
+            evaluator=evaluator, cost=total_cost / num_samples)
+
+
+def __check_train_args__(reader, event_handler, **kwargs):
+    """
+    Check train function's argument types
+    """
+    if not callable(reader) or not isinstance(reader(), collections.Iterator):
+        raise TypeError('train_data_reader should be a function, '
+                        'which can return an iterator')
+    if not callable(event_handler):
+        raise TypeError('event handler should be a function')
diff --git a/python/setup.py.in b/python/setup.py.in
index b66a42e87c78701e9eb26b1b7dc8f46a95035a76..68ca35265cf13265ad0b171b0f70e20b83006ff9 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -4,7 +4,10 @@ packages=['paddle',
           'paddle.proto',
           'paddle.trainer',
           'paddle.trainer_config_helpers',
-          'paddle.utils']
+          'paddle.utils',
+          'paddle.v2',
+          'paddle.v2.dataset',
+          'paddle.v2.reader']
 
 setup(name='paddle',
       version='${PADDLE_VERSION}',