Commit 4430fc9e authored by Liu Yiqun

Merge branch 'develop' into build_arm

[submodule "book"]
path = book
url = https://github.com/PaddlePaddle/book.git
branch = develop
\ No newline at end of file
@@ -2,12 +2,12 @@
 sha: c25201a00e6b0514370501050cf2a8538ac12270
 hooks:
 - id: remove-crlf
-files: (?!.*third_party)^.*$
+files: (?!.*third_party)^.*$ | (?!.*book)^.*$
 - repo: https://github.com/reyoung/mirrors-yapf.git
 sha: v0.13.2
 hooks:
 - id: yapf
-files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ # Bazel BUILD files follow Python syntax.
+files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
 - repo: https://github.com/pre-commit/pre-commit-hooks
 sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
 hooks:
@@ -15,7 +15,7 @@
 - id: check-merge-conflict
 - id: check-symlinks
 - id: detect-private-key
-files: (?!.*third_party)^.*$
+files: (?!.*third_party)^.*$ | (?!.*book)^.*$
 - id: end-of-file-fixer
 - repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
 sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
...
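The `files` patterns above use regular-expression negative lookahead so the hooks skip vendored trees (`third_party`, and now the `book` submodule as well). A minimal, illustrative sketch of how such a lookahead filter behaves; the paths and the combined pattern below are made up for demonstration and are not copied from the hook machinery:

```python
import re

# Match any path that mentions neither third_party nor book.
pattern = re.compile(r'(?!.*third_party)(?!.*book)^.*$')

for path in ['paddle/math/Matrix.cpp',
             'third_party/protobuf/src/any.cc',
             'book/01.fit_a_line/README.md']:
    print '%s matches: %s' % (path, bool(pattern.match(path)))
# Only paddle/math/Matrix.cpp matches, so the hooks run only on project files.
```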
@@ -4,22 +4,14 @@ cache:
 - $HOME/third_party
 - $HOME/.ccache
 - $HOME/.cache/pip
-- $HOME/Library/Caches/Homebrew
 sudo: required
 dist: trusty
 os:
 - linux
-- osx
 env:
 - JOB=DOCS
 - JOB=BUILD_AND_TEST
 - JOB=PRE_COMMIT
-matrix:
-exclude:
-- os: osx
-env: JOB=DOCS # Only generate documentation in linux.
-- os: osx
-env: JOB=PRE_COMMIT # Only check pre-commit hook in linux
 addons:
 apt:
@@ -53,11 +45,10 @@ before_install:
 fi
 fi
 fi
-- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
 - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
 # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
 # protobuf version.
-- pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx_rtd_theme virtualenv pre-commit requests==2.9.2 LinkChecker
+- pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
 script:
 - paddle/scripts/travis/main.sh
 notifications:
...
@@ -29,13 +29,16 @@ Luo, Tao
 Lyu, Qin
 Mao, Hongyue
 Qian, Xiaojun
+Qiao, Longfei
 Qi, Jun
 Qin, Duohao
 Shen, Guolong
 Shi, Guangchuan
 Song, Xiang
+Wang, Helin
 Wang, Jiang
 Wang, Yanfei
+Wang, Yi
 Wang, Yong
 Weng, Renliang
 Xu, Tianbing
...
Subproject commit 6e3875eb62533de1f2c1088a477719eb57b9732c
@@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
 ${source}
 ${destination}
 COMMENT "Generating sphinx documentation: ${builder}"
-COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html
+COMMAND cd ${destination} && ln -s ./index_*.html index.html
 )
 set_property(
...
 # Use ccache if found ccache program
-find_program(CCACHE_FOUND ccache)
-if(CCACHE_FOUND)
+find_program(CCACHE_PATH ccache)
+if(CCACHE_PATH)
 message(STATUS "Ccache is founded, use ccache to speed up compile.")
-set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
-set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
-endif(CCACHE_FOUND)
+set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH})
+set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH})
+endif(CCACHE_PATH)
\ No newline at end of file
@@ -110,14 +110,13 @@ endmacro()
 # Get the coverage data.
 file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda")
-message("GCDA files:")
-message("===============================")
+message("Process GCDA files:")
 # Get a list of all the object directories needed by gcov
 # (The directories the .gcda files and .o files are found in)
 # and run gcov on those.
 foreach(GCDA ${GCDA_FILES})
-message("Process: ${GCDA}")
-message("------------------------------------------------------------------------------")
 get_filename_component(GCDA_DIR ${GCDA} PATH)
 #
@@ -135,7 +134,7 @@ foreach(GCDA ${GCDA_FILES})
 # If -p is not specified then the file is named only "the_file.c.gcov"
 #
 execute_process(
-COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA}
+COMMAND "${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null"
 WORKING_DIRECTORY ${GCDA_DIR}
 )
 endforeach()
@@ -383,7 +382,6 @@ foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING})
 set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
 # Generate the final JSON for this file.
-message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...")
 string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON)
 set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ")
 endforeach()
...
@@ -12,26 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-FIND_PACKAGE(Protobuf REQUIRED)
-IF(Protobuf_FOUND)
-# Check protobuf library version.
-execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --version
-OUTPUT_VARIABLE PROTOBUF_VERSION)
-string(REPLACE "libprotoc " "" PROTOBUF_VERSION ${PROTOBUF_VERSION})
-#MESSAGE("PROTOBUF_INCLUDE_DIR: " ${PROTOBUF_INCLUDE_DIR})
-#MESSAGE("PROTOBUF_LITE_LIBRARY: " ${PROTOBUF_LITE_LIBRARY})
-#MESSAGE("PROTOBUF_LIBRARY: " ${PROTOBUF_LIBRARY})
-#MESSAGE("PROTOBUF_PROTOC_LIBRARY: " ${PROTOBUF_PROTOC_LIBRARY})
-#MESSAGE("PROTOBUF_PROTOC_EXECUTABLE: " ${PROTOBUF_PROTOC_EXECUTABLE})
-set(PROTOBUF_3 OFF)
-if (${PROTOBUF_VERSION} VERSION_GREATER "3.0.0" OR ${PROTOBUF_VERSION} VERSION_EQUAL "3.0.0")
-set(PROTOBUF_3 ON)
-endif()
-ELSE()
-INCLUDE(ExternalProject)
+INCLUDE(ExternalProject)
+FIND_PACKAGE(Protobuf 3.1)
+IF(PROTOBUF_FOUND)
+EXEC_PROGRAM(${PROTOBUF_PROTOC_EXECUTABLE} ARGS --version OUTPUT_VARIABLE PROTOBUF_VERSION)
+STRING(REGEX MATCH "[0-9]+.[0-9]+" PROTOBUF_VERSION "${PROTOBUF_VERSION}")
+IF (${PROTOBUF_VERSION} VERSION_LESS "3.1.0")
+SET(PROTOBUF_FOUND OFF)
+ENDIF()
+ENDIF(PROTOBUF_FOUND)
+IF(NOT PROTOBUF_FOUND)
 SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)
 SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf)
 SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE)
@@ -70,9 +63,9 @@ ELSE()
 -DCMAKE_BUILD_TYPE=Release
 -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
 -DCMAKE_INSTALL_LIBDIR=lib
 )
 LIST(APPEND external_project_dependencies protobuf)
-ENDIF()
+ENDIF(NOT PROTOBUF_FOUND)
 INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
@@ -221,7 +221,3 @@ ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
 INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
 INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
-MESSAGE("[Paddle] Python Executable: ${PYTHON_EXECUTABLE}")
-MESSAGE("[Paddle] Python Include: ${PYTHON_INCLUDE_DIRS}")
-MESSAGE("[Paddle] Python Libraries: ${PYTHON_LIBRARIES}")
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['resnet_cifar10']
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
active_type=paddle.activation.Relu(),
ch_in=None):
tmp = paddle.layer.img_conv(
input=input,
filter_size=filter_size,
num_channels=ch_in,
num_filters=ch_out,
stride=stride,
padding=padding,
act=paddle.activation.Linear(),
bias_attr=False)
return paddle.layer.batch_norm(input=tmp, act=active_type)
def shortcut(ipt, n_in, n_out, stride):
if n_in != n_out:
return conv_bn_layer(ipt, n_out, 1, stride, 0,
paddle.activation.Linear())
else:
return ipt
def basicblock(ipt, ch_out, stride):
ch_in = ch_out * 2
tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear())
short = shortcut(ipt, ch_in, ch_out, stride)
return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())
def layer_warp(block_func, ipt, features, count, stride):
tmp = block_func(ipt, features, stride)
for i in range(1, count):
tmp = block_func(tmp, features, 1)
return tmp
def resnet_cifar10(ipt, depth=32):
# depth should be one of 20, 32, 44, 56, 110, 1202
assert (depth - 2) % 6 == 0
n = (depth - 2) / 6
nStages = {16, 64, 128}
conv1 = conv_bn_layer(
ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
res1 = layer_warp(basicblock, conv1, 16, n, 1)
res2 = layer_warp(basicblock, res1, 32, n, 2)
res3 = layer_warp(basicblock, res2, 64, n, 2)
pool = paddle.layer.img_pool(
input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
return pool
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
def main():
datadim = 3 * 32 * 32
classdim = 10
# PaddlePaddle init
paddle.init(use_gpu=False, trainer_count=1)
image = paddle.layer.data(
name="image", type=paddle.data_type.dense_vector(datadim))
# Add neural network config
# option 1. resnet
# net = resnet_cifar10(image, depth=32)
# option 2. vgg
net = vgg_bn_drop(image)
out = paddle.layer.fc(input=net,
size=classdim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data(
name="label", type=paddle.data_type.integer_value(classdim))
cost = paddle.layer.classification_cost(input=out, label=lbl)
# Create parameters
parameters = paddle.parameters.create(cost)
# Create optimizer
momentum_optimizer = paddle.optimizer.Momentum(
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
learning_rate=0.1 / 128.0,
learning_rate_decay_a=0.1,
learning_rate_decay_b=50000 * 100,
learning_rate_schedule='discexp',
batch_size=128)
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.batch(
paddle.dataset.cifar.test10(), batch_size=128),
feeding={'image': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# Create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=momentum_optimizer)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(), buf_size=50000),
batch_size=128),
num_passes=5,
event_handler=event_handler,
feeding={'image': 0,
'label': 1})
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['vgg_bn_drop']
def vgg_bn_drop(input):
def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
return paddle.networks.img_conv_group(
input=ipt,
num_channels=num_channels,
pool_size=2,
pool_stride=2,
conv_num_filter=[num_filter] * groups,
conv_filter_size=3,
conv_act=paddle.activation.Relu(),
conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type=paddle.pooling.Max())
conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
bn = paddle.layer.batch_norm(
input=fc1,
act=paddle.activation.Relu(),
layer_attr=paddle.attr.Extra(drop_rate=0.5))
fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
return fc2
import paddle.v2 as paddle
import paddle.v2.dataset.uci_housing as uci_housing
def main():
# init
paddle.init(use_gpu=False, trainer_count=1)
# network config
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y_predict = paddle.layer.fc(input=x,
param_attr=paddle.attr.Param(name='w'),
size=1,
act=paddle.activation.Linear(),
bias_attr=paddle.attr.Param(name='b'))
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.regression_cost(input=y_predict, label=y)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
optimizer = paddle.optimizer.Momentum(momentum=0)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# event_handler to print training and testing info
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f" % (
event.pass_id, event.batch_id, event.cost)
if isinstance(event, paddle.event.EndPass):
if (event.pass_id + 1) % 10 == 0:
result = trainer.test(
reader=paddle.batch(
uci_housing.test(), batch_size=2),
feeding={'x': 0,
'y': 1})
print "Test %d, %.2f" % (event.pass_id, result.cost)
# training
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
uci_housing.train(), buf_size=500),
batch_size=2),
feeding={'x': 0,
'y': 1},
event_handler=event_handler,
num_passes=30)
if __name__ == '__main__':
main()
@@ -5,3 +5,6 @@ plot.png
 train.log
 *pyc
 .ipynb_checkpoints
+params.pkl
+params.tar
+params.tar.gz

import paddle.v2 as paddle
import gzip
def softmax_regression(img):
predict = paddle.layer.fc(input=img,
size=10,
act=paddle.activation.Softmax())
return predict
def multilayer_perceptron(img):
# The first fully-connected layer
hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
# The second fully-connected layer and the corresponding activation function
hidden2 = paddle.layer.fc(input=hidden1,
size=64,
act=paddle.activation.Relu())
# The third fully-connected layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=hidden2,
size=10,
act=paddle.activation.Softmax())
return predict
def convolutional_neural_network(img):
# first conv layer
conv_pool_1 = paddle.networks.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
num_channel=1,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# second conv layer
conv_pool_2 = paddle.networks.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
num_channel=20,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# The first fully-connected layer
fc1 = paddle.layer.fc(input=conv_pool_2,
size=128,
act=paddle.activation.Tanh())
# The softmax layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=fc1,
size=10,
act=paddle.activation.Softmax())
return predict
def main():
@@ -9,40 +63,74 @@ def main():
 name='pixel', type=paddle.data_type.dense_vector(784))
 label = paddle.layer.data(
 name='label', type=paddle.data_type.integer_value(10))
-hidden1 = paddle.layer.fc(input=images, size=200)
-hidden2 = paddle.layer.fc(input=hidden1, size=200)
-inference = paddle.layer.fc(input=hidden2,
-size=10,
-act=paddle.activation.Softmax())
-cost = paddle.layer.classification_cost(input=inference, label=label)
-parameters = paddle.parameters.create(cost)
-adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01)
+# Here we can build the prediction network in different ways. Please
+# choose one by uncomment corresponding line.
+predict = softmax_regression(images)
+#predict = multilayer_perceptron(images)
+#predict = convolutional_neural_network(images)
+cost = paddle.layer.classification_cost(input=predict, label=label)
+try:
+with gzip.open('params.tar.gz', 'r') as f:
+parameters = paddle.parameters.Parameters.from_tar(f)
+except IOError:
+parameters = paddle.parameters.create(cost)
+optimizer = paddle.optimizer.Momentum(
+learning_rate=0.1 / 128.0,
+momentum=0.9,
+regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
 trainer = paddle.trainer.SGD(cost=cost,
 parameters=parameters,
-update_equation=adam_optimizer)
+update_equation=optimizer)
+lists = []
 def event_handler(event):
 if isinstance(event, paddle.event.EndIteration):
 if event.batch_id % 1000 == 0:
-result = trainer.test(reader=paddle.reader.batched(
-paddle.dataset.mnist.test(), batch_size=256))
-print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
-event.pass_id, event.batch_id, event.cost, event.metrics,
-result.metrics)
-else:
-pass
+print "Pass %d, Batch %d, Cost %f, %s" % (
+event.pass_id, event.batch_id, event.cost, event.metrics)
+with gzip.open('params.tar.gz', 'w') as f:
+parameters.to_tar(f)
+elif isinstance(event, paddle.event.EndPass):
+result = trainer.test(reader=paddle.batch(
+paddle.dataset.mnist.test(), batch_size=128))
+print "Test with Pass %d, Cost %f, %s\n" % (
+event.pass_id, result.cost, result.metrics)
+lists.append((event.pass_id, result.cost,
+result.metrics['classification_error_evaluator']))
 trainer.train(
-reader=paddle.reader.batched(
+reader=paddle.batch(
 paddle.reader.shuffle(
 paddle.dataset.mnist.train(), buf_size=8192),
-batch_size=32),
-event_handler=event_handler)
+batch_size=128),
+event_handler=event_handler,
+num_passes=100)
+# find the best pass
+best = sorted(lists, key=lambda list: float(list[1]))[0]
+print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
+print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
+test_creator = paddle.dataset.mnist.test()
+test_data = []
+for item in test_creator():
+test_data.append((item[0], ))
+if len(test_data) == 100:
+break
+# output is a softmax layer. It returns probabilities.
+# Shape should be (100, 10)
+probs = paddle.infer(
+output_layer=predict, parameters=parameters, input=test_data)
+print probs.shape
 if __name__ == '__main__':
...
import paddle.v2 as paddle
import cPickle
import copy
def main():
paddle.init(use_gpu=False)
movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
uid = paddle.layer.data(
name='user_id',
type=paddle.data_type.integer_value(
paddle.dataset.movielens.max_user_id() + 1))
usr_emb = paddle.layer.embedding(input=uid, size=32)
usr_gender_id = paddle.layer.data(
name='gender_id', type=paddle.data_type.integer_value(2))
usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)
usr_age_id = paddle.layer.data(
name='age_id',
type=paddle.data_type.integer_value(
len(paddle.dataset.movielens.age_table)))
usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)
usr_job_id = paddle.layer.data(
name='job_id',
type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id(
) + 1))
usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)
usr_combined_features = paddle.layer.fc(
input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
size=200,
act=paddle.activation.Tanh())
mov_id = paddle.layer.data(
name='movie_id',
type=paddle.data_type.integer_value(
paddle.dataset.movielens.max_movie_id() + 1))
mov_emb = paddle.layer.embedding(input=mov_id, size=32)
mov_categories = paddle.layer.data(
name='category_id',
type=paddle.data_type.sparse_binary_vector(
len(paddle.dataset.movielens.movie_categories())))
mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)
mov_title_id = paddle.layer.data(
name='movie_title',
type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
mov_title_conv = paddle.networks.sequence_conv_pool(
input=mov_title_emb, hidden_size=32, context_len=3)
mov_combined_features = paddle.layer.fc(
input=[mov_emb, mov_categories_hidden, mov_title_conv],
size=200,
act=paddle.activation.Tanh())
inference = paddle.layer.cos_sim(
a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
cost = paddle.layer.regression_cost(
input=inference,
label=paddle.layer.data(
name='score', type=paddle.data_type.dense_vector(1)))
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=paddle.optimizer.Adam(
learning_rate=1e-4))
feeding = {
'user_id': 0,
'gender_id': 1,
'age_id': 2,
'job_id': 3,
'movie_id': 4,
'category_id': 5,
'movie_title': 6,
'score': 7
}
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d Batch %d Cost %.2f" % (
event.pass_id, event.batch_id, event.cost)
trainer.train(
reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.movielens.train(), buf_size=8192),
batch_size=256),
event_handler=event_handler,
feeding=feeding,
num_passes=1)
user_id = 234
movie_id = 345
user = paddle.dataset.movielens.user_info()[user_id]
movie = paddle.dataset.movielens.movie_info()[movie_id]
feature = user.value() + movie.value()
def reader():
yield feature
infer_dict = copy.copy(feeding)
del infer_dict['score']
prediction = paddle.infer(
output=inference,
parameters=parameters,
reader=paddle.batch(
reader, batch_size=32),
feeding=infer_dict)
print(prediction + 5) / 2
if __name__ == '__main__':
main()
import sys
import math
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
def db_lstm():
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
#8 features
def d_type(size):
return paddle.data_type.integer_value_sequence(size)
word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
target = paddle.layer.data(name='target', type=d_type(label_dict_len))
default_std = 1 / math.sqrt(hidden_dim) / 3.0
emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
std_0 = paddle.attr.Param(initial_std=0.)
std_default = paddle.attr.Param(initial_std=default_std)
predicate_embedding = paddle.layer.embedding(
size=word_dim,
input=predicate,
param_attr=paddle.attr.Param(
name='vemb', initial_std=default_std))
mark_embedding = paddle.layer.embedding(
size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
paddle.layer.embedding(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0 = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
mix_hidden_lr = 1e-3
lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = paddle.attr.Param(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = paddle.layer.lstmemory(
input=hidden_0,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = paddle.layer.lstmemory(
input=mix_hidden,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = paddle.layer.mixed(
size=label_dict_len,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
crf_cost = paddle.layer.crf(size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(
name='crfw',
initial_std=default_std,
learning_rate=mix_hidden_lr))
crf_dec = paddle.layer.crf_decoding(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(name='crfw'))
return crf_cost, crf_dec
def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f:
f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32).reshape(h, w)
def main():
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
crf_cost, crf_dec = db_lstm()
# create parameters
parameters = paddle.parameters.create([crf_cost, crf_dec])
# create optimizer
optimizer = paddle.optimizer.Momentum(
momentum=0,
learning_rate=2e-2,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(
average_window=0.5, max_average_window=10000), )
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
trainer = paddle.trainer.SGD(cost=crf_cost,
parameters=parameters,
update_equation=optimizer)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
trn_reader = paddle.batch(
paddle.reader.shuffle(
conll05.test(), buf_size=8192), batch_size=10)
feeding = {
'word_data': 0,
'ctx_n2_data': 1,
'ctx_n1_data': 2,
'ctx_0_data': 3,
'ctx_p1_data': 4,
'ctx_p2_data': 5,
'verb_data': 6,
'mark_data': 7,
'target': 8
}
trainer.train(
reader=trn_reader,
event_handler=event_handler,
num_passes=10000,
feeding=feeding)
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle.v2 as paddle
def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
conv_3 = paddle.networks.sequence_conv_pool(
input=emb, context_len=3, hidden_size=hid_dim)
conv_4 = paddle.networks.sequence_conv_pool(
input=emb, context_len=4, hidden_size=hid_dim)
output = paddle.layer.fc(input=[conv_3, conv_4],
size=class_dim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
def stacked_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=512,
stacked_num=3):
"""
A wrapper for the sentiment classification task.
This network uses a bi-directional recurrent network,
consisting of three LSTM layers. This configuration is taken from
the following paper, but uses fewer layers.
http://www.aclweb.org/anthology/P15-1109
input_dim: dimension of the word dictionary.
class_dim: number of categories.
emb_dim: dimension of word embedding.
hid_dim: dimension of hidden layer.
stacked_num: number of stacked lstm-hidden layers.
"""
assert stacked_num % 2 == 1
layer_attr = paddle.attr.Extra(drop_rate=0.5)
fc_para_attr = paddle.attr.Param(learning_rate=1e-3)
lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.)
para_attr = [fc_para_attr, lstm_para_attr]
bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.)
relu = paddle.activation.Relu()
linear = paddle.activation.Linear()
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
fc1 = paddle.layer.fc(input=emb,
size=hid_dim,
act=linear,
bias_attr=bias_attr)
lstm1 = paddle.layer.lstmemory(
input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
fc = paddle.layer.fc(input=inputs,
size=hid_dim,
act=linear,
param_attr=para_attr,
bias_attr=bias_attr)
lstm = paddle.layer.lstmemory(
input=fc,
reverse=(i % 2) == 0,
act=relu,
bias_attr=bias_attr,
layer_attr=layer_attr)
inputs = [fc, lstm]
fc_last = paddle.layer.pooling(
input=inputs[0], pooling_type=paddle.pooling.Max())
lstm_last = paddle.layer.pooling(
input=inputs[1], pooling_type=paddle.pooling.Max())
output = paddle.layer.fc(input=[fc_last, lstm_last],
size=class_dim,
act=paddle.activation.Softmax(),
bias_attr=bias_attr,
param_attr=para_attr)
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
if __name__ == '__main__':
# init
paddle.init(use_gpu=False)
#data
print 'load dictionary...'
word_dict = paddle.dataset.imdb.word_dict()
dict_dim = len(word_dict)
class_dim = 2
train_reader = paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=100)
test_reader = paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict), batch_size=100)
feeding = {'word': 0, 'label': 1}
# network config
# Please choose the way to build the network
# by uncommenting the corresponding line.
cost = convolution_net(dict_dim, class_dim=class_dim)
# cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
adam_optimizer = paddle.optimizer.Adam(
learning_rate=2e-3,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(average_window=0.5))
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(reader=test_reader, feeding=feeding)
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=adam_optimizer)
trainer.train(
reader=train_reader,
event_handler=event_handler,
feeding=feeding,
num_passes=2)
import sys
import paddle.v2 as paddle
def seqToseq_net(source_dict_dim, target_dict_dim):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
type=paddle.data_type.integer_value_sequence(source_dict_dim))
src_embedding = paddle.layer.embedding(
input=src_word_id,
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
src_forward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size)
src_backward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
#### Decoder
with paddle.layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += paddle.layer.full_matrix_projection(
input=encoded_vector)
backward_first = paddle.layer.first_seq(input=src_backward)
with paddle.layer.mixed(
size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
decoder_boot += paddle.layer.full_matrix_projection(
input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = paddle.layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = paddle.networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += paddle.layer.full_matrix_projection(input=context)
decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
gru_step = paddle.layer.gru_step(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size)
with paddle.layer.mixed(
size=target_dict_dim,
bias_attr=True,
act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For a decoder equipped with an attention mechanism, the target embedding
# (the ground truth) is the data input during training, while the encoded
# source sequence is accessed as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
def main():
paddle.init(use_gpu=False, trainer_count=1)
# source and target dict dim.
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
# define network topology
cost = seqToseq_net(source_dict_dim, target_dict_dim)
parameters = paddle.parameters.create(cost)
# define optimize method and trainer
optimizer = paddle.optimizer.Adam(
learning_rate=5e-5,
regularization=paddle.optimizer.L2Regularization(rate=1e-3))
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# define data reader
feeding = {
'source_language_word': 0,
'target_language_word': 1,
'target_language_next_word': 2
}
wmt14_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size=dict_size), buf_size=8192),
batch_size=5)
# define event_handler callback
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
# start to train
trainer.train(
reader=wmt14_reader,
event_handler=event_handler,
num_passes=10000,
feeding=feeding)
if __name__ == '__main__':
main()
import math
import paddle.v2 as paddle
dictsize = 1953
embsize = 32
hiddensize = 256
N = 5
def wordemb(inlayer):
wordemb = paddle.layer.table_projection(
input=inlayer,
size=embsize,
param_attr=paddle.attr.Param(
name="_proj",
initial_std=0.001,
learning_rate=1,
l2_rate=0, ))
return wordemb
def main():
paddle.init(use_gpu=False, trainer_count=1)
word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict)
firstword = paddle.layer.data(
name="firstw", type=paddle.data_type.integer_value(dict_size))
secondword = paddle.layer.data(
name="secondw", type=paddle.data_type.integer_value(dict_size))
thirdword = paddle.layer.data(
name="thirdw", type=paddle.data_type.integer_value(dict_size))
fourthword = paddle.layer.data(
name="fourthw", type=paddle.data_type.integer_value(dict_size))
nextword = paddle.layer.data(
name="fifthw", type=paddle.data_type.integer_value(dict_size))
Efirst = wordemb(firstword)
Esecond = wordemb(secondword)
Ethird = wordemb(thirdword)
Efourth = wordemb(fourthword)
contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
hidden1 = paddle.layer.fc(input=contextemb,
size=hiddensize,
act=paddle.activation.Sigmoid(),
layer_attr=paddle.attr.Extra(drop_rate=0.5),
bias_attr=paddle.attr.Param(learning_rate=2),
param_attr=paddle.attr.Param(
initial_std=1. / math.sqrt(embsize * 8),
learning_rate=1))
predictword = paddle.layer.fc(input=hidden1,
size=dict_size,
bias_attr=paddle.attr.Param(learning_rate=2),
act=paddle.activation.Softmax())
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
result = trainer.test(
paddle.batch(
paddle.dataset.imikolov.test(word_dict, N), 32))
print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics,
result.metrics)
cost = paddle.layer.classification_cost(input=predictword, label=nextword)
parameters = paddle.parameters.create(cost)
adam_optimizer = paddle.optimizer.Adam(
learning_rate=3e-3,
regularization=paddle.optimizer.L2Regularization(8e-4))
trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)
trainer.train(
paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
num_passes=30,
event_handler=event_handler)
if __name__ == '__main__':
main()
-API中文手册
-============
+API
+===
-DataProvider API
-----------------
-.. toctree::
-:maxdepth: 1
-data_provider/dataprovider_cn.rst
-data_provider/pydataprovider2_cn.rst
-.. _api_trainer_config:
-Model Config API
-----------------
-.. toctree::
-:maxdepth: 1
-trainer_config_helpers/optimizers.rst
-trainer_config_helpers/data_sources.rst
-trainer_config_helpers/layers.rst
-trainer_config_helpers/activations.rst
-trainer_config_helpers/poolings.rst
-trainer_config_helpers/networks.rst
-trainer_config_helpers/evaluators.rst
-trainer_config_helpers/attrs.rst
-Applications API
-----------------
 .. toctree::
 :maxdepth: 1
-predict/swig_py_paddle_cn.rst
+模型配置 <v2/model_configs.rst>
+数据访问 <v2/data.rst>
+训练与应用 <v2/run_logic.rst>
 API
 ===
-DataProvider API
-----------------
-.. toctree::
-:maxdepth: 1
-data_provider/dataprovider_en.rst
-data_provider/pydataprovider2_en.rst
-.. _api_trainer_config:
-Model Config API
-----------------
-.. toctree::
-:maxdepth: 1
-trainer_config_helpers/optimizers.rst
-trainer_config_helpers/data_sources.rst
-trainer_config_helpers/layers.rst
-trainer_config_helpers/activations.rst
-trainer_config_helpers/poolings.rst
-trainer_config_helpers/networks.rst
-trainer_config_helpers/evaluators.rst
-trainer_config_helpers/attrs.rst
-Applications API
-----------------
 .. toctree::
 :maxdepth: 1
-predict/swig_py_paddle_en.rst
+v2/model_configs.rst
+v2/data.rst
+v2/run_logic.rst
API中文手册
============
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
API
===
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_en.rst
===========
Activation
===========
Abs
===
.. automodule:: paddle.v2.activation
:members: Abs
:noindex:
Exp
===
.. automodule:: paddle.v2.activation
:members: Exp
:noindex:
Identity
========
.. automodule:: paddle.v2.activation
:members: Identity
:noindex:
Linear
======
.. automodule:: paddle.v2.activation
:members: Linear
:noindex:
Log
===
.. automodule:: paddle.v2.activation
:members: Log
:noindex:
Square
======
.. automodule:: paddle.v2.activation
:members: Square
:noindex:
Sigmoid
=======
.. automodule:: paddle.v2.activation
:members: Sigmoid
:noindex:
Softmax
=======
.. automodule:: paddle.v2.activation
:members: Softmax
:noindex:
SequenceSoftmax
===============
.. automodule:: paddle.v2.activation
:members: SequenceSoftmax
:noindex:
Relu
====
.. automodule:: paddle.v2.activation
:members: Relu
:noindex:
BRelu
=====
.. automodule:: paddle.v2.activation
:members: BRelu
:noindex:
SoftRelu
========
.. automodule:: paddle.v2.activation
:members: SoftRelu
:noindex:
Tanh
====
.. automodule:: paddle.v2.activation
:members: Tanh
:noindex:
STanh
=====
.. automodule:: paddle.v2.activation
:members: STanh
:noindex:
Parameter Attribute
===================
.. automodule:: paddle.v2.attr
:members:
:noindex:
.. _api_v2.layer:
======
Layers
======
Data layer
===========
.. _api_v2.layer_data:
data
----
.. automodule:: paddle.v2.layer
:members: data
:noindex:
Fully Connected Layers
======================
.. _api_v2.layer_fc:
fc
--
.. automodule:: paddle.v2.layer
:members: fc
:noindex:
selective_fc
------------
.. automodule:: paddle.v2.layer
:members: selective_fc
:noindex:
Conv Layers
===========
conv_operator
-------------
.. automodule:: paddle.v2.layer
:members: conv_operator
:noindex:
conv_projection
---------------
.. automodule:: paddle.v2.layer
:members: conv_projection
:noindex:
conv_shift
----------
.. automodule:: paddle.v2.layer
:members: conv_shift
:noindex:
img_conv
--------
.. automodule:: paddle.v2.layer
:members: img_conv
:noindex:
.. _api_v2.layer_context_projection:
context_projection
------------------
.. automodule:: paddle.v2.layer
:members: context_projection
:noindex:
Image Pooling Layer
===================
img_pool
--------
.. automodule:: paddle.v2.layer
:members: img_pool
:noindex:
spp
---
.. automodule:: paddle.v2.layer
:members: spp
:noindex:
maxout
------
.. automodule:: paddle.v2.layer
:members: maxout
:noindex:
Norm Layer
==========
img_cmrnorm
-----------
.. automodule:: paddle.v2.layer
:members: img_cmrnorm
:noindex:
batch_norm
----------
.. automodule:: paddle.v2.layer
:members: batch_norm
:noindex:
sum_to_one_norm
---------------
.. automodule:: paddle.v2.layer
:members: sum_to_one_norm
:noindex:
Recurrent Layers
================
recurrent
---------
.. automodule:: paddle.v2.layer
:members: recurrent
:noindex:
lstmemory
---------
.. automodule:: paddle.v2.layer
:members: lstmemory
:noindex:
grumemory
---------
.. automodule:: paddle.v2.layer
:members: grumemory
:noindex:
Recurrent Layer Group
=====================
memory
------
.. automodule:: paddle.v2.layer
:members: memory
:noindex:
recurrent_group
---------------
.. automodule:: paddle.v2.layer
:members: recurrent_group
:noindex:
lstm_step
---------
.. automodule:: paddle.v2.layer
:members: lstm_step
:noindex:
gru_step
--------
.. automodule:: paddle.v2.layer
:members: gru_step
:noindex:
beam_search
------------
.. automodule:: paddle.v2.layer
:members: beam_search
:noindex:
get_output
----------
.. automodule:: paddle.v2.layer
:members: get_output
:noindex:
Mixed Layer
===========
.. _api_v2.layer_mixed:
mixed
-----
.. automodule:: paddle.v2.layer
:members: mixed
:noindex:
.. _api_v2.layer_embedding:
embedding
---------
.. automodule:: paddle.v2.layer
:members: embedding
:noindex:
scaling_projection
------------------
.. automodule:: paddle.v2.layer
:members: scaling_projection
:noindex:
dotmul_projection
-----------------
.. automodule:: paddle.v2.layer
:members: dotmul_projection
:noindex:
dotmul_operator
---------------
.. automodule:: paddle.v2.layer
:members: dotmul_operator
:noindex:
full_matrix_projection
----------------------
.. automodule:: paddle.v2.layer
:members: full_matrix_projection
:noindex:
identity_projection
-------------------
.. automodule:: paddle.v2.layer
:members: identity_projection
:noindex:
table_projection
----------------
.. automodule:: paddle.v2.layer
:members: table_projection
:noindex:
trans_full_matrix_projection
----------------------------
.. automodule:: paddle.v2.layer
:members: trans_full_matrix_projection
:noindex:
Aggregate Layers
================
.. _api_v2.layer_pooling:
pooling
-------
.. automodule:: paddle.v2.layer
:members: pooling
:noindex:
.. _api_v2.layer_last_seq:
last_seq
--------
.. automodule:: paddle.v2.layer
:members: last_seq
:noindex:
.. _api_v2.layer_first_seq:
first_seq
---------
.. automodule:: paddle.v2.layer
:members: first_seq
:noindex:
concat
------
.. automodule:: paddle.v2.layer
:members: concat
:noindex:
seq_concat
----------
.. automodule:: paddle.v2.layer
:members: seq_concat
:noindex:
Reshaping Layers
================
block_expand
------------
.. automodule:: paddle.v2.layer
:members: block_expand
:noindex:
.. _api_v2.layer_expand:
expand
------
.. automodule:: paddle.v2.layer
:members: expand
:noindex:
repeat
------
.. automodule:: paddle.v2.layer
:members: repeat
:noindex:
rotate
------
.. automodule:: paddle.v2.layer
:members: rotate
:noindex:
seq_reshape
-----------
.. automodule:: paddle.v2.layer
:members: seq_reshape
:noindex:
Math Layers
===========
addto
-----
.. automodule:: paddle.v2.layer
:members: addto
:noindex:
linear_comb
-----------
.. automodule:: paddle.v2.layer
:members: linear_comb
:noindex:
interpolation
-------------
.. automodule:: paddle.v2.layer
:members: interpolation
:noindex:
bilinear_interp
---------------
.. automodule:: paddle.v2.layer
:members: bilinear_interp
:noindex:
power
-----
.. automodule:: paddle.v2.layer
:members: power
:noindex:
scaling
-------
.. automodule:: paddle.v2.layer
:members: scaling
:noindex:
slope_intercept
---------------
.. automodule:: paddle.v2.layer
:members: slope_intercept
:noindex:
tensor
------
.. automodule:: paddle.v2.layer
:members: tensor
:noindex:
.. _api_v2.layer_cos_sim:
cos_sim
-------
.. automodule:: paddle.v2.layer
:members: cos_sim
:noindex:
trans
-----
.. automodule:: paddle.v2.layer
:members: trans
:noindex:
Sampling Layers
===============
maxid
-----
.. automodule:: paddle.v2.layer
:members: maxid
:noindex:
sampling_id
-----------
.. automodule:: paddle.v2.layer
:members: sampling_id
:noindex:
Slicing and Joining Layers
==========================
pad
----
.. automodule:: paddle.v2.layer
:members: pad
:noindex:
.. _api_v2.layer_costs:
Cost Layers
===========
cross_entropy_cost
------------------
.. automodule:: paddle.v2.layer
:members: cross_entropy_cost
:noindex:
cross_entropy_with_selfnorm_cost
--------------------------------
.. automodule:: paddle.v2.layer
:members: cross_entropy_with_selfnorm_cost
:noindex:
multi_binary_label_cross_entropy_cost
-------------------------------------
.. automodule:: paddle.v2.layer
:members: multi_binary_label_cross_entropy_cost
:noindex:
huber_cost
----------
.. automodule:: paddle.v2.layer
:members: huber_cost
:noindex:
lambda_cost
-----------
.. automodule:: paddle.v2.layer
:members: lambda_cost
:noindex:
rank_cost
---------
.. automodule:: paddle.v2.layer
:members: rank_cost
:noindex:
sum_cost
---------
.. automodule:: paddle.v2.layer
:members: sum_cost
:noindex:
crf
---
.. automodule:: paddle.v2.layer
:members: crf
:noindex:
crf_decoding
------------
.. automodule:: paddle.v2.layer
:members: crf_decoding
:noindex:
ctc
---
.. automodule:: paddle.v2.layer
:members: ctc
:noindex:
warp_ctc
--------
.. automodule:: paddle.v2.layer
:members: warp_ctc
:noindex:
nce
---
.. automodule:: paddle.v2.layer
:members: nce
:noindex:
hsigmoid
---------
.. automodule:: paddle.v2.layer
:members: hsigmoid
:noindex:
Check Layer
============
eos
---
.. automodule:: paddle.v2.layer
:members: eos
:noindex:
========
Networks
========
The v2.networks module contains pieces of neural networks that combine multiple layers.
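For illustration, here is how one of these helpers is typically invoked. This sketch mirrors the MNIST example included in this change; it assumes `img` is an image data layer defined earlier, and the argument values themselves are arbitrary:

.. code-block:: python

    import paddle.v2 as paddle

    # A conv + pool group producing 20 feature maps from a 1-channel image layer.
    conv_pool = paddle.networks.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
        num_channel=1,
        pool_size=2,
        pool_stride=2,
        act=paddle.activation.Tanh())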
NLP
===
sequence_conv_pool
------------------
.. automodule:: paddle.v2.networks
:members: sequence_conv_pool
:noindex:
.. _api_trainer_config_helpers_network_text_conv_pool:
text_conv_pool
--------------
.. automodule:: paddle.v2.networks
:members: text_conv_pool
:noindex:
Images
======
img_conv_bn_pool
----------------
.. automodule:: paddle.v2.networks
:members: img_conv_bn_pool
:noindex:
img_conv_group
--------------
.. automodule:: paddle.v2.networks
:members: img_conv_group
:noindex:
.. _api_trainer_config_helpers_network_simple_img_conv_pool:
simple_img_conv_pool
--------------------
.. automodule:: paddle.v2.networks
:members: simple_img_conv_pool
:noindex:
vgg_16_network
---------------
.. automodule:: paddle.v2.networks
:members: vgg_16_network
:noindex:
Recurrent
=========
LSTM
----
lstmemory_unit
``````````````
.. automodule:: paddle.v2.networks
:members: lstmemory_unit
:noindex:
lstmemory_group
```````````````
.. automodule:: paddle.v2.networks
:members: lstmemory_group
:noindex:
simple_lstm
```````````
.. automodule:: paddle.v2.networks
:members: simple_lstm
:noindex:
bidirectional_lstm
``````````````````
.. automodule:: paddle.v2.networks
:members: bidirectional_lstm
:noindex:
GRU
---
gru_unit
````````
.. automodule:: paddle.v2.networks
:members: gru_unit
:noindex:
gru_group
`````````
.. automodule:: paddle.v2.networks
:members: gru_group
:noindex:
simple_gru
``````````
.. automodule:: paddle.v2.networks
:members: simple_gru
:noindex:
simple_attention
----------------
.. automodule:: paddle.v2.networks
:members: simple_attention
:noindex:
Miscs
=====
dropout_layer
--------------
.. automodule:: paddle.v2.networks
:members: dropout_layer
:noindex:
.. _api_v2.optimizer:
==========
Optimizer
==========
Momentum
========
.. automodule:: paddle.v2.optimizer
:members: Momentum
:noindex:
Adam
====
.. automodule:: paddle.v2.optimizer
:members: Adam
:noindex:
Adamax
======
.. automodule:: paddle.v2.optimizer
:members: Adamax
:noindex:
AdaGrad
=======
.. automodule:: paddle.v2.optimizer
:members: AdaGrad
:noindex:
DecayedAdaGrad
==============
.. automodule:: paddle.v2.optimizer
:members: DecayedAdaGrad
:noindex:
AdaDelta
========
.. automodule:: paddle.v2.optimizer
:members: AdaDelta
:noindex:
RMSProp
=======
.. automodule:: paddle.v2.optimizer
:members: RMSProp
:noindex:
=======
Pooling
=======
BasePool
========
.. automodule:: paddle.v2.pooling
:members: BasePool
:noindex:
Avg
===
.. automodule:: paddle.v2.pooling
:members: Avg
:noindex:
Max
===
.. automodule:: paddle.v2.pooling
:members: Max
:noindex:
Sum
===
.. automodule:: paddle.v2.pooling
:members: Sum
:noindex:
SquareRootN
===========
.. automodule:: paddle.v2.pooling
:members: SquareRootN
:noindex:
CudnnAvg
========
.. automodule:: paddle.v2.pooling
:members: CudnnAvg
:noindex:
CudnnMax
========
.. automodule:: paddle.v2.pooling
:members: CudnnMax
:noindex:
========
Datasets
========
DataTypes
=========
.. automodule:: paddle.v2.data_type
:members:
:noindex:
DataFeeder
==========
.. automodule:: paddle.v2.data_feeder
:members:
:noindex:
Reader
======
.. automodule:: paddle.v2.reader
:members:
:noindex:
.. automodule:: paddle.v2.reader.creator
:members:
:noindex:
minibatch
=========
.. automodule:: paddle.v2.minibatch
:members:
:noindex:
Dataset
=======
.. automodule:: paddle.v2.dataset
:members:
:noindex:
mnist
+++++
.. automodule:: paddle.v2.dataset.mnist
:members:
:noindex:
cifar
+++++
.. automodule:: paddle.v2.dataset.cifar
:members:
:noindex:
conll05
+++++++
.. automodule:: paddle.v2.dataset.conll05
:members:
:noindex:
imdb
++++
.. automodule:: paddle.v2.dataset.imdb
:members:
:noindex:
imikolov
++++++++
.. automodule:: paddle.v2.dataset.imikolov
:members:
:noindex:
movielens
+++++++++
.. automodule:: paddle.v2.dataset.movielens
:members:
:noindex:
sentiment
+++++++++
.. automodule:: paddle.v2.dataset.sentiment
:members:
:noindex:
uci_housing
+++++++++++
.. automodule:: paddle.v2.dataset.uci_housing
:members:
:noindex:
wmt14
+++++
.. automodule:: paddle.v2.dataset.wmt14
:members:
:noindex:
Model Configuration
===================
.. toctree::
:maxdepth: 1
config/activation.rst
config/layer.rst
config/optimizer.rst
config/pooling.rst
config/networks.rst
config/attr.rst
======================
Training and Inference
======================
Parameters
==========
.. automodule:: paddle.v2.parameters
:noindex:
Trainer
=======
.. automodule:: paddle.v2.trainer
:noindex:
Event
=====
.. automodule:: paddle.v2.event
:noindex:
Inference
=========
.. autofunction:: paddle.v2.infer
:noindex:
...@@ -4,9 +4,10 @@ At training and testing time, PaddlePaddle programs need to read data. To ease t ...@@ -4,9 +4,10 @@ At training and testing time, PaddlePaddle programs need to read data. To ease t
- A *reader* is a function that reads data (from file, network, random number generator, etc) and yields data items. - A *reader* is a function that reads data (from file, network, random number generator, etc) and yields data items.
- A *reader creator* is a function that returns a reader function. - A *reader creator* is a function that returns a reader function.
- A *reader* decorator is a function, which accepts one or more readers, and returns a reader. - A *reader decorator* is a function, which accepts one or more readers, and returns a reader.
- A *batch reader* is a function that reads data (from *reader*, file, network, random number generator, etc) and yields a batch of data items.
and provide frequently used reader creators and reader decorators. and provide a function which converts a reader into a batch reader, as well as frequently used reader creators and reader decorators.
## Data Reader Interface ## Data Reader Interface
...@@ -22,24 +23,69 @@ An example implementation for single item data reader creator: ...@@ -22,24 +23,69 @@ An example implementation for single item data reader creator:
```python ```python
def reader_creator_random_image(width, height): def reader_creator_random_image(width, height):
def reader(): def reader():
while True: while True:
yield numpy.random.uniform(-1, 1, size=width*height) yield numpy.random.uniform(-1, 1, size=width*height)
return reader return reader
``` ```
An example implementation for multiple item data reader creator: An example implementation for multiple item data reader creator:
```python ```python
def reader_creator_random_imageand_label(widht, height, label): def reader_creator_random_image_and_label(width, height, label):
def reader(): def reader():
while True: while True:
yield numpy.random.uniform(-1, 1, size=width*height), label yield numpy.random.uniform(-1, 1, size=width*height), label
return reader return reader
```
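For illustration, here is a hypothetical usage sketch of the reader creator above (it assumes `numpy` is imported, as in the snippets):

```python
# Hypothetical usage of the reader creator defined above.
image_reader = reader_creator_random_image(32, 32)
for i, item in enumerate(image_reader()):
    print(item.shape)  # each item is a flat array of 32*32 random values
    if i >= 2:         # the reader is infinite, so stop after a few items
        break
```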
## Batch Reader Interface
A *batch reader* can be any function with no parameter that creates an iterable (anything that can be used in `for x in iterable`). Each element produced by the iterable should be a batch (list) of data items, and each item inside the list must be a tuple.
Here are valid outputs:
```python
# a mini batch of three data items. Each data item consists of three scalar columns.
[(1, 1, 1),
(2, 2, 2),
(3, 3, 3)]
# a mini batch of three data items; each data item has a single column, which is a list.
[([1,1,1],),
([2,2,2],),
([3,3,3],)]
```
Please note that each item inside the list must be a tuple, below is an invalid output:
```python
# wrong, [1,1,1] needs to be inside a tuple: ([1,1,1],).
# Otherwise it's ambiguous whether [1,1,1] means a single column of data [1, 1, 1],
# or three columns of data, each of which is 1.
[[1,1,1],
[2,2,2],
[3,3,3]]
```
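A tiny helper like the following (purely illustrative, not part of the proposed API) captures this rule:

```python
def assert_valid_batch(batch):
    # a batch must be a list, and every data item inside it must be a tuple
    assert isinstance(batch, list)
    for item in batch:
        assert isinstance(item, tuple), "each data item must be a tuple"
```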
It's easy to convert a reader into a batch reader:
```python
mnist_train = paddle.dataset.mnist.train()
mnist_train_batch_reader = paddle.batch(mnist_train, 128)
```
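A quick, hypothetical usage sketch of the resulting batch reader:

```python
# iterate over the batches produced by the batch reader defined above
for batch in mnist_train_batch_reader():
    print(len(batch))  # at most 128 data items per batch
    break
```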
It's also easy to create a custom batch reader:
```python
def custom_batch_reader():
while True:
batch = []
for i in xrange(128):
batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended.
yield batch
mnist_random_image_batch_reader = custom_batch_reader
``` ```
## Usage ## Usage
data reader, mapping from item(s) read to data layer, batch size and number of total pass will be passed into `paddle.train`: batch reader, mapping from item(s) read to data layers, batch size and the total number of passes will be passed into `paddle.train`:
```python ```python
# two data layers are created: # two data layers are created:
...@@ -47,8 +93,8 @@ image_layer = paddle.layer.data("image", ...) ...@@ -47,8 +93,8 @@ image_layer = paddle.layer.data("image", ...)
label_layer = paddle.layer.data("label", ...) label_layer = paddle.layer.data("label", ...)
# ... # ...
batch_reader = paddle.batch(paddle.dataset.mnist.train(), 128)
paddle.train(paddle.dataset.mnist, {"image":0, "label":1}, 128, 10, ...) paddle.train(batch_reader, {"image":0, "label":1}, 128, 10, ...)
``` ```
## Data Reader Decorator ## Data Reader Decorator
...@@ -64,7 +110,7 @@ Since reading data may take time and training can not proceed without data. It i ...@@ -64,7 +110,7 @@ Since reading data may take time and training can not proceed without data. It i
Use `paddle.reader.buffered` to prefetch data: Use `paddle.reader.buffered` to prefetch data:
```python ```python
buffered_reader = paddle.reader.buffered(paddle.dataset.mnist, 100) buffered_reader = paddle.reader.buffered(paddle.dataset.mnist.train(), 100)
``` ```
`buffered_reader` will try to buffer (prefetch) `100` data entries. `buffered_reader` will try to buffer (prefetch) `100` data entries.
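How such a prefetching decorator might work is sketched below; this is only an illustration of the idea (a background thread fills a bounded queue), not the actual `paddle.reader.buffered` implementation:

```python
import threading
try:
    from Queue import Queue  # Python 2
except ImportError:
    from queue import Queue  # Python 3

def buffered(reader, size):
    end_token = object()

    def data_reader():
        q = Queue(maxsize=size)

        def producer():
            for item in reader():
                q.put(item)      # blocks while the buffer is full
            q.put(end_token)     # signal that the underlying reader is exhausted

        t = threading.Thread(target=producer)
        t.daemon = True
        t.start()
        while True:
            item = q.get()
            if item is end_token:
                break
            yield item

    return data_reader
```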
...@@ -77,24 +123,24 @@ We can do: ...@@ -77,24 +123,24 @@ We can do:
```python ```python
def reader_creator_random_image(width, height): def reader_creator_random_image(width, height):
def reader(): def reader():
while True: while True:
yield numpy.random.uniform(-1, 1, size=width*height) yield numpy.random.uniform(-1, 1, size=width*height)
return reader return reader
def reader_creator_bool(t): def reader_creator_bool(t):
def reader(): def reader():
while True: while True:
yield t yield t
return reader return reader
true_reader = reader_creator_bool(True) true_reader = reader_creator_bool(True)
false_reader = reader_creator_bool(False) false_reader = reader_creator_bool(False)
reader = paddle.reader.compose(paddle.dataset.mnist, reader_creator_random_image(20, 20), true_reader, false_reader) reader = paddle.reader.compose(paddle.dataset.mnist.train(), reader_creator_random_image(20, 20), true_reader, false_reader)
# Skipped 1 because paddle.dataset.mnist produces two items per data entry. # Skipped 1 because paddle.dataset.mnist.train() produces two items per data entry.
# And we don't care second item at this time. # And we don't care second item at this time.
paddle.train(reader, {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...) paddle.train(paddle.batch(reader, 128), {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...)
``` ```
### Shuffle ### Shuffle
...@@ -103,16 +149,20 @@ Given shuffle buffer size `n`, `paddle.reader.shuffle` will return a data reader ...@@ -103,16 +149,20 @@ Given shuffle buffer size `n`, `paddle.reader.shuffle` will return a data reader
Example: Example:
```python ```python
reader = paddle.reader.shuffle(paddle.dataset.mnist, 512) reader = paddle.reader.shuffle(paddle.dataset.mnist.train(), 512)
``` ```
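A sketch of how such a shuffle decorator could be implemented (illustrative only; the real `paddle.reader.shuffle` may differ):

```python
import random

def shuffle(reader, buf_size):
    def data_reader():
        buf = []
        for item in reader():
            buf.append(item)
            if len(buf) >= buf_size:
                random.shuffle(buf)
                for b in buf:
                    yield b
                buf = []
        # flush whatever is left in the buffer
        if buf:
            random.shuffle(buf)
            for b in buf:
                yield b
    return data_reader
```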
## Q & A ## Q & A
### Why return only a single entry, but not a mini batch? ### Why does a reader return only a single entry, but not a mini batch?
Always returning a single entry makes reusing existing data readers much easier (e.g., if an existing reader returned 3 entries instead of a single entry, training code would be more complex because it would need to handle cases like a batch size of 2).
We provide the function `paddle.batch` to turn a (single entry) reader into a batch reader.
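A minimal sketch of what such a conversion function could look like (illustrative; not necessarily how `paddle.batch` is actually implemented):

```python
def batch(reader, batch_size):
    def batch_reader():
        b = []
        for item in reader():
            b.append(item)
            if len(b) == batch_size:
                yield b
                b = []
        if b:
            yield b  # the last, possibly smaller, batch
    return batch_reader
```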
If a mini batch is returned, data reader need to take care of batch size. But batch size is a concept for training, it makes more sense for user to specify batch size as a parameter for `train`. ### Why do we need a batch reader? Isn't passing a reader and batch_size to `train` sufficient?
Practically, always return a single entry make reusing existing data readers much easier (e.g., if existing reader return not a single entry but 3 entries, training code will be more complex because it need to handle cases like batch size 2). In most cases, passing a reader and batch_size to `train` would be sufficient. However, sometimes users want to customize the order of data entries inside a mini batch, or even change the batch size dynamically.
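For example, a custom batch reader could grow the batch size over time; the sketch below is hypothetical and only meant to illustrate why this flexibility matters:

```python
def growing_batch_reader(reader, initial_size, max_size):
    def batch_reader():
        size = initial_size
        b = []
        for item in reader():
            b.append(item)
            if len(b) >= size:
                yield b
                b = []
                size = min(size * 2, max_size)  # double the batch size, up to a cap
        if b:
            yield b
    return batch_reader
```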
### Why use a dictionary but not a list to provide mapping? ### Why use a dictionary but not a list to provide mapping?
...@@ -122,22 +172,22 @@ We decided to use dictionary (`{"image":0, "label":1}`) instead of list (`["imag ...@@ -122,22 +172,22 @@ We decided to use dictionary (`{"image":0, "label":1}`) instead of list (`["imag
```python ```python
def image_reader_creator(image_path, label_path, n): def image_reader_creator(image_path, label_path, n):
def reader(): def reader():
f = open(image_path) f = open(image_path)
l = open(label_path) l = open(label_path)
images = numpy.fromfile( images = numpy.fromfile(
f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32') f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0 images = images / 255.0 * 2.0 - 1.0
labels = numpy.fromfile(l, 'ubyte', count=n).astype("int") labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
for i in xrange(n): for i in xrange(n):
yield images[i, :], labels[i] # a single entry of data is created each time yield images[i, :], labels[i] # a single entry of data is created each time
f.close() f.close()
l.close() l.close()
return reader return reader
# images_reader_creator creates a reader # images_reader_creator creates a reader
reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024) reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024)
paddle.train(reader, {"image":0, "label":1}, ...) paddle.train(paddle.batch(reader, 128), {"image":0, "label":1}, ...)
``` ```
### How is `paddle.train` implemented ### How is `paddle.train` implemented
...@@ -145,17 +195,8 @@ paddle.train(reader, {"image":0, "label":1}, ...) ...@@ -145,17 +195,8 @@ paddle.train(reader, {"image":0, "label":1}, ...)
An example implementation of paddle.train could be: An example implementation of paddle.train could be:
```python ```python
def make_minibatch(reader, minibatch_size): def train(batch_reader, mapping, batch_size, total_pass):
def ret(): for pass_idx in range(total_pass):
r = reader() for mini_batch in batch_reader(): # this loop will never end in online learning.
buf = [r.next() for x in xrange(minibatch_size)] do_forward_backward(mini_batch, mapping)
while len(buf) > 0:
yield buf
buf = [r.next() for x in xrange(minibatch_size)]
return ret
def train(reader, mapping, batch_size, total_pass):
for pass_idx in range(total_pass):
for mini_batch in make_minibatch(reader): # this loop will never end in online learning.
do_forward_backward(mini_batch, mapping)
``` ```
安装PaddlePaddle的Docker镜像 PaddlePaddle的Docker容器使用方式
============================ ================================
PaddlePaddle项目提供官方 `Docker <https://www.docker.com/>`_ 镜像。Docker镜像是我们目前唯一官方支持的部署和运行方式 PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 <https://github.com/PaddlePaddle/Paddle/issues/627>`_ 才能充分利用Mac OS X和Windows上的硬件资源
下述内容将分为如下几个类别描述。
* PaddlePaddle提供的Docker镜像版本 通过Docker容器开发PaddlePaddle
* 下载和运行Docker镜像 ------------------------------
* 注意事项
PaddlePaddle提供的Docker镜像版本 开发人员可以在Docker中开发PaddlePaddle。这样开发人员可以以一致的方式在不同的平台上工作 - Linux,Mac OS X和Windows。
--------------------------------
我们提供了12个 `Docker image <https://hub.docker.com/r/paddledev/paddle/tags/>`_ ,他们的image name都是 :code:`paddledev/paddle` ,tag分别为 1. 将开发环境构建为Docker镜像
.. code-block:: bash
+-----------------+------------------+------------------------+-----------------------+ git clone --recursive https://github.com/PaddlePaddle/Paddle
| | normal | devel | demo | cd Paddle
+=================+==================+========================+=======================+ docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile .
| CPU | cpu-latest | cpu-devel-latest | cpu-demo-latest |
+-----------------+------------------+------------------------+-----------------------+
| GPU | gpu-latest | gpu-devel-latest | gpu-demo-latest |
+-----------------+------------------+------------------------+-----------------------+
| CPU WITHOUT AVX | cpu-noavx-latest | cpu-noavx-devel-latest | cpu-noavx-demo-latest |
+-----------------+------------------+------------------------+-----------------------+
| GPU WITHOUT AVX | gpu-noavx-latest | gpu-noavx-devel-latest | gpu-noavx-demo-latest |
+-----------------+------------------+------------------------+-----------------------+
其中,横向包括三个版本,normal,devel和demo。
* Normal: 正常的Docker image,只包括paddle的二进制 请注意,默认情况下,:code:`docker build` 不会将源码导入到镜像中并编译它。如果我们想这样做,需要设置一个参数:
* Devel: 包括Paddle的二进制、编译环境和源代码
* Demo: 包括Paddle运行demo所需要的依赖
纵向包括四个版本,他们是。 .. code-block:: bash
* CPU: CPU版本。需要支持AVX指令集的CPU docker build -t paddle:dev -f paddle/scripts/docker/Dockerfile --build-arg BUILD_AND_INSTALL=ON .
* GPU: GPU版本。需要支持AVX指令集的CPU
* CPU WITHOUT AVX: CPU版本,不支持AVX指令集的CPU也可以运行
* GPU WITHOUT AVX: GPU版本,不需要AVX指令集的CPU也可以运行。
用户可以选择对应版本的docker image。使用如下脚本可以确定本机的CPU是否支持 :code:`AVX` 指令集\:
.. code-block:: bash 2. 运行开发环境
if cat /proc/cpuinfo | grep -q avx ; then echo "Support AVX"; else echo "Not support AVX"; fi 当我们编译好了 :code:`paddle:dev`, 我们可以在docker容器里做开发,源代码可以通过挂载本地文件来被载入Docker的开发环境里面:
.. code-block:: bash
如果输出 :code:`Support AVX`,则可以选择上表中的AVX版本PaddlePaddle。否则需要选择非AVX的PaddlePaddle。选择普通CPU版本的devel版本的image,则可以使用 :code:`paddledev/paddle:cpu-devel-latest` 来引用这个image。 docker run -d -p 2202:22 -v $PWD:/paddle paddle:dev
PaddlePaddle提供的镜像并不包含任何命令运行,想要运行PaddlePaddle,您需要进入镜像运行PaddlePaddle 以上代码会启动一个带有PaddlePaddle开发环境的docker容器,源代码会被挂载到 :code:`/paddle` 。
程序或者自定义一个含有启动脚本的image。具体请参考注意事项中的 :code:`使用ssh访问PaddlePaddle镜像`
下载和运行Docker镜像 请注意, :code:`paddle:dev` 的默认入口是 :code:`sshd` 。以上的 :code:`docker run` 命令其实会启动一个在2202端口监听的SSHD服务器。这样,我们就能SSH进入我们的开发容器了:
--------------------
.. code-block:: bash
为了运行PaddlePaddle的docker镜像,您需要在机器中安装好Docker。安装Docker需要您的机器 ssh root@localhost -p 2202
至少具有3.10以上的linux kernel。安装方法请参考
`Docker的官方文档 <https://docs.docker.com/engine/installation/>`_ 。如果您使用
mac osx或者是windows机器,请参考
`mac osx的安装文档 <https://docs.docker.com/engine/installation/mac/>`_ 和
`windows 的安装文档 <https://docs.docker.com/engine/installation/windows/>`_ 。
您可以使用 :code:`docker pull` 命令пред先下载镜像,也可以直接执行 3. 在Docker开发环境中编译与安装PaddlePaddle代码
:code:`docker run` 命令运行镜像。执行方法如下:
.. code-block:: bash 当在容器里面的时候,可以用脚本 :code:`paddle/scripts/docker/build.sh` 来编译、安装与测试PaddlePaddle:
$ docker run -it paddledev/paddle:cpu-latest .. code-block:: bash
/paddle/paddle/scripts/docker/build.sh
即可启动和进入PaddlePaddle的container。如果运行GPU版本的PaddlePaddle,则需要先将 以上指令会在 :code:`/paddle/build` 中编译PaddlePaddle。通过以下指令可以运行单元测试:
cuda相关的Driver和设备映射进container中,脚本类似于
.. code-block:: bash
.. code-block:: bash cd /paddle/build
ctest
$ export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" 4. 在Docker容器中运行PaddlePaddle书籍
$ export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
$ docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest
进入Docker container后,运行 :code:`paddle version` 即可打印出PaddlePaddle的版本和构建 Jupyter Notebook是一个开源的web程序,大家可以通过它制作和分享带有代码、公式、图表、文字的交互式文档。用户可以通过网页浏览文档。
信息。安装完成的PaddlePaddle主体包括三个部分, :code:`paddle` 脚本, python的
:code:`paddle` 包和 :code:`py_paddle` 包。其中\:
* :code:`paddle` 脚本和 :code:`paddle` 的python包是PaddlePaddle的训练主要程序。使用 PaddlePaddle书籍是为用户和开发者制作的一个交互式的Jupyter Notebook。
:code:`paddle` 脚本可以启动PaddlePaddle的训练进程和pserver。而 :code:`paddle` 脚本 如果您想要更深入了解deep learning,PaddlePaddle书籍一定是您最好的选择。
中的二进制使用了 :code:`paddle` 的python包来做配置文件解析等工作。
* python包 :code:`py_paddle` 是一个swig封装的PaddlePaddle包,用来做预测和简单的定制化 当您进入容器内之后,只用运行以下命令:
训练。
注意事项 .. code-block:: bash
--------
jupyter notebook
性能问题 然后在浏览器中输入以下网址:
++++++++
.. code-block:: text
由于Docker是基于容器的轻量化虚拟方案,所以在CPU的运算性能上并不会有严重的影响。 http://localhost:8888/
而GPU的驱动和设备全部映射到了容器内,所以GPU在运算性能上也不会有严重的影响。
但是如果使用了高性能的网卡,例如RDMA网卡(RoCE 40GbE 或者 IB 56GbE),或者高性能的 就这么简单,享受您的旅程!
以太网卡 (10GbE)。推荐使用将本地网卡,即 "--net=host" 来进行训练。而不使用docker
的网桥来进行网络通信。
远程访问问题和二次开发 纯CPU和GPU的docker镜像
++++++++++++++++++++++ ----------------------
由于PaddlePaddle的Docker镜像并不包含任何预定义的运行命令。所以如果想要在后台启用ssh 对于每一个PaddlePaddle版本,我们都会发布两个Docker镜像:纯CPU的和GPU的。我们通过设置 `dockerhub.com <https://hub.docker.com/r/paddledev/paddle/>`_ 自动运行以下两个命令:
远程访问,则需要进行一定的二次开发,将ssh装入系统内并开启远程访问。二次开发可以
使用Dockerfile构建一个全新的docker image。需要参考
`Dockerfile的文档 <https://docs.docker.com/engine/reference/builder/>`_ 和
`Dockerfile的最佳实践 <https://docs.docker.com/engine/userguide/eng-image/dockerfile_best-practices/>`_
两个文档。
简单的含有ssh的Dockerfile如下: .. code-block:: bash
.. code-block:: bash docker build -t paddle:cpu -f paddle/scripts/docker/Dockerfile --build-arg BUILD_AND_INSTALL=ON .
docker build -t paddle:gpu -f paddle/scripts/docker/Dockerfile.gpu --build-arg BUILD_AND_INSTALL=ON .
FROM paddledev/paddle:cpu-latest 以交互容器方式运行纯CPU的镜像:
MAINTAINER PaddlePaddle dev team <paddle-dev@baidu.com> .. code-block:: bash
RUN apt-get update docker run -it --rm paddledev/paddle:0.10.0rc1-cpu /bin/bash
RUN apt-get install -y openssh-server
RUN mkdir /var/run/sshd
RUN echo 'root:root' | chpasswd
RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config 或者,可以以后台进程方式运行容器:
RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
EXPOSE 22 .. code-block:: bash
CMD ["/usr/sbin/sshd", "-D"] docker run -d -p 2202:22 paddledev/paddle:0.10.0rc1-cpu
然后用密码 :code:`root` SSH进入容器:
使用该Dockerfile构建出镜像,然后运行这个container即可。相关命令为\: .. code-block:: bash
.. code-block:: bash ssh -p 2202 root@localhost
# cd到含有Dockerfile的路径中 SSH方式的一个优点是我们可以从多个终端进入容器。比如,一个终端运行vi,另一个终端运行Python。另一个好处是我们可以把PaddlePaddle容器运行在远程服务器上,并在笔记本上通过SSH与其连接。
$ docker build . -t paddle_ssh
# 运行这个container,将宿主机的8022端口映射到container的22端口上
$ docker run -d -p 8022:22 --name paddle_ssh_machine paddle_ssh
执行如下命令即可以关闭这个container,并且删除container中的数据\:
.. code-block:: bash 以上方法在GPU镜像里也能用-只是请不要忘记安装CUDA驱动,以及告诉Docker:
# 关闭container
$ docker stop paddle_ssh_machine
# 删除container
$ docker rm paddle_ssh_machine
如果想要在外部机器访问这个container,即可以使用ssh访问宿主机的8022端口。用户名为 .. code-block:: bash
root,密码也是root。命令为\:
.. code-block:: bash export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0rc1-gpu
$ ssh -p 8022 root@YOUR_HOST_MACHINE
至此,您就可以远程的使用PaddlePaddle啦。 非AVX镜像
---------
纯CPU镜像以及GPU镜像都会用到AVX指令集,但是2008年之前生产的旧电脑不支持AVX。以下指令能检查Linux电脑是否支持AVX:
.. code-block:: bash
if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi
如果输出是No,我们就需要手动编译一个非AVX版本的镜像:
.. code-block:: bash
cd ~
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
docker build --build-arg WITH_AVX=OFF -t paddle:cpu-noavx -f paddle/scripts/docker/Dockerfile .
docker build --build-arg WITH_AVX=OFF -t paddle:gpu-noavx -f paddle/scripts/docker/Dockerfile.gpu .
文档
----
Paddle的Docker镜像带有一个通过 `woboq code browser
<https://github.com/woboq/woboq_codebrowser>`_ 生成的HTML版本的C++源代码,便于用户浏览C++源码。
只要在Docker里启动PaddlePaddle的时候给它一个名字,就可以再运行另一个Nginx Docker镜像来服务HTML代码:
.. code-block:: bash
docker run -d --name paddle-cpu-doc paddle:0.10.0rc1-cpu
docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx
接着我们就能够打开浏览器在 http://localhost:8088/paddle/ 浏览代码。
...@@ -42,7 +42,7 @@ Windows -- in a consistent way. ...@@ -42,7 +42,7 @@ Windows -- in a consistent way.
.. code-block:: bash .. code-block:: bash
docker run -d -p 2202:22 -v $PWD:/paddle paddle:dev docker run -d -p 2202:22 -p 8888:8888 -v $PWD:/paddle paddle:dev
This runs a container of the development environment Docker image This runs a container of the development environment Docker image
with the local source tree mounted to :code:`/paddle` of the with the local source tree mounted to :code:`/paddle` of the
...@@ -82,6 +82,29 @@ Windows -- in a consistent way. ...@@ -82,6 +82,29 @@ Windows -- in a consistent way.
cd /paddle/build cd /paddle/build
ctest ctest
4. Run PaddlePaddle Book under Docker Container
The Jupyter Notebook is an open-source web application that allows
you to create and share documents that contain live code, equations,
visualizations and explanatory text in a single browser.
PaddlePaddle Book is an interactive Jupyter Notebook for users and developers.
We have already exposed port 8888 for this book. If you want to
dig deeper into deep learning, PaddlePaddle Book is definitely your best choice.
Once you are inside the container, simply issue the command:
.. code-block:: bash
jupyter notebook
Then, paste the following address into your local browser:
.. code-block:: text
http://localhost:8888/
That's all. Enjoy your journey!
CPU-only and GPU Images CPU-only and GPU Images
----------------------- -----------------------
...@@ -93,21 +116,21 @@ automatically runs the following commands: ...@@ -93,21 +116,21 @@ automatically runs the following commands:
.. code-block:: bash .. code-block:: bash
docker build -t paddle:cpu -f paddle/scripts/docker/Dockerfile . docker build -t paddle:cpu -f paddle/scripts/docker/Dockerfile --build-arg BUILD_AND_INSTALL=ON .
docker build -t paddle:gpu -f paddle/scripts/docker/Dockerfile.gpu . docker build -t paddle:gpu -f paddle/scripts/docker/Dockerfile.gpu --build-arg BUILD_AND_INSTALL=ON .
To run the CPU-only image as an interactive container: To run the CPU-only image as an interactive container:
.. code-block:: bash .. code-block:: bash
docker run -it --rm paddledev/paddle:cpu-latest /bin/bash docker run -it --rm paddledev/paddle:0.10.0rc1-cpu /bin/bash
or, we can run it as a daemon container or, we can run it as a daemon container
.. code-block:: bash .. code-block:: bash
docker run -d -p 2202:22 paddledev/paddle:cpu-latest docker run -d -p 2202:22 paddledev/paddle:0.10.0rc1-cpu
and SSH to this container using password :code:`root`: and SSH to this container using password :code:`root`:
...@@ -129,7 +152,7 @@ to install CUDA driver and let Docker knows about it: ...@@ -129,7 +152,7 @@ to install CUDA driver and let Docker knows about it:
export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')"
export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}')
docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:gpu-latest docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0rc1-gpu
Non-AVX Images Non-AVX Images
...@@ -171,7 +194,7 @@ container: ...@@ -171,7 +194,7 @@ container:
.. code-block:: bash .. code-block:: bash
docker run -d --name paddle-cpu-doc paddle:cpu docker run -d --name paddle-cpu-doc paddle:0.10.0rc1-cpu
docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
usage/cmd_parameter/index_cn.rst usage/cmd_parameter/index_cn.rst
usage/concepts/use_concepts_cn.rst usage/concepts/use_concepts_cn.rst
usage/cluster/cluster_train_cn.md usage/cluster/cluster_train_cn.md
usage/k8s/k8s_basis_cn.md
usage/k8s/k8s_cn.md usage/k8s/k8s_cn.md
usage/k8s/k8s_distributed_cn.md usage/k8s/k8s_distributed_cn.md
......
# Kubernetes 简介
[*Kubernetes*](http://kubernetes.io/)是Google开源的容器集群管理系统,其提供应用部署、维护、扩展机制等功能,利用Kubernetes能方便地管理跨机器运行容器化的应用。Kubernetes可以在物理机或虚拟机上运行,且支持部署到[AWS](http://kubernetes.io/docs/getting-started-guides/aws)[Azure](http://kubernetes.io/docs/getting-started-guides/azure/)[GCE](http://kubernetes.io/docs/getting-started-guides/gce)等多种公有云环境。介绍分布式训练之前,需要对[Kubernetes](http://kubernetes.io/)有一个基本的认识,下面先简要介绍一下本文用到的几个Kubernetes概念。
- [*Node*](http://kubernetes.io/docs/admin/node/) 表示一个Kubernetes集群中的一个工作节点,这个节点可以是物理机或者虚拟机,Kubernetes集群就是由node节点与master节点组成的。
- [*Pod*](http://kubernetes.io/docs/user-guide/pods/) 是一组(一个或多个)容器,pod是Kubernetes的最小调度单元,一个pod中的所有容器会被调度到同一个node上。Pod中的容器共享NET,PID,IPC,UTS等Linux namespace。由于容器之间共享NET namespace,所以它们使用同一个IP地址,可以通过*localhost*互相通信。不同pod之间可以通过IP地址访问。
- [*Job*](http://kubernetes.io/docs/user-guide/jobs/) 描述Kubernetes上运行的作业,一次作业称为一个job,通常每个job包括一个或者多个pods,job启动后会创建这些pod并开始执行一个程序,等待这个程序执行成功并返回0则成功退出,如果执行失败,也可以配置不同的重试机制。
- [*Volume*](http://kubernetes.io/docs/user-guide/volumes/) 存储卷,是pod内的容器都可以访问的共享目录,也是容器与node之间共享文件的方式,因为容器内的文件都是暂时存在的,当容器因为各种原因被销毁时,其内部的文件也会随之消失。通过volume,就可以将这些文件持久化存储。Kubernetes支持多种volume,例如hostPath(宿主机目录),gcePersistentDisk,awsElasticBlockStore等。
- [*Namespaces*](https://kubernetes.io/docs/user-guide/namespaces/) 命名空间,在kubernetes中创建的所有资源对象(例如上文的pod,job)等都属于一个命名空间,在同一个命名空间中,资源对象的名字是唯一的,不同空间的资源名可以重复,命名空间主要为了对象进行逻辑上的分组便于管理。本文只使用了默认命名空间。
- [*PersistentVolume*](https://kubernetes.io/docs/user-guide/persistent-volumes/): 和[*PersistentVolumeClaim*](https://kubernetes.io/docs/user-guide/persistent-volumes/#persistentvolumeclaims)结合,将外部的存储服务在Kubernetes中描述成为统一的资源形式,便于存储资源管理和Pod引用。
# 部署Kubernetes集群
Kubernetes提供了多种集群部署的方案,本文档内不重复介绍。这里给出几种常见的部署方法:
- [*minikube*](https://kubernetes.io/docs/getting-started-guides/minikube/): 快速在本地启动一个单机的kubernetes服务器,便于本地验证和测试。
- [*kubeadm*](http://kubernetes.io/docs/getting-started-guides/kubeadm/): 在不同操作系统,不同主机(Bare-Metal, AWS, GCE)条件下,快速部署集群。
- [*AWS EC2*](https://kubernetes.io/docs/getting-started-guides/aws/): 在aws上快速部署集群。
- [*Bare-Metal*](https://kubernetes.io/docs/getting-started-guides/centos/centos_manual_config/): 在物理机上手动部署。
可以参考[这个表格](https://kubernetes.io/docs/getting-started-guides/#table-of-solutions)选择适合您的场景的合适方案。
# 选择存储方案
容器不会保留在运行时生成的数据,job或者应用程序在容器中运行时生成的数据会在容器销毁时消失。为了完成分布式机器学习训练任务,需要有一个外部的存储服务来保存训练所需数据和训练输出。
常见的可选存储服务包括:
- [*NFS*](https://github.com/kubernetes/kubernetes/tree/master/examples/volumes/nfs): 可以将磁盘上某个目录共享给网络中其他机器访问。部署和配置比较简单,可以用于小量数据的验证。不提供分布式存储,高可用,冗余等功能。NFS的部署方法可以参考[这里](http://www.tecmint.com/how-to-setup-nfs-server-in-linux/)
- [*GlusterFS*](http://gluster.readthedocs.io/en/latest/Quick-Start-Guide/Quickstart/): 网络分布式文件系统,可以在Kubernetes中按照[这个](https://github.com/kubernetes/kubernetes/tree/master/examples/volumes/glusterfs)例子使用。
- [*Ceph*](http://docs.ceph.com/docs/master/): 分布式文件系统,支持rbd,POSIX API接口(ceph fs)和对象存储API,参考[这里](https://kubernetes.io/docs/user-guide/volumes/#rbd)
- [*MooseFS*](https://moosefs.com/documentation.html): 一个分布式的存储系统。需要先挂载到服务器Node上再通过kubernetes hostPath Volume挂载到容器中。
# 配置kubectl
## 安装kubectl
```
# OS X
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/darwin/amd64/kubectl
# Linux
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl
# Windows
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/windows/amd64/kubectl.exe
```
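下载完成后,通常还需要为 kubectl 添加可执行权限并将其移动到 PATH 中(以下命令以 Linux/Mac 为例,仅供参考):

```
chmod +x ./kubectl
sudo mv ./kubectl /usr/local/bin/kubectl
```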
## 配置kubectl访问你的kubernetes集群
编辑`~/.kube/config`这个配置文件,修改`Master-IP`的地址。如果使用SSL认证,则需要配置`certificate-authority``users`中的用户证书。如果是使用非SSL方式访问(比如通过8080端口),也可以去掉这些证书的配置。
```
apiVersion: v1
clusters:
- cluster:
certificate-authority: /path/to/ca.crt
server: https://[Master-IP]:443
name: minikube
contexts:
- context:
cluster: minikube
user: minikube
name: minikube
current-context: minikube
kind: Config
preferences: {}
users:
- name: minikube
user:
client-certificate: /path/to/apiserver.crt
client-key: /Users/wuyi/.minikube/apiserver.key
```
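配置完成后,可以用下面的命令简单验证 kubectl 是否能够访问集群(命令仅作参考):

```
kubectl cluster-info
kubectl get nodes
```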
...@@ -2,181 +2,96 @@ ...@@ -2,181 +2,96 @@
前一篇文章介绍了如何在Kubernetes集群上启动一个单机PaddlePaddle训练作业 (Job)。在这篇文章里,我们介绍如何在Kubernetes集群上进行分布式PaddlePaddle训练作业。关于PaddlePaddle的分布式训练,文章 [Cluster Training](https://github.com/baidu/Paddle/blob/develop/doc/cluster/opensource/cluster_train.md)介绍了一种通过SSH远程分发任务,进行分布式训练的方法,与此不同的是,本文将介绍在Kubernetes容器管理平台上快速构建PaddlePaddle容器集群,进行分布式训练的方案。 前一篇文章介绍了如何在Kubernetes集群上启动一个单机PaddlePaddle训练作业 (Job)。在这篇文章里,我们介绍如何在Kubernetes集群上进行分布式PaddlePaddle训练作业。关于PaddlePaddle的分布式训练,文章 [Cluster Training](https://github.com/baidu/Paddle/blob/develop/doc/cluster/opensource/cluster_train.md)介绍了一种通过SSH远程分发任务,进行分布式训练的方法,与此不同的是,本文将介绍在Kubernetes容器管理平台上快速构建PaddlePaddle容器集群,进行分布式训练的方案。
## Kubernetes 基本概念 有关Kubernetes相关概念以及如何搭建和配置Kubernetes集群,可以参考[k8s_basis](./k8s_basis_cn.md)
[*Kubernetes*](http://kubernetes.io/)是Google开源的容器集群管理系统,其提供应用部署、维护、 扩展机制等功能,利用Kubernetes能方便地管理跨机器运行容器化的应用。Kubernetes可以在物理机或虚拟机上运行,且支持部署到[AWS](http://kubernetes.io/docs/getting-started-guides/aws)[Azure](http://kubernetes.io/docs/getting-started-guides/azure/)[GCE](http://kubernetes.io/docs/getting-started-guides/gce)等多种公有云环境。介绍分布式训练之前,需要对[Kubernetes](http://kubernetes.io/)有一个基本的认识,下面先简要介绍一下本文用到的几个Kubernetes概念。
- [*Node*](http://kubernetes.io/docs/admin/node/) 表示一个Kubernetes集群中的一个工作节点,这个节点可以是物理机或者虚拟机,Kubernetes集群就是由node节点与master节点组成的。
- [*Pod*](http://kubernetes.io/docs/user-guide/pods/) 是一组(一个或多个)容器,pod是Kubernetes的最小调度单元,一个pod中的所有容器会被调度到同一个node上。Pod中的容器共享NET,PID,IPC,UTS等Linux namespace。由于容器之间共享NET namespace,所以它们使用同一个IP地址,可以通过*localhost*互相通信。不同pod之间可以通过IP地址访问。
- [*Job*](http://kubernetes.io/docs/user-guide/jobs/) 是Kubernetes上运行的作业,一次作业称为一个job,通常每个job包括一个或者多个pods。
- [*Volume*](http://kubernetes.io/docs/user-guide/volumes/) 存储卷,是pod内的容器都可以访问的共享目录,也是容器与node之间共享文件的方式,因为容器内的文件都是暂时存在的,当容器因为各种原因被销毁时,其内部的文件也会随之消失。通过volume,就可以将这些文件持久化存储。Kubernetes支持多种volume,例如hostPath(宿主机目录),gcePersistentDisk,awsElasticBlockStore等。
- [*Namespaces*](http://kubernetes.io/docs/user-guide/volumes/) 命名空间,在kubernetes中创建的所有资源对象(例如上文的pod,job)等都属于一个命名空间,在同一个命名空间中,资源对象的名字是唯一的,不同空间的资源名可以重复,命名空间主要为了对象进行逻辑上的分组便于管理。本文只使用了默认命名空间。
## 整体方案 ## 整体方案
### 部署Kubernetes集群 在训练之前,用户将配置与训练数据切分好放在分布式文件系统预先分配好的目录中(不同的分布式文件系统,需要使用其制定的方式挂载后并导入数据),训练时,程序从此目录拷贝文件到容器内进行训练,将结果保存到此目录里。整体的结构图如下:
首先,我们需要拥有一个Kubernetes集群,在这个集群中所有node与pod都可以互相通信。关于Kubernetes集群搭建,可以参考[官方文档](http://kubernetes.io/docs/getting-started-guides/kubeadm/),在以后的文章中我们也会介绍AWS上搭建的方案。本文假设大家能找到几台物理机,并且可以按照官方文档在上面部署Kubernetes。在本文的环境中,Kubernetes集群中所有node都挂载了一个[MFS](http://moosefs.org/)(Moose filesystem,一种分布式文件系统)共享目录,我们通过这个目录来存放训练文件与最终输出的模型。关于MFS的安装部署,可以参考[MooseFS documentation](https://moosefs.com/documentation.html)。在训练之前,用户将配置与训练数据切分好放在MFS目录中,训练时,程序从此目录拷贝文件到容器内进行训练,将结果保存到此目录里。整体的结构图如下:
![paddle on kubernetes结构图](src/k8s-paddle-arch.png) ![paddle on kubernetes结构图](src/k8s-paddle-arch.png)
上图描述了一个3节点的分布式训练场景,Kubernetes集群的每个node上都挂载了一个MFS目录,这个目录可以通过volume的形式挂载到容器中。Kubernetes为这次训练创建了3个pod并且调度到了3个node上运行,每个pod包含一个PaddlePaddle容器。在容器创建后,会启动pserver与trainer进程,读取volume中的数据进行这次分布式训练。 上图描述了一个3节点的分布式训练场景,在每个Pod上都通过volume方式挂载分布式文件系统的一个目录用于保存训练数据和输出结果。Kubernetes为这次训练创建了3个pod并且调度到了3个node上运行,每个pod包含一个PaddlePaddle容器。在容器创建后,会启动pserver与trainer进程,读取volume中的数据进行这次分布式训练。
### 使用 Job
我们使用Kubernetes中的job这个概念来代表一次分布式训练。Job表示一次性作业,在作业完成后,Kubernetes会销毁job产生的容器并且释放相关资源。
在Kubernetes中,可以通过编写一个YAML文件,来描述这个job,在这个文件中,主要包含了一些配置信息,例如PaddlePaddle的节点个数,`paddle pserver`开放的端口个数与端口号,使用的网卡设备等,这些信息通过环境变量的形式传递给容器内的程序使用。
在一次分布式训练中,用户确定好本次训练需要的PaddlePaddle节点个数,将切分好的训练数据与配置文件上传到MFS共享目录中。然后编写这次训练的job YAML文件,提交给Kubernetes集群创建并开始作业。
### 创建PaddlePaddle节点
当Kubernetes master收到请求,解析完YAML文件后,会创建出多个pod(个数为PaddlePaddle节点数),Kubernetes会把这些pod调度到集群的node上运行。一个pod就代表一个PaddlePaddle节点,当pod被成功分配到一台物理/虚拟机上后,Kubernetes会启动pod内的容器,这个容器会根据YAML文件中的环境变量,启动`paddle pserver``paddle train`进程。 根据前文的描述,要在已有的Kubernetes集群上进行PaddlePaddle的分布式训练,按照下面步骤即可:
### 启动训练 1. [制作PaddlePaddle镜像](#制作镜像)
1. [将训练文件与切分好的数据上传到共享存储](#上传训练文件)
在容器启动后,会通过脚本来启动这次分布式训练,我们知道`paddle train`进程启动时需要知道其他节点的IP地址以及本节点的trainer_id,由于PaddlePaddle本身不提供类似服务发现的功能,所以在本文的启动脚本中,每个节点会根据job name向Kubernetes apiserver查询这个job对应的所有pod信息(Kubernetes默认会在每个容器的环境变量中写入apiserver的地址)。 1. [编写本次训练的YAML文件,创建一个Kubernetes job](#创建Job)
1. [训练结束后查看输出结果](#查看输出)
根据这些pod信息,就可以通过某种方式,为每个pod分配一个唯一的trainer_id。本文把所有pod的IP地址进行排序,将顺序作为每个PaddlePaddle节点的trainer_id。启动脚本的工作流程大致如下:
1. 查询Kubernetes apiserver获取pod信息,根据IP分配trainer_id
1. 从MFS共享目录中拷贝训练文件到容器内
1. 根据环境变量,解析出`paddle pserver``paddle train`的启动参数,启动进程
1. 训练时,PaddlePaddle会自动将结果保存在trainer_id为0的节点上,将输出路径设置为MFS目录,保存输出的文件
## 搭建过程
根据前文的描述,要在已有的Kubernetes集群上进行PaddlePaddle的分布式训练,主要分为以下几个步骤:
1. 制作PaddlePaddle镜像
1. 将训练文件与切分好的数据上传到共享存储
1. 编写本次训练的YAML文件,创建一个Kubernetes job
1. 训练结束后查看输出结果
下面就根据这几个步骤分别介绍。 下面就根据这几个步骤分别介绍。
### 制作镜像 ### 制作镜像
PaddlePaddle镜像需要提供`paddle pserver``paddle train`进程的运行环境,用这个镜像创建的容器需要有以下两个功能: PaddlePaddle镜像需要提供`paddle pserver``paddle train`进程的运行环境,用这个镜像创建的容器需要有以下两个功能:
- 拷贝训练文件到容器内 - 拷贝训练文件到容器内
- 生成`paddle pserver``paddle train`进程的启动参数,并且启动训练 - 生成`paddle pserver``paddle train`进程的启动参数,并且启动训练
因为官方镜像 `paddledev/paddle:cpu-latest` 内已经包含PaddlePaddle的执行程序但是还没上述功能,所以我们可以在这个基础上,添加启动脚本,制作新镜像来完成以上的工作。镜像的*Dockerfile*如下: 因为官方镜像 `paddledev/paddle:cpu-latest` 内已经包含PaddlePaddle的执行程序但是还没上述功能,所以我们可以在这个基础上,添加启动脚本,制作新镜像来完成以上的工作。参考镜像的[*Dockerfile*](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/usage/cluster/k8s/src/k8s_train/Dockerfile)
```Dockerfile
FROM paddledev/paddle:cpu-latest
MAINTAINER zjsxzong89@gmail.com
COPY start.sh /root/
COPY start_paddle.py /root/
CMD ["bash"," -c","/root/start.sh"]
```
[start.sh](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/usage/cluster/k8s/start.sh)文件拷贝训练文件到容器内,然后执行[start_paddle.py](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/usage/cluster/k8s/start_paddle.py)脚本启动训练,前文提到的获取其他节点IP地址,分配`trainer_id`等都在`start_paddle.py`脚本中完成。
`start_paddle.py`脚本开始时,会先进行参数的初始化与解析。
```python
parser = argparse.ArgumentParser(prog="start_paddle.py",
description='simple tool for k8s')
args, train_args_list = parser.parse_known_args()
train_args = refine_unknown_args(train_args_list)
train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2]))
podlist = getPodList()
```
然后通过函数`getPodList()`访问Kubernetes的接口来查询此job对应的所有pod信息。当所有pod都处于running状态(容器运行都运行)时,再通过函数`getIdMap(podlist)`获取trainer_id。
```python
podlist = getPodList()
# need to wait until all pods are running
while not isPodAllRunning(podlist):
time.sleep(10)
podlist = getPodList()
idMap = getIdMap(podlist)
```
在函数`getIdMap(podlist)`内部,我们通过读取`podlist`中每个pod的IP地址,将IP排序生成的序号作为trainer_id。
```python
def getIdMap(podlist):
'''
generate tainer_id by ip
'''
ips = []
for pod in podlist["items"]:
ips.append(pod["status"]["podIP"])
ips.sort()
idMap = {}
for i in range(len(ips)):
idMap[ips[i]] = i
return idMap
```
在得到`idMap`后,通过函数`startPaddle(idMap, train_args_dict)`构造`paddle pserver``paddle train`的启动参数并执行进程。
在函数`startPaddle`中,最主要的工作就是解析出`paddle pserver``paddle train`的启动参数。例如`paddle train`参数的解析,解析环境变量得到`PADDLE_NIC``PADDLE_PORT``PADDLE_PORTS_NUM`等参数,然后通过自身的IP地址在`idMap`中获取`trainerId`
```python
program = 'paddle train'
args = " --nics=" + PADDLE_NIC
args += " --port=" + str(PADDLE_PORT)
args += " --ports_num=" + str(PADDLE_PORTS_NUM)
args += " --comment=" + "paddle_process_by_paddle"
ip_string = ""
for ip in idMap.keys():
ip_string += (ip + ",")
ip_string = ip_string.rstrip(",")
args += " --pservers=" + ip_string
args_ext = ""
for key, value in train_args_dict.items():
args_ext += (' --' + key + '=' + value)
localIP = socket.gethostbyname(socket.gethostname())
trainerId = idMap[localIP]
args += " " + args_ext + " --trainer_id=" + \
str(trainerId) + " --save_dir=" + JOB_PATH_OUTPUT
```
使用 `docker build` 构建镜像:
```bash ```bash
docker build -t your_repo/paddle:mypaddle . $ cd doc/howto/usage/k8s/src/k8s_train
$ docker build -t [YOUR_REPO]/paddle:mypaddle .
``` ```
然后将构建成功的镜像上传到镜像仓库。 然后将构建成功的镜像上传到镜像仓库。
```bash ```bash
docker push your_repo/paddle:mypaddle docker push [YOUR_REPO]/paddle:mypaddle
``` ```
注意上述命令中`your_repo`表示读者所使用的Docker镜像仓库地址,读者需要替换成自己使用的仓库地址。下文使用`your_repo/paddle:mypaddle`这个地址来表示此步骤所构建出的镜像。 注意上述命令中`[YOUR_REPO]`表示读者所使用的Docker镜像仓库地址,读者需要替换成自己使用的仓库地址。下文使用`[YOUR_REPO]/paddle:mypaddle`这个地址来表示此步骤所构建出的镜像。
### 上传训练文件 ### 准备训练数据
本文使用PaddlePaddle官方的[recommendation demo](http://www.paddlepaddle.org/doc/demo/index.html#recommendation)作为这次训练的内容,我们将训练文件与数据放在一个job name命名的目录中,上传到MFS共享存储。完成后MFS上的文件内容大致如下: 这里我们通过在Kubernetes集群上启动一个Job来下载并切割数据,也可以通过修改[k8s_train](./src/k8s_train/README.md)的内容来定制image.
```bash 在启动Job之前,需要根据不同的分布式存储来绑定一个[persistentVolumeClaim](https://kubernetes.io/docs/user-guide/persistent-volumes/),生成的数据将会存储在这个volume下.
[root@paddle-kubernetes-node0 mfs]# tree -d
```yaml
apiVersion: batch/v1
kind: Job
metadata:
name: paddle-data
spec:
template:
metadata:
name: pi
spec:
hostNetwork: true
containers:
- name: paddle-data
image: paddledev/paddle-tutorial:k8s_data
imagePullPolicy: Always
volumeMounts:
- mountPath: "/mnt"
name: nfs
env:
- name: OUT_DIR
value: /home/work/mfs/paddle-cluster-job
- name: SPLIT_COUNT
value: "3"
volumes:
- name: nfs
persistentVolumeClaim:
claimName: mfs
restartPolicy: Never
```
完成后volume中的文件内容大致如下:
```base
[root@paddle-kubernetes-node0 nfsdir]$ tree -d
. .
└── paddle-cluster-job `-- paddle-cluster-job
├── data |-- 0
│   ├── 0 | `-- data
│   │ |-- 1
│   ├── 1 | `-- data
│   │ |-- 2
│   └── 2 | `-- data
├── output |-- output
└── recommendation |-- quick_start
``` ```
目录中paddle-cluster-job是本次训练对应的job name,本次训练要求有3个PaddlePaddle节点,在paddle-cluster-job/data目录中存放切分好的数据,文件夹0,1,2分别代表3个节点的trainer_id。recommendation文件夹内存放训练文件,output文件夹存放训练结果与日志。 目录中paddle-cluster-job是本次训练对应的job name,本次训练要求有3个PaddlePaddle节点,在paddle-cluster-job/data目录中存放切分好的数据,文件夹0,1,2分别代表3个节点的trainer_id。recommendation文件夹内存放训练文件,output文件夹存放训练结果与日志。
...@@ -205,7 +120,7 @@ spec: ...@@ -205,7 +120,7 @@ spec:
path: /home/work/mfs path: /home/work/mfs
containers: containers:
- name: trainer - name: trainer
image: your_repo/paddle:mypaddle image: [YOUR_REPO]/paddle:mypaddle
command: ["bin/bash", "-c", "/root/start.sh"] command: ["bin/bash", "-c", "/root/start.sh"]
env: env:
- name: JOB_NAME - name: JOB_NAME
...@@ -236,15 +151,16 @@ spec: ...@@ -236,15 +151,16 @@ spec:
`env`字段表示容器的环境变量,我们将`paddle`运行的一些参数通过这种方式传递到容器内。 `env`字段表示容器的环境变量,我们将`paddle`运行的一些参数通过这种方式传递到容器内。
`JOB_PATH`表示共享存储挂载的路径,`JOB_NAME`表示job名字,`TRAIN_CONFIG_DIR`表示本次训练文件所在目录,这三个变量组合就可以找到本次训练需要的文件路径。 环境变量 | 说明
--- | ---
`CONF_PADDLE_NIC`表示`paddle pserver`进程需要的`--nics`参数,即网卡名 JOB_PATH | 共享存储挂载的路径
JOB_NAME | Job的名字
`CONF_PADDLE_PORT`表示`paddle pserver``--port`参数,`CONF_PADDLE_PORTS_NUM`则表示稠密更新的端口数量,也就是`--ports_num`参数。 TRAIN_CONFIG_DIR | 本次训练文件所在目录,与JOB_PATH,JOB_NAME组合可以找到本次训练需要的文件路径
CONF_PADDLE_NIC | `paddle pserver`进程需要的`--nics`参数,即网卡名
`CONF_PADDLE_PORTS_NUM_SPARSE`表示稀疏更新的端口数量,也就是`--ports_num_for_sparse`参数。 CONF_PADDLE_PORT | `paddle pserver`的`--port`参数
CONF_PADDLE_PORTS_NUM | 稠密更新的端口数量,即`--ports_num`参数
`CONF_PADDLE_GRADIENT_NUM`表示训练节点数量,即`--num_gradient_servers`参数 CONF_PADDLE_PORTS_NUM_SPARSE | 稀疏更新的端口数量,即`--ports_num_for_sparse`参数
CONF_PADDLE_GRADIENT_NUM | 训练节点数量,即`--num_gradient_servers`参数
这些参数的具体描述,读者可以查看[这里](http://www.paddlepaddle.org/doc/ui/cmd_argument/detail_introduction.html#parameter-server-and-distributed-communication) 这些参数的具体描述,读者可以查看[这里](http://www.paddlepaddle.org/doc/ui/cmd_argument/detail_introduction.html#parameter-server-and-distributed-communication)
...@@ -289,8 +205,8 @@ I1116 09:10:17.123121 50 Util.cpp:155] commandline: ...@@ -289,8 +205,8 @@ I1116 09:10:17.123121 50 Util.cpp:155] commandline:
--ports_num=2 --comment=paddle_process_by_paddle --ports_num=2 --comment=paddle_process_by_paddle
--pservers=192.168.129.66,192.168.223.143,192.168.129.71 --pservers=192.168.129.66,192.168.223.143,192.168.129.71
--ports_num_for_sparse=2 --config=./trainer_config.py --ports_num_for_sparse=2 --config=./trainer_config.py
--trainer_count=4 --num_passes=10 --use_gpu=0 --trainer_count=4 --num_passes=10 --use_gpu=0
--log_period=50 --dot_period=10 --saving_period=1 --log_period=50 --dot_period=10 --saving_period=1
--local=0 --trainer_id=0 --local=0 --trainer_id=0
--save_dir=/home/jobpath/paddle-cluster-job/output --save_dir=/home/jobpath/paddle-cluster-job/output
I1116 09:10:17.123440 50 Util.cpp:130] Calling runInitFunctions I1116 09:10:17.123440 50 Util.cpp:130] Calling runInitFunctions
...@@ -310,3 +226,90 @@ I1116 09:10:18.019492 50 ParameterClient2.cpp:122] pserver 3 192.168.223.143: ...@@ -310,3 +226,90 @@ I1116 09:10:18.019492 50 ParameterClient2.cpp:122] pserver 3 192.168.223.143:
I1116 09:10:18.019716 50 ParameterClient2.cpp:122] pserver 4 192.168.129.71:7164 I1116 09:10:18.019716 50 ParameterClient2.cpp:122] pserver 4 192.168.129.71:7164
I1116 09:10:18.019836 50 ParameterClient2.cpp:122] pserver 5 192.168.129.71:7165 I1116 09:10:18.019836 50 ParameterClient2.cpp:122] pserver 5 192.168.129.71:7165
``` ```
## 一些细节的补充
### 使用环境变量
使用容器方式运行训练任务的Kubernetes Job,通常会使用环境变量来传递Job的配置信息。`start_paddle.py`提供了一个启动脚本,将环境变量转换成paddle的命令行参数:
```
API = "/api/v1/namespaces/"
JOBSELECTOR = "labelSelector=job-name="
JOB_PATH = os.getenv("JOB_PATH") + "/" + os.getenv("JOB_NAME")
JOB_PATH_OUTPUT = JOB_PATH + "/output"
JOBNAME = os.getenv("JOB_NAME")
NAMESPACE = os.getenv("JOB_NAMESPACE")
PADDLE_NIC = os.getenv("CONF_PADDLE_NIC")
PADDLE_PORT = os.getenv("CONF_PADDLE_PORT")
PADDLE_PORTS_NUM = os.getenv("CONF_PADDLE_PORTS_NUM")
PADDLE_PORTS_NUM_SPARSE = os.getenv("CONF_PADDLE_PORTS_NUM_SPARSE")
PADDLE_SERVER_NUM = os.getenv("CONF_PADDLE_GRADIENT_NUM")
```
### Pod间通信
`start_paddle.py`脚本开始时,会先进行参数的初始化与解析。
```python
parser = argparse.ArgumentParser(prog="start_paddle.py",
description='simple tool for k8s')
args, train_args_list = parser.parse_known_args()
train_args = refine_unknown_args(train_args_list)
train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2]))
podlist = getPodList()
```
然后通过函数`getPodList()`访问Kubernetes的接口来查询此job对应的所有pod信息。当所有pod都处于running状态(即容器都在运行)时,再通过函数`getIdMap(podlist)`获取trainer_id。
```python
podlist = getPodList()
# need to wait until all pods are running
while not isPodAllRunning(podlist):
time.sleep(10)
podlist = getPodList()
idMap = getIdMap(podlist)
```
* *注意*: `getPodList()`会获取当前namespace下的所有pod,如果已经有pod运行,可能会导致出错。这种集群节点管理方式会在将来使用[StatefulSets](https://kubernetes.io/docs/concepts/abstractions/controllers/statefulsets/)代替。
在函数`getIdMap(podlist)`内部,我们通过读取`podlist`中每个pod的IP地址,将IP排序生成的序号作为trainer_id。
```python
def getIdMap(podlist):
'''
generate trainer_id by ip
'''
ips = []
for pod in podlist["items"]:
ips.append(pod["status"]["podIP"])
ips.sort()
idMap = {}
for i in range(len(ips)):
idMap[ips[i]] = i
return idMap
```
在得到`idMap`后,通过函数`startPaddle(idMap, train_args_dict)`构造`paddle pserver``paddle train`的启动参数并执行进程。
### 启动任务
在函数`startPaddle`中,最主要的工作就是解析出`paddle pserver``paddle train`的启动参数。例如`paddle train`参数的解析,解析环境变量得到`PADDLE_NIC``PADDLE_PORT``PADDLE_PORTS_NUM`等参数,然后通过自身的IP地址在`idMap`中获取`trainerId`
```python
program = 'paddle train'
args = " --nics=" + PADDLE_NIC
args += " --port=" + str(PADDLE_PORT)
args += " --ports_num=" + str(PADDLE_PORTS_NUM)
args += " --comment=" + "paddle_process_by_paddle"
ip_string = ""
for ip in idMap.keys():
ip_string += (ip + ",")
ip_string = ip_string.rstrip(",")
args += " --pservers=" + ip_string
args_ext = ""
for key, value in train_args_dict.items():
args_ext += (' --' + key + '=' + value)
localIP = socket.gethostbyname(socket.gethostname())
trainerId = idMap[localIP]
args += " " + args_ext + " --trainer_id=" + \
str(trainerId) + " --save_dir=" + JOB_PATH_OUTPUT
```
...@@ -132,7 +132,8 @@ def startPaddle(idMap={}, train_args_dict=None): ...@@ -132,7 +132,8 @@ def startPaddle(idMap={}, train_args_dict=None):
logDir = JOB_PATH_OUTPUT + "/node_" + str(trainerId) logDir = JOB_PATH_OUTPUT + "/node_" + str(trainerId)
if not os.path.exists(JOB_PATH_OUTPUT): if not os.path.exists(JOB_PATH_OUTPUT):
os.makedirs(JOB_PATH_OUTPUT) os.makedirs(JOB_PATH_OUTPUT)
os.mkdir(logDir) if not os.path.exists(logDir):
os.mkdir(logDir)
copyCommand = 'cp -rf ' + JOB_PATH + \ copyCommand = 'cp -rf ' + JOB_PATH + \
"/" + str(trainerId) + "/data/*" + " ./data/" "/" + str(trainerId) + "/data/*" + " ./data/"
os.system(copyCommand) os.system(copyCommand)
......
...@@ -15,13 +15,19 @@ import sys ...@@ -15,13 +15,19 @@ import sys
import os, subprocess import os, subprocess
import shlex import shlex
from recommonmark import parser, transform from recommonmark import parser, transform
try:
import py_paddle
import paddle
import paddle.v2
except ImportError:
print("Must install paddle python package before generating documentation")
sys.exit(1)
MarkdownParser = parser.CommonMarkParser MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, '@PROJ_ROOT@/python')
templates_path = ["@PROJ_ROOT@/doc_theme/templates"] templates_path = ["@PROJ_ROOT@/doc_theme/templates"]
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
......
...@@ -15,14 +15,20 @@ import sys ...@@ -15,14 +15,20 @@ import sys
import os, subprocess import os, subprocess
import shlex import shlex
from recommonmark import parser, transform from recommonmark import parser, transform
try:
import py_paddle
import paddle
import paddle.v2
except ImportError:
print("Must install paddle python package before generating documentation")
sys.exit(1)
MarkdownParser = parser.CommonMarkParser MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, '@PROJ_ROOT@/python')
templates_path = ["@PROJ_ROOT@/doc_theme/templates"] templates_path = ["@PROJ_ROOT@/doc_theme/templates"]
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
......
...@@ -156,14 +156,14 @@ define_py_data_sources2(train_list='data/train.list', ...@@ -156,14 +156,14 @@ define_py_data_sources2(train_list='data/train.list',
obj="process", obj="process",
args={"dictionary": word_dict}) args={"dictionary": word_dict})
``` ```
You can refer to the following link for more detailed examples and data formats: <a href = "../../api/data_provider/pydataprovider2_en.html">PyDataProvider2</a>. You can refer to the following link for more detailed examples and data formats: <a href = "../../api/v1/data_provider/pydataprovider2_en.html">PyDataProvider2</a>.
## Network Architecture ## Network Architecture
We will describe four kinds of network architectures in this section. We will describe four kinds of network architectures in this section.
<center> ![](./src/PipelineNetwork_en.jpg) </center> <center> ![](./src/PipelineNetwork_en.jpg) </center>
First, you will build a logistic regression model. Later, you will also get the chance to build other more powerful network architectures. First, you will build a logistic regression model. Later, you will also get the chance to build other more powerful network architectures.
For more detailed documentation, you could refer to: <a href = "../../api/trainer_config_helpers/layers.html">layer documentation</a>. All configuration files are in `demo/quick_start` directory. For more detailed documentation, you could refer to: <a href = "../../api/v1/trainer_config_helpers/layers.html">layer documentation</a>. All configuration files are in `demo/quick_start` directory.
### Logistic Regression ### Logistic Regression
The architecture is illustrated in the following picture: The architecture is illustrated in the following picture:
...@@ -366,7 +366,7 @@ You can use single layer LSTM model with Dropout for our text classification pro ...@@ -366,7 +366,7 @@ You can use single layer LSTM model with Dropout for our text classification pro
<br> <br>
## Optimization Algorithm ## Optimization Algorithm
<a href = "../../api/trainer_config_helpers/optimizers.html">Optimization algorithms</a> include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network. <a href = "../../api/v1/trainer_config_helpers/optimizers.html">Optimization algorithms</a> include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network.
```python ```python
settings(batch_size=128, settings(batch_size=128,
...@@ -407,7 +407,7 @@ paddle train \ ...@@ -407,7 +407,7 @@ paddle train \
--init_model_path=./output/pass-0000x --init_model_path=./output/pass-0000x
``` ```
We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to <a href = "../../api/predict/swig_py_paddle_en.html">Python Prediction API</a> tutorial,or other <a href = "../../tutorials/index_en.html">demo</a> for the prediction process using Python. You can also use the following script for inference or evaluation. We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to <a href = "../../api/v1/predict/swig_py_paddle_en.html">Python Prediction API</a> tutorial,or other <a href = "../../tutorials/index_en.html">demo</a> for the prediction process using Python. You can also use the following script for inference or evaluation.
inference script (predict.sh): inference script (predict.sh):
......
* {
font-family:"Roboto","Lato","proxima-nova","Helvetica Neue",Arial,sans-serif;
}
body { body {
padding-top: 80px; padding-top: 80px;
background-image: none !important; background-image: none !important;
......
...@@ -144,9 +144,7 @@ void Arguments::setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError) { ...@@ -144,9 +144,7 @@ void Arguments::setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError) {
a.cpuSequenceDims = m->cast<paddle::IVector>(vec->getSharedPtr()); a.cpuSequenceDims = m->cast<paddle::IVector>(vec->getSharedPtr());
} }
float Arguments::sumCosts() const { float Arguments::sum() const { return paddle::Argument::sum(m->outputs); }
return paddle::Argument::sumCosts(m->outputs);
}
int64_t Arguments::getBatchSize(size_t idx) const throw(RangeError) { int64_t Arguments::getBatchSize(size_t idx) const throw(RangeError) {
auto& a = m->getArg(idx); auto& a = m->getArg(idx);
......
...@@ -142,6 +142,20 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) { ...@@ -142,6 +142,20 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) {
} }
} }
size_t GradientMachine::getNonStaticParameterSize() const {
return m->machine->getNonStaticParameters().size();
}
Parameter* GradientMachine::getNonStaticParameter(size_t i) throw(RangeError) {
auto params = m->machine->getNonStaticParameters();
if (i < params.size()) {
return Parameter::createFromSharedPtr(
&m->machine->getNonStaticParameters()[i]);
} else {
throw RangeError();
}
}
void GradientMachine::randParameters() { m->machine->randParameters(); } void GradientMachine::randParameters() { m->machine->randParameters(); }
Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const
......
...@@ -453,7 +453,7 @@ public: ...@@ -453,7 +453,7 @@ public:
IVector* vec) throw(RangeError); IVector* vec) throw(RangeError);
void setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError); void setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError);
float sumCosts() const; float sum() const;
private: private:
static Arguments* createByPaddleArgumentVector(void* ptr); static Arguments* createByPaddleArgumentVector(void* ptr);
...@@ -771,6 +771,9 @@ public: ...@@ -771,6 +771,9 @@ public:
size_t getParameterSize() const; size_t getParameterSize() const;
Parameter* getParameter(size_t i) throw(RangeError); Parameter* getParameter(size_t i) throw(RangeError);
size_t getNonStaticParameterSize() const;
Parameter* getNonStaticParameter(size_t i) throw(RangeError);
void randParameters(); void randParameters();
Arguments* getLayerOutput(const std::string& layerName) const Arguments* getLayerOutput(const std::string& layerName) const
......
...@@ -22,7 +22,7 @@ class TestArguments(unittest.TestCase): ...@@ -22,7 +22,7 @@ class TestArguments(unittest.TestCase):
args = swig_paddle.Arguments.createArguments(1) args = swig_paddle.Arguments.createArguments(1)
args.setSlotValue(0, m) args.setSlotValue(0, m)
self.assertAlmostEqual(27.0, args.sumCosts()) self.assertAlmostEqual(27.0, args.sum())
mat = args.getSlotValue(0) mat = args.getSlotValue(0)
assert isinstance(mat, swig_paddle.Matrix) assert isinstance(mat, swig_paddle.Matrix)
......
...@@ -346,7 +346,9 @@ Evaluator* MultiGradientMachine::makeEvaluator() const { ...@@ -346,7 +346,9 @@ Evaluator* MultiGradientMachine::makeEvaluator() const {
void MultiGradientMachine::eval(Evaluator* evaluator) const { void MultiGradientMachine::eval(Evaluator* evaluator) const {
for (auto& thread : threads_) { for (auto& thread : threads_) {
SetDevice device(thread->getDeviceId()); SetDevice device(thread->getDeviceId());
thread->getGradientMachine()->eval(evaluator); if (thread->hasInputData()) {
thread->getGradientMachine()->eval(evaluator);
}
} }
} }
...@@ -356,14 +358,19 @@ void MultiGradientMachine::getOutArgs(std::vector<Argument>* outArgs, ...@@ -356,14 +358,19 @@ void MultiGradientMachine::getOutArgs(std::vector<Argument>* outArgs,
REGISTER_TIMER("waitOutArgs"); REGISTER_TIMER("waitOutArgs");
thread->waitOutArgsReady(); thread->waitOutArgsReady();
} }
outArgs_.resize(threads_[0]->getOutArgs().size());
outArgs_.resize(threads_[threads_.size() - 1]->getOutArgs().size());
REGISTER_TIMER("copyOutArgs"); REGISTER_TIMER("copyOutArgs");
for (size_t i = 0; i < outArgs_.size(); ++i) { for (size_t i = 0; i < outArgs_.size(); ++i) {
std::vector<Argument> args; std::vector<Argument> args;
args.reserve(threads_.size()); args.reserve(threads_.size());
for (auto& thread : threads_) { for (auto& thread : threads_) {
args.push_back(thread->getOutArgs()[i]); // If the thread input is empty, then the output is empty.
auto tmp = thread->getOutArgs();
if (tmp.size() > 0) {
args.push_back(tmp[i]);
}
} }
outArgs_[i].concat(args, useGpu_, outArgStream_, passType); outArgs_[i].concat(args, useGpu_, outArgStream_, passType);
} }
...@@ -534,7 +541,7 @@ void TrainerThread::prefetch() { ...@@ -534,7 +541,7 @@ void TrainerThread::prefetch() {
void TrainerThread::forward() { void TrainerThread::forward() {
if (!inArgsCopied_) { if (!inArgsCopied_) {
REGISTER_TIMER("copyInArgs"); REGISTER_TIMER("copyInArgs");
copyInArgs(); batchSize_ = copyInArgs();
} else { } else {
inArgsCopied_ = false; inArgsCopied_ = false;
} }
...@@ -564,7 +571,12 @@ void TrainerThread::forward() { ...@@ -564,7 +571,12 @@ void TrainerThread::forward() {
{ {
REGISTER_TIMER("thread_forward"); REGISTER_TIMER("thread_forward");
gradientMachine_->forward(inArgs_, &outArgs_, multiMachine_->getPassType()); if (batchSize_ > 0) {
gradientMachine_->forward(
inArgs_, &outArgs_, multiMachine_->getPassType());
} else {
outArgs_.clear();
}
} }
outArgsReadySem_.post(); outArgsReadySem_.post();
} }
...@@ -574,7 +586,13 @@ void TrainerThread::backward() { ...@@ -574,7 +586,13 @@ void TrainerThread::backward() {
if (multiMachine_->isPassGrad()) { if (multiMachine_->isPassGrad()) {
copyOutputGrad(); copyOutputGrad();
} }
gradientMachine_->backward(backwardCallback_); if (batchSize_ > 0) {
gradientMachine_->backward(backwardCallback_);
} else {
for (size_t i = parameters_.size(); i > 0; i--) {
backwardCallback(parameters_[i - 1].get());
}
}
if (multiMachine_->hasNonstaticCpuParamters()) { if (multiMachine_->hasNonstaticCpuParamters()) {
mergeCpuGradients(); mergeCpuGradients();
} }
...@@ -732,7 +750,7 @@ void TrainerThread::notifyValueReady(int paramId) { ...@@ -732,7 +750,7 @@ void TrainerThread::notifyValueReady(int paramId) {
notifyValueDispatch(paramId); notifyValueDispatch(paramId);
} }
void TrainerThread::copyInArgs() { int TrainerThread::copyInArgs() {
const std::vector<Argument>& fullInArgs = multiMachine_->getInArgs(); const std::vector<Argument>& fullInArgs = multiMachine_->getInArgs();
int numThreads = multiMachine_->getAllThreads().size(); int numThreads = multiMachine_->getAllThreads().size();
int32_t numSequences = fullInArgs[0].getNumSequences(); int32_t numSequences = fullInArgs[0].getNumSequences();
...@@ -748,7 +766,7 @@ void TrainerThread::copyInArgs() { ...@@ -748,7 +766,7 @@ void TrainerThread::copyInArgs() {
} }
if (copySize == 0) { if (copySize == 0) {
return; return 0;
} }
for (size_t i = 0; i < fullInArgs.size(); i++) { for (size_t i = 0; i < fullInArgs.size(); i++) {
...@@ -758,6 +776,7 @@ void TrainerThread::copyInArgs() { ...@@ -758,6 +776,7 @@ void TrainerThread::copyInArgs() {
copySize, copySize,
FLAGS_parallel_nn ? false : multiMachine_->useGpu()); FLAGS_parallel_nn ? false : multiMachine_->useGpu());
} }
return copySize;
} }
void TrainerThread::mergeCpuGradients() { void TrainerThread::mergeCpuGradients() {
......
...@@ -387,6 +387,9 @@ public: ...@@ -387,6 +387,9 @@ public:
/// copy the output gradient from the main GradientMachine. /// copy the output gradient from the main GradientMachine.
void copyOutputGrad(); void copyOutputGrad();
/// Whether the thread has input data.
bool hasInputData() { return batchSize_ != 0; }
protected: protected:
void mergeCpuGradients(); void mergeCpuGradients();
...@@ -407,7 +410,7 @@ protected: ...@@ -407,7 +410,7 @@ protected:
void copyGradToBufferThread(); void copyGradToBufferThread();
void gradCollectThread(); void gradCollectThread();
void copyInArgs(); int copyInArgs();
void forward(); void forward();
void backward(); void backward();
void backwardCallback(Parameter* para); void backwardCallback(Parameter* para);
...@@ -467,6 +470,7 @@ protected: ...@@ -467,6 +470,7 @@ protected:
/// indicate whether inArgs is copied before forward() /// indicate whether inArgs is copied before forward()
bool inArgsCopied_; bool inArgsCopied_;
int batchSize_;
}; };
} // namespace paddle } // namespace paddle
...@@ -24,7 +24,7 @@ bool CRFDecodingLayer::init(const LayerMap& layerMap, ...@@ -24,7 +24,7 @@ bool CRFDecodingLayer::init(const LayerMap& layerMap,
return false; return false;
} }
crf_.reset(new LinearChainCRF( crf_.reset(new LinearChainCRF(
numClasses_, parameter_->getBuf(PARAMETER_VALUE)->getData(), nullptr)); numClasses_, parameter_->getBuf(PARAMETER_VALUE)->getData()));
return true; return true;
} }
......
...@@ -42,6 +42,7 @@ bool CRFLayer::init(const LayerMap& layerMap, ...@@ -42,6 +42,7 @@ bool CRFLayer::init(const LayerMap& layerMap,
CHECK_EQ(parameters_[0]->getSize(), numClasses_ * (numClasses_ + 2)); CHECK_EQ(parameters_[0]->getSize(), numClasses_ * (numClasses_ + 2));
parameter_ = parameters_[0]; parameter_ = parameters_[0];
weight_.reset(new Weight(numClasses_ + 2, numClasses_, parameter_));
// We don't need sequenceStartPositions because each sample of output_ is // We don't need sequenceStartPositions because each sample of output_ is
// for the cost of one sequence. // for the cost of one sequence.
...@@ -69,11 +70,7 @@ void CRFLayer::forward(PassType passType) { ...@@ -69,11 +70,7 @@ void CRFLayer::forward(PassType passType) {
for (size_t i = 0; i < numSequences; ++i) { for (size_t i = 0; i < numSequences; ++i) {
if (i >= crfs_.size()) { if (i >= crfs_.size()) {
crfs_.emplace_back(numClasses_, crfs_.emplace_back(numClasses_, weight_->getW()->getData());
parameter_->getBuf(PARAMETER_VALUE)->getData(),
parameter_->getBuf(PARAMETER_GRADIENT)
? parameter_->getBuf(PARAMETER_GRADIENT)->getData()
: nullptr);
} }
output_.value->getData()[i] = output_.value->getData()[i] =
crfs_[i].forward(output.value->getData() + numClasses_ * starts[i], crfs_[i].forward(output.value->getData() + numClasses_ * starts[i],
...@@ -93,22 +90,25 @@ void CRFLayer::backward(const UpdateCallback& callback) { ...@@ -93,22 +90,25 @@ void CRFLayer::backward(const UpdateCallback& callback) {
const int* starts = label.sequenceStartPositions->getData(false); const int* starts = label.sequenceStartPositions->getData(false);
int numSequences = label.sequenceStartPositions->getSize() - 1; int numSequences = label.sequenceStartPositions->getSize() - 1;
bool needWGrad = weight_->getWGrad() ? true : false;
for (int i = 0; i < numSequences; ++i) { for (int i = 0; i < numSequences; ++i) {
crfs_[i].backward(output.value->getData() + numClasses_ * starts[i], crfs_[i].backward(output.value->getData() + numClasses_ * starts[i],
output.grad->getData() + numClasses_ * starts[i],
label.ids->getData() + starts[i], label.ids->getData() + starts[i],
starts[i + 1] - starts[i]); starts[i + 1] - starts[i],
if (weightLayer_) { needWGrad);
real weight = getInputValue(*weightLayer_)->getElement(i, 0); real instanceWeight = weightLayer_
MatrixPtr grad = output.grad->subRowMatrix(starts[i], starts[i + 1]); ? getInputValue(*weightLayer_)->getElement(i, 0)
grad->mulScalar(weight); : real(1.0f);
instanceWeight *= coeff_;
MatrixPtr grad = output.grad->subRowMatrix(starts[i], starts[i + 1]);
grad->add(*crfs_[i].getXGrad(), real(1.0f), instanceWeight);
if (needWGrad) {
weight_->getWGrad()->add(
*crfs_[i].getWGrad(), real(1.0f), instanceWeight);
} }
} }
if (coeff_ != real(1.0f)) {
output.grad->mulScalar(coeff_);
}
parameter_->incUpdate(callback); parameter_->incUpdate(callback);
} }
......
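Read as a formula, the rewritten CRFLayer::backward above scales each sequence's CRF gradient exactly once by the combined weight: \f$\partial L / \partial x_i \mathrel{+}= c \, w_i \, \partial \mathrm{nll}_i / \partial x_i\f$ and \f$\partial L / \partial (a, b, W) \mathrel{+}= \sum_i c \, w_i \, \partial \mathrm{nll}_i / \partial (a, b, W)\f$, where \f$c\f$ is coeff_, \f$w_i\f$ is the optional per-sequence weight (1 when no weight layer is configured), and \f$\mathrm{nll}_i\f$ is the negative log likelihood LinearChainCRF computes for sequence \f$i\f$.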
...@@ -38,8 +38,9 @@ protected: ...@@ -38,8 +38,9 @@ protected:
size_t numClasses_; size_t numClasses_;
ParameterPtr parameter_; ParameterPtr parameter_;
std::vector<LinearChainCRF> crfs_; std::vector<LinearChainCRF> crfs_;
LayerPtr weightLayer_; // weight for each sequence LayerPtr weightLayer_; // weight for each sequence
real coeff_; // weight for the layer std::unique_ptr<Weight> weight_; // parameters
real coeff_; // weight for the layer
}; };
} // namespace paddle } // namespace paddle
...@@ -17,18 +17,12 @@ limitations under the License. */ ...@@ -17,18 +17,12 @@ limitations under the License. */
namespace paddle { namespace paddle {
LinearChainCRF::LinearChainCRF(int numClasses, real* para, real* grad) LinearChainCRF::LinearChainCRF(int numClasses, real* para)
: numClasses_(numClasses) { : numClasses_(numClasses) {
a_ = Matrix::create(para, 1, numClasses_); a_ = Matrix::create(para, 1, numClasses_);
b_ = Matrix::create(para + numClasses_, 1, numClasses_); b_ = Matrix::create(para + numClasses_, 1, numClasses_);
w_ = Matrix::create(para + 2 * numClasses_, numClasses_, numClasses_); w_ = Matrix::create(para + 2 * numClasses_, numClasses_, numClasses_);
if (grad) {
da_ = Matrix::create(grad, 1, numClasses_);
db_ = Matrix::create(grad + numClasses_, 1, numClasses_);
dw_ = Matrix::create(grad + 2 * numClasses_, numClasses_, numClasses_);
}
ones_ = Matrix::create(1, numClasses_); ones_ = Matrix::create(1, numClasses_);
ones_->one(); ones_->one();
...@@ -107,19 +101,24 @@ real LinearChainCRF::forward(real* x, int* s, int length) { ...@@ -107,19 +101,24 @@ real LinearChainCRF::forward(real* x, int* s, int length) {
return -ll; return -ll;
} }
void LinearChainCRF::backward(real* x, real* dx, int* s, int length) { void LinearChainCRF::backward(real* x, int* s, int length, bool needWGrad) {
MatrixPtr matX = Matrix::create(x, length, numClasses_); MatrixPtr matX = Matrix::create(x, length, numClasses_);
MatrixPtr matDX = Matrix::create(dx, length, numClasses_); Matrix::resizeOrCreate(matGrad_, length, numClasses_);
MatrixPtr matGrad = Matrix::create(length, numClasses_);
Matrix::resizeOrCreate(beta_, length, numClasses_); Matrix::resizeOrCreate(beta_, length, numClasses_);
real* b = b_->getData(); real* b = b_->getData();
real* dw = dw_ ? dw_->getData() : nullptr; if (needWGrad) {
Matrix::resizeOrCreate(matWGrad_, numClasses_ + 2, numClasses_);
matWGrad_->zeroMem();
da_ = matWGrad_->subRowMatrix(0, 1);
db_ = matWGrad_->subRowMatrix(1, 2);
dw_ = matWGrad_->subRowMatrix(2, numClasses_ + 2);
}
real* alpha = alpha_->getData(); real* alpha = alpha_->getData();
real* beta = beta_->getData(); real* beta = beta_->getData();
real* expW = expW_->getData(); real* expW = expW_->getData();
real* expX = expX_->getData(); real* expX = expX_->getData();
real* grad = matGrad->getData(); real* grad = matGrad_->getData();
for (int i = 0; i < numClasses_; ++i) { for (int i = 0; i < numClasses_; ++i) {
beta[(length - 1) * numClasses_ + i] = exp(b[i]); beta[(length - 1) * numClasses_ + i] = exp(b[i]);
...@@ -140,39 +139,38 @@ void LinearChainCRF::backward(real* x, real* dx, int* s, int length) { ...@@ -140,39 +139,38 @@ void LinearChainCRF::backward(real* x, real* dx, int* s, int length) {
normalizeL1(beta + k * numClasses_, numClasses_); normalizeL1(beta + k * numClasses_, numClasses_);
} }
matGrad->dotMul(*alpha_, *beta_); matGrad_->dotMul(*alpha_, *beta_);
matGrad->rowNormalizeL1(*matGrad); matGrad_->rowNormalizeL1(*matGrad_);
for (int k = 0; k < length; ++k) { for (int k = 0; k < length; ++k) {
grad[k * numClasses_ + s[k]] -= (real)1; grad[k * numClasses_ + s[k]] -= (real)1;
} }
matDX->add(*matGrad);
if (da_) {
da_->add(*matGrad->subMatrix(/* startRow= */ 0, /* numRows= */ 1));
}
if (db_) {
db_->add(*matGrad->subMatrix(/* startRow= */ length - 1, 1));
}
beta_->dotMul(*beta_, *expX_); if (needWGrad) {
beta_->rowNormalizeL1(*beta_); da_->add(*matGrad_->subMatrix(/* startRow= */ 0, /* numRows= */ 1));
db_->add(*matGrad_->subMatrix(/* startRow= */ length - 1, 1));
for (int k = 1; dw && k < length; ++k) { beta_->dotMul(*beta_, *expX_);
real sum = 0; beta_->rowNormalizeL1(*beta_);
for (int i = 0; i < numClasses_; ++i) {
for (int j = 0; j < numClasses_; ++j) { real* dw = dw_->getData();
sum += expW[i * numClasses_ + j] * alpha[(k - 1) * numClasses_ + i] * for (int k = 1; k < length; ++k) {
beta[k * numClasses_ + j]; real sum = 0;
for (int i = 0; i < numClasses_; ++i) {
for (int j = 0; j < numClasses_; ++j) {
sum += expW[i * numClasses_ + j] * alpha[(k - 1) * numClasses_ + i] *
beta[k * numClasses_ + j];
}
} }
} sum = 1 / sum;
sum = 1 / sum; for (int i = 0; i < numClasses_; ++i) {
for (int i = 0; i < numClasses_; ++i) { for (int j = 0; j < numClasses_; ++j) {
for (int j = 0; j < numClasses_; ++j) { dw[i * numClasses_ + j] += sum * expW[i * numClasses_ + j] *
dw[i * numClasses_ + j] += sum * expW[i * numClasses_ + j] * alpha[(k - 1) * numClasses_ + i] *
alpha[(k - 1) * numClasses_ + i] * beta[k * numClasses_ + j];
beta[k * numClasses_ + j]; }
} }
dw[s[k - 1] * numClasses_ + s[k]] -= (real)1;
} }
dw[s[k - 1] * numClasses_ + s[k]] -= (real)1;
} }
} }
......
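In the rewritten backward() above, the alpha/beta recursions yield the per-position marginals and the gradient is their difference from the observed labels, roughly \f$\partial(-\log P(s \mid x)) / \partial x_{k,j} = P(y_k = j \mid x) - 1\{s_k = j\}\f$ with \f$P(y_k = j \mid x) \propto \alpha_k(j)\,\beta_k(j)\f$; when needWGrad is set, da_, db_ and dw_ accumulate the same quantity for the start, end and transition weights (expected counts under the model minus the observed ones).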
...@@ -21,7 +21,7 @@ namespace paddle { ...@@ -21,7 +21,7 @@ namespace paddle {
class LinearChainCRF { class LinearChainCRF {
public: public:
/** /**
* The size of para and grad must be \f$(numClasses + 2) * numClasses\f$. * The size of para must be \f$(numClasses + 2) * numClasses\f$.
* The first numClasses values of para are for starting weights (\f$a\f$). * The first numClasses values of para are for starting weights (\f$a\f$).
* The next numClasses values of para are for ending weights (\f$b\f$), * The next numClasses values of para are for ending weights (\f$b\f$),
 * The remaining values are for transition weights (\f$w\f$). * The remaining values are for transition weights (\f$w\f$).
...@@ -34,7 +34,7 @@ public: ...@@ -34,7 +34,7 @@ public:
* all possible * all possible
* sequences is \f$1\f$, and \f$x\f$ is the input feature to the CRF. * sequences is \f$1\f$, and \f$x\f$ is the input feature to the CRF.
*/ */
LinearChainCRF(int numClasses, real* para, real* grad); LinearChainCRF(int numClasses, real* para);
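For reference, the sequence score behind this interface (the same quantity the brute-force check in test_CRFLayerGrad.cpp enumerates further down) is \f$\mathrm{score}(x, s) = a_{s_1} + b_{s_L} + \sum_{k=1}^{L} x_{k, s_k} + \sum_{k=2}^{L} w_{s_{k-1}, s_k}\f$, with \f$P(s \mid x) = \exp(\mathrm{score}(x, s)) / \sum_{s'} \exp(\mathrm{score}(x, s'))\f$, and forward() returns \f$-\log P(s \mid x)\f$.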
/** /**
* Calculate the negative log likelihood of s given x. * Calculate the negative log likelihood of s given x.
...@@ -45,29 +45,45 @@ public: ...@@ -45,29 +45,45 @@ public:
/** /**
* Calculate the gradient with respect to x, a, b, and w. * Calculate the gradient with respect to x, a, b, and w.
* The gradient of x will be stored in dx.
* backward() can only be called after a corresponding call to forward() with * backward() can only be called after a corresponding call to forward() with
* the same x, s and length. * the same x, s and length.
* @note The gradient is added to dx and grad (provided at constructor). * The gradient with respect to a, b, and w will not be calculated if
* needWGrad is false.
* @note Please call getWGrad() and getXGrad() to get the gradient with
* respect to (a, b, w) and x respectively.
*/ */
void backward(real* x, real* dx, int* s, int length); void backward(real* x, int* s, int length, bool needWGrad);
/** /**
* Find the most probable sequence given x. The result will be stored in s. * Find the most probable sequence given x. The result will be stored in s.
*/ */
void decode(real* x, int* s, int length); void decode(real* x, int* s, int length);
/*
* Return the gradient with respect to (a, b, w). It can only be called after
* a corresponding call to backward().
*/
MatrixPtr getWGrad() { return matWGrad_; }
/*
* Return the gradient with respect to x. It can only be called after a
* corresponding call to backward().
*/
MatrixPtr getXGrad() { return matGrad_; }
protected: protected:
int numClasses_; int numClasses_;
MatrixPtr a_; MatrixPtr a_;
MatrixPtr b_; MatrixPtr b_;
MatrixPtr w_; MatrixPtr w_;
MatrixPtr matWGrad_;
MatrixPtr da_; MatrixPtr da_;
MatrixPtr db_; MatrixPtr db_;
MatrixPtr dw_; MatrixPtr dw_;
MatrixPtr ones_; MatrixPtr ones_;
MatrixPtr expX_; MatrixPtr expX_;
MatrixPtr matGrad_;
MatrixPtr alpha_; MatrixPtr alpha_;
MatrixPtr beta_; MatrixPtr beta_;
MatrixPtr maxX_; MatrixPtr maxX_;
......
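Putting the pieces together, a minimal sketch of how the refactored interface is driven (mirroring what CRFLayer does above; the wrapper function and its argument names are illustrative only, buffer shapes follow the constructor contract):

#include "paddle/gserver/layers/LinearChainCRF.h"

using namespace paddle;  // NOLINT

// para: (numClasses + 2) x numClasses weights laid out as [a | b | w],
// x: length x numClasses input scores, s: golden labels of one sequence.
void runLinearChainCRF(real* para, int numClasses, real* x, int* s, int length) {
  LinearChainCRF crf(numClasses, para);             // no gradient buffer at construction any more
  real nll = crf.forward(x, s, length);             // negative log likelihood of s given x
  crf.backward(x, s, length, /* needWGrad= */ true);
  MatrixPtr dx = crf.getXGrad();                    // length x numClasses, gradient w.r.t. x
  MatrixPtr dabw = crf.getWGrad();                  // (numClasses + 2) x numClasses, gradient w.r.t. (a, b, w)
  (void)nll; (void)dx; (void)dabw;
}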
...@@ -18,6 +18,14 @@ add_unittest_without_exec(test_LayerGrad ...@@ -18,6 +18,14 @@ add_unittest_without_exec(test_LayerGrad
add_test(NAME test_LayerGrad add_test(NAME test_LayerGrad
COMMAND test_LayerGrad) COMMAND test_LayerGrad)
################ test_CRFLayerGrad ####################
add_unittest_without_exec(test_CRFLayerGrad
test_CRFLayerGrad.cpp
LayerGradUtil.cpp)
add_test(NAME test_CRFLayerGrad
COMMAND test_CRFLayerGrad)
add_unittest_without_exec(test_ActivationGrad add_unittest_without_exec(test_ActivationGrad
test_ActivationGrad.cpp test_ActivationGrad.cpp
LayerGradUtil.cpp) LayerGradUtil.cpp)
......
...@@ -24,7 +24,7 @@ real getCostSum(LayerPtr& testLayer, MatrixPtr weights) { ...@@ -24,7 +24,7 @@ real getCostSum(LayerPtr& testLayer, MatrixPtr weights) {
if (weights) { if (weights) {
outArgs[0].value->dotMul(*outArgs[0].value, *weights); outArgs[0].value->dotMul(*outArgs[0].value, *weights);
} }
return Argument::sumCosts(outArgs); return Argument::sum(outArgs);
} }
real getDiffAndPrint(real newCost1, real getDiffAndPrint(real newCost1,
...@@ -241,7 +241,7 @@ void testBatchState(LayerPtr testLayer, ...@@ -241,7 +241,7 @@ void testBatchState(LayerPtr testLayer,
std::vector<Argument> args; std::vector<Argument> args;
args.push_back(out); args.push_back(out);
EXPECT_EQ(0, Argument::sumCosts(args)) << "testBatchState failed"; EXPECT_EQ(0, Argument::sum(args)) << "testBatchState failed";
for (size_t seqId = 0; seqId < numSequences; ++seqId) { for (size_t seqId = 0; seqId < numSequences; ++seqId) {
start[seqId] += seqLens[seqId]; start[seqId] += seqLens[seqId];
} }
...@@ -672,7 +672,7 @@ void testLayerGradKernel(TestConfig testConf, ...@@ -672,7 +672,7 @@ void testLayerGradKernel(TestConfig testConf,
outArgs[0].value->dotMul(*testLayer->getOutput().value, *weights); outArgs[0].value->dotMul(*testLayer->getOutput().value, *weights);
} }
real cost = Argument::sumCosts(outArgs); real cost = Argument::sum(outArgs);
LOG(INFO) << " cost " << cost; LOG(INFO) << " cost " << cost;
EXPECT_FALSE(std::isnan(cost)); EXPECT_FALSE(std::isnan(cost));
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include "ModelConfig.pb.h"
#include "paddle/gserver/layers/DataLayer.h"
#include "paddle/gserver/layers/LinearChainCRF.h"
#include "paddle/trainer/Trainer.h"
#include "LayerGradUtil.h"
#include "paddle/testing/TestUtil.h"
using namespace paddle; // NOLINT
DECLARE_int32(gpu_id);
DECLARE_bool(thread_local_rand_use_global_seed);
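// Treats seq as a little-endian counter in base numClasses and advances it by
// one; returns false once every label combination has been enumerated.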
static inline bool getNextSequence(std::vector<int>& seq, int numClasses) {
for (auto& v : seq) {
if (++v < numClasses) {
return true;
}
v = 0;
}
return false;
}
// log(exp(x) + exp(y))
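// computed as max(x, y) + log(exp(x - max) + exp(y - max)) to keep the
// exponentials from overflowing; the isinf() guard covers the -infinity
// values used to seed the accumulators below.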
static inline real logSum(real x, real y) {
real maxValue = std::max(x, y);
if (std::isinf(maxValue)) {
return -std::numeric_limits<real>::infinity();
} else {
return maxValue + log(exp(x - maxValue) + exp(y - maxValue));
}
}
static inline std::vector<int> genRandLabels(int numClasses, int length) {
std::vector<int> labels(length);
for (int i = 0; i < length; ++i) {
labels[i] = rand() % numClasses; // NOLINT
}
return labels;
}
TEST(CRFLayer, cost) {
const int numClasses = 4;
CpuVector para(numClasses * (numClasses + 2));
real* a = para.getData();
real* b = para.getData() + numClasses;
real* w = para.getData() + 2 * numClasses;
LinearChainCRF crf(4, para.getData());
for (int length : {1, 2, 3, 10}) {
for (int tries = 0; tries < 10; ++tries) {
CpuMatrix x(length, numClasses);
x.randomizeUniform();
para.randnorm(0, 2);
std::vector<int> goldenLabels = genRandLabels(numClasses, length);
real cost = crf.forward(x.getData(), goldenLabels.data(), length);
real logZ = -std::numeric_limits<real>::infinity();
real logNominator = -std::numeric_limits<real>::infinity();
std::vector<int> testResult(length, 0);
do {
real score = a[testResult.front()];
score += x.getElement(0, testResult.front());
for (int k = 1; k < length; ++k) {
score += x.getElement(k, testResult[k]) +
w[numClasses * testResult[k - 1] + testResult[k]];
}
score += b[testResult.back()];
logZ = logSum(logZ, score);
if (goldenLabels == testResult) {
logNominator = score;
}
} while (getNextSequence(testResult, numClasses));
real trueCost = -logNominator + logZ;
real diff = fabs(trueCost - cost);
diff /= fabs(cost) < fabs(trueCost) ? fabs(cost) : fabs(trueCost);
VLOG(1) << "cost=" << cost << " trueCost=" << trueCost << " diff=" << diff
<< std::endl;
if (typeid(real) == typeid(double)) { // NOLINT
EXPECT_LE(diff, 1e-10);
} else {
EXPECT_LE(diff, 5e-3);
}
}
}
}
inline real epsilon() { return typeid(real) == typeid(double) ? 1e-10 : 0.06; }
TestConfig initTestConfig(size_t numClasses, bool withWeight) {
TestConfig config;
config.layerConfig.set_type("crf");
config.layerConfig.set_size(numClasses);
config.biasSize = 0;
config.inputDefs.push_back({INPUT_SEQUENCE_DATA,
"layer_0",
numClasses,
numClasses * (numClasses + 2)});
config.layerConfig.add_inputs();
config.inputDefs.push_back(
{INPUT_SEQUENCE_LABEL, "layer_label", numClasses, 0});
config.layerConfig.add_inputs();
if (withWeight) {
config.inputDefs.push_back({INPUT_DENSE_DIM_DATA, "layer_weight", 1, 0});
config.layerConfig.add_inputs();
}
return config;
}
TEST(Layer, CRFLayer) {
size_t numClasses = 10;
for (int tries = 0; tries < 5; ++tries) {
TestConfig config = initTestConfig(numClasses, /* withWeight= */ false);
for (int length : {1, 3, 100}) {
      // GPU is not supported yet
testLayerGrad(config,
"crf",
length,
/* trans= */ false,
/* useGpu= */ false,
/* useWeight= */ false,
epsilon());
}
}
}
TEST(Layer, CRFLayerUseWeight) {
size_t numClasses = 10;
for (int tries = 0; tries < 5; ++tries) {
TestConfig config = initTestConfig(numClasses, /* withWeight= */ true);
for (int length : {1, 3, 100}) {
      // GPU is not supported yet
testLayerGrad(config,
"crf",
length,
/* trans= */ false,
/* useGpu= */ false,
/* useWeight= */ false,
epsilon());
}
}
}
int main(int argc, char** argv) {
initMain(argc, argv);
hl_start();
hl_init(FLAGS_gpu_id);
FLAGS_thread_local_rand_use_global_seed = true;
srand(1);
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
...@@ -276,27 +276,6 @@ TEST(Layer, AddtoLayer) { ...@@ -276,27 +276,6 @@ TEST(Layer, AddtoLayer) {
} }
} }
TEST(Layer, CRFLayer) {
TestConfig config;
config.layerConfig.set_type("crf");
config.layerConfig.set_size(10);
config.biasSize = 0;
config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 120});
config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "layer_1", 10, 0});
config.layerConfig.add_inputs();
config.layerConfig.add_inputs();
// Not support GPU now
testLayerGrad(config,
"crf",
100,
/* trans */ false,
/* useGpu */ false,
false /*useWeight*/,
0.03 /*epsilon*/);
}
TEST(Layer, CTCLayer) { TEST(Layer, CTCLayer) {
TestConfig config; TestConfig config;
config.layerConfig.set_type("ctc"); config.layerConfig.set_type("ctc");
......
...@@ -36,7 +36,7 @@ TEST(LinearChainCRF, decoding) { ...@@ -36,7 +36,7 @@ TEST(LinearChainCRF, decoding) {
real* a = para.getData(); real* a = para.getData();
real* b = para.getData() + numClasses; real* b = para.getData() + numClasses;
real* w = para.getData() + 2 * numClasses; real* w = para.getData() + 2 * numClasses;
LinearChainCRF crf(4, para.getData(), nullptr); LinearChainCRF crf(4, para.getData());
for (int length : {1, 2, 3, 10}) { for (int length : {1, 2, 3, 10}) {
for (int tries = 0; tries < 10; ++tries) { for (int tries = 0; tries < 10; ++tries) {
CpuMatrix x(length, numClasses); CpuMatrix x(length, numClasses);
......
...@@ -163,7 +163,7 @@ struct Argument { ...@@ -163,7 +163,7 @@ struct Argument {
: sequenceStartPositions->getData(false); : sequenceStartPositions->getData(false);
} }
static inline real sumCosts(const std::vector<Argument>& arguments) { static inline real sum(const std::vector<Argument>& arguments) {
real cost = 0; real cost = 0;
for (auto& arg : arguments) { for (auto& arg : arguments) {
if (arg.value) { if (arg.value) {
......
...@@ -10,9 +10,11 @@ add_test(NAME socket_test ...@@ -10,9 +10,11 @@ add_test(NAME socket_test
add_unittest_without_exec(test_ProtoServer add_unittest_without_exec(test_ProtoServer
test_ProtoServer.cpp) test_ProtoServer.cpp)
add_test(NAME test_ProtoServer IF(NOT ON_TRAVIS)
COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port add_test(NAME test_ProtoServer
${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer) COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port
${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer)
ENDIF(NOT ON_TRAVIS)
# TODO(yuyang18): Run test_ProtoServer when with rdma # TODO(yuyang18): Run test_ProtoServer when with rdma
# add_test(NAME test_ProtoServerRDMA # add_test(NAME test_ProtoServerRDMA
......
...@@ -195,6 +195,12 @@ def __monkeypatch_gradient_machine__(): ...@@ -195,6 +195,12 @@ def __monkeypatch_gradient_machine__():
swig_paddle.GradientMachine.getParameters = getParameters swig_paddle.GradientMachine.getParameters = getParameters
def getNonStaticParameters(self):
return (self.getNonStaticParameter(i)
for i in xrange(self.getNonStaticParameterSize()))
swig_paddle.GradientMachine.getNonStaticParameters = getNonStaticParameters
def getLayerOutputs(self, layerNames): def getLayerOutputs(self, layerNames):
""" """
getLayerOutputs. get outputs of layers and return a numpy matrix dict. getLayerOutputs. get outputs of layers and return a numpy matrix dict.
......
...@@ -5,40 +5,55 @@ ARG DEBIAN_FRONTEND=noninteractive ...@@ -5,40 +5,55 @@ ARG DEBIAN_FRONTEND=noninteractive
ARG UBUNTU_MIRROR ARG UBUNTU_MIRROR
RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
# ENV variables
ARG BUILD_WOBOQ
ARG BUILD_AND_INSTALL
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_STYLE_CHECK
ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
ENV WITH_GPU=OFF
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
ENV DOCKER_BUILD=TRUE
ENV HOME /root
# Add bash enhancements
COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y git python-pip python-dev openssh-server bison && \ apt-get install -y git python-pip python-dev openssh-server bison && \
apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \ apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \ apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \ apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
apt-get install -y automake && \ apt-get install -y automake locales clang-format-3.8 && \
apt-get clean -y apt-get clean -y
# git credential to skip password typing
RUN git config --global credential.helper store
# Fix locales to en_US.UTF-8
RUN localedef -i en_US -f UTF-8 en_US.UTF-8
RUN pip install --upgrade pip && \ RUN pip install --upgrade pip && \
pip install -U "protobuf==3.1.0" && \ pip install -U 'protobuf==3.1.0' && \
pip install -U wheel pillow BeautifulSoup && \ pip install -U wheel pillow BeautifulSoup && \
pip install -U docopt PyYAML sphinx && \ pip install -U docopt PyYAML sphinx && \
pip install -U sphinx_rtd_theme recommonmark jupyter pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \
pip install -U pre-commit 'requests==2.9.2' jupyter
RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \ RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \ cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
cd .. && rm -rf cmake-3.4.1 cd .. && rm -rf cmake-3.4.1
ARG BUILD_WOBOQ
ARG BUILD_AND_INSTALL
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_STYLE_CHECK
ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
ENV WITH_GPU=OFF
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
RUN mkdir /paddle
COPY . /paddle/ COPY . /paddle/
RUN cd /paddle/ && git submodule update --init --recursive
RUN /paddle/paddle/scripts/docker/build.sh RUN /paddle/paddle/scripts/docker/build.sh
VOLUME ["/usr/share/nginx/html/data", "/usr/share/nginx/html/paddle"] VOLUME ["/usr/share/nginx/html/data", "/usr/share/nginx/html/paddle"]
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service # Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
...@@ -48,12 +63,9 @@ RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config ...@@ -48,12 +63,9 @@ RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
EXPOSE 22 EXPOSE 22
# Jupyter Notebook directory. # Jupyter Notebook: Paddle book
RUN mkdir /notes/
WORKDIR "/notes"
EXPOSE 8888 EXPOSE 8888
RUN mkdir -p /opt/bin
COPY ./paddle/scripts/docker/entrypoint /opt/bin/ COPY ./paddle/scripts/docker/entrypoint /opt/bin/
CMD ["/opt/bin/entrypoint"] CMD ["/opt/bin/entrypoint"]
...@@ -5,40 +5,55 @@ ARG DEBIAN_FRONTEND=noninteractive ...@@ -5,40 +5,55 @@ ARG DEBIAN_FRONTEND=noninteractive
ARG UBUNTU_MIRROR ARG UBUNTU_MIRROR
RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
# ENV variables
ARG BUILD_WOBOQ
ARG BUILD_AND_INSTALL
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_STYLE_CHECK
ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
ENV WITH_GPU=ON
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
ENV DOCKER_BUILD=TRUE
ENV HOME /root
# Add bash enhancements
COPY ./paddle/scripts/docker/root/ /root/
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y git python-pip python-dev openssh-server bison && \ apt-get install -y git python-pip python-dev openssh-server bison && \
apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \ apt-get install -y wget unzip tar xz-utils bzip2 gzip coreutils && \
apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \ apt-get install -y curl sed grep graphviz libjpeg-dev zlib1g-dev && \
apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \ apt-get install -y python-numpy python-matplotlib gcc g++ gfortran && \
apt-get install -y automake && \ apt-get install -y automake locales clang-format-3.8 && \
apt-get clean -y apt-get clean -y
# git credential to skip password typing
RUN git config --global credential.helper store
# Fix locales to en_US.UTF-8
RUN localedef -i en_US -f UTF-8 en_US.UTF-8
RUN pip install --upgrade pip && \ RUN pip install --upgrade pip && \
pip install -U "protobuf==3.1.0" && \ pip install -U 'protobuf==3.1.0' && \
pip install -U wheel pillow BeautifulSoup && \ pip install -U wheel pillow BeautifulSoup && \
pip install -U docopt PyYAML sphinx && \ pip install -U docopt PyYAML sphinx && \
pip install -U sphinx_rtd_theme recommonmark jupyter pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \
pip install -U pre-commit 'requests==2.9.2' jupyter
RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \ RUN curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \
cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \ cd cmake-3.4.1 && ./bootstrap && make -j `nproc` && make install && \
cd .. && rm -rf cmake-3.4.1 cd .. && rm -rf cmake-3.4.1
ARG BUILD_WOBOQ
ARG BUILD_AND_INSTALL
ARG WITH_AVX
ARG WITH_DOC
ARG WITH_STYLE_CHECK
ENV BUILD_WOBOQ=${BUILD_WOBOQ:-OFF}
ENV BUILD_AND_INSTALL=${BUILD_AND_INSTALL:-OFF}
ENV WITH_GPU=ON
ENV WITH_AVX=${WITH_AVX:-ON}
ENV WITH_DOC=${WITH_DOC:-OFF}
ENV WITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
RUN mkdir /paddle
COPY . /paddle/ COPY . /paddle/
RUN cd /paddle/ && git submodule update --init --recursive
RUN /paddle/paddle/scripts/docker/build.sh RUN /paddle/paddle/scripts/docker/build.sh
VOLUME ["/usr/share/nginx/html/data", "/usr/share/nginx/html/paddle"] VOLUME ["/usr/share/nginx/html/data", "/usr/share/nginx/html/paddle"]
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service # Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
...@@ -48,12 +63,9 @@ RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config ...@@ -48,12 +63,9 @@ RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
EXPOSE 22 EXPOSE 22
# Jupyter Notebook directory. # Jupyter Notebook: Paddle book
RUN mkdir /notes/
WORKDIR "/notes"
EXPOSE 8888 EXPOSE 8888
RUN mkdir -p /opt/bin
COPY ./paddle/scripts/docker/entrypoint /opt/bin/ COPY ./paddle/scripts/docker/entrypoint /opt/bin/
CMD ["/opt/bin/entrypoint"] CMD ["/opt/bin/entrypoint"]
...@@ -17,7 +17,8 @@ if [[ ${BUILD_AND_INSTALL:-OFF} == 'ON' ]]; then ...@@ -17,7 +17,8 @@ if [[ ${BUILD_AND_INSTALL:-OFF} == 'ON' ]]; then
fi fi
mkdir -p /paddle/build # -p means no error if exists mkdir -p /paddle/build # -p means no error if exists
cd /paddle/build # clean local cmake and third_party cache
cd /paddle/build && rm -rf * && rm -rf ../third_party
cmake .. \ cmake .. \
-DWITH_DOC=${WITH_DOC:-OFF} \ -DWITH_DOC=${WITH_DOC:-OFF} \
-DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_GPU=${WITH_GPU:-OFF} \
...@@ -56,6 +57,12 @@ if [[ ${BUILD_AND_INSTALL:-OFF} == 'ON' ]]; then ...@@ -56,6 +57,12 @@ if [[ ${BUILD_AND_INSTALL:-OFF} == 'ON' ]]; then
pip install /usr/local/opt/paddle/share/wheels/py_paddle*linux*.whl pip install /usr/local/opt/paddle/share/wheels/py_paddle*linux*.whl
pip install /usr/local/opt/paddle/share/wheels/paddle*.whl pip install /usr/local/opt/paddle/share/wheels/paddle*.whl
paddle version paddle version
if [[ ${DOCKER_BUILD:-FALSE} == 'TRUE' ]]; then
# reduce docker image size
rm -rf /paddle/build
rm -rf /usr/local/opt/paddle/share/wheels/
fi
fi fi
trap : 0 trap : 0
#!/bin/bash #!/bin/bash
LOG=/var/log/all
touch $LOG /usr/sbin/sshd -D &
jupyter notebook --ip=0.0.0.0 /paddle/book/
/usr/sbin/sshd -D >> $LOG &
jupyter notebook --ip=0.0.0.0 /notes/ >> $LOG &
tail -f $LOG
# Locales
export LC_ALL=en_US.UTF-8
export LANG=en_US.UTF-8
export LANGUAGE=en_US.UTF-8
# Aliases
alias rm='rm -i'
alias cp='cp -i'
alias mv='mv -i'
alias ls='ls -hFG'
alias l='ls -lF'
alias ll='ls -alF'
alias lt='ls -ltrF'
alias ll='ls -alF'
alias lls='ls -alSrF'
alias llt='ls -altrF'
# Colorize directory listing
alias ls="ls -ph --color=auto"
# Colorize grep
if echo hello|grep --color=auto l >/dev/null 2>&1; then
export GREP_OPTIONS="--color=auto" GREP_COLOR="1;31"
fi
# Shell
export CLICOLOR="1"
YELLOW="\[\033[1;33m\]"
NO_COLOUR="\[\033[0m\]"
GREEN="\[\033[1;32m\]"
WHITE="\[\033[1;37m\]"
source ~/.scripts/git-prompt.sh
export PS1="\[\033[1;33m\]λ $WHITE\h $GREEN\w$YELLOW\$(__git_ps1 \" \[\033[35m\]{\[\033[36m\]%s\[\033[35m\]}\")$NO_COLOUR "
# Git
source ~/.scripts/git-completion.sh
[user]
name =
email =
[alias]
st = status --branch --short
ci = commit
br = branch
co = checkout
df = diff
l = log --pretty=format:\"%h %ad | %s%d [%an]\" --graph --date=short
ll = log --stat
[merge]
tool = vimdiff
[core]
excludesfile = ~/.gitignore
editor = vim
[color]
branch = auto
diff = auto
status = auto
[color "branch"]
current = yellow reverse
local = yellow
remote = green
[color "diff"]
meta = yellow bold
frag = magenta bold
old = red bold
new = green bold
[color "status"]
added = yellow
changed = green
untracked = cyan
[push]
default = matching
\ No newline at end of file
#!bash
#
# bash/zsh completion support for core Git.
#
# Copyright (C) 2006,2007 Shawn O. Pearce <spearce@spearce.org>
# Conceptually based on gitcompletion (http://gitweb.hawaga.org.uk/).
# Distributed under the GNU General Public License, version 2.0.
#
# The contained completion routines provide support for completing:
#
# *) local and remote branch names
# *) local and remote tag names
# *) .git/remotes file names
# *) git 'subcommands'
# *) tree paths within 'ref:path/to/file' expressions
# *) file paths within current working directory and index
# *) common --long-options
#
# To use these routines:
#
# 1) Copy this file to somewhere (e.g. ~/.git-completion.sh).
# 2) Add the following line to your .bashrc/.zshrc:
# source ~/.git-completion.sh
# 3) Consider changing your PS1 to also show the current branch,
# see git-prompt.sh for details.
case "$COMP_WORDBREAKS" in
*:*) : great ;;
*) COMP_WORDBREAKS="$COMP_WORDBREAKS:"
esac
# __gitdir accepts 0 or 1 arguments (i.e., location)
# returns location of .git repo
__gitdir ()
{
if [ -z "${1-}" ]; then
if [ -n "${__git_dir-}" ]; then
echo "$__git_dir"
elif [ -n "${GIT_DIR-}" ]; then
test -d "${GIT_DIR-}" || return 1
echo "$GIT_DIR"
elif [ -d .git ]; then
echo .git
else
git rev-parse --git-dir 2>/dev/null
fi
elif [ -d "$1/.git" ]; then
echo "$1/.git"
else
echo "$1"
fi
}
# The following function is based on code from:
#
# bash_completion - programmable completion functions for bash 3.2+
#
# Copyright © 2006-2008, Ian Macdonald <ian@caliban.org>
# © 2009-2010, Bash Completion Maintainers
# <bash-completion-devel@lists.alioth.debian.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# The latest version of this software can be obtained here:
#
# http://bash-completion.alioth.debian.org/
#
# RELEASE: 2.x
# This function can be used to access a tokenized list of words
# on the command line:
#
# __git_reassemble_comp_words_by_ref '=:'
# if test "${words_[cword_-1]}" = -w
# then
# ...
# fi
#
# The argument should be a collection of characters from the list of
# word completion separators (COMP_WORDBREAKS) to treat as ordinary
# characters.
#
# This is roughly equivalent to going back in time and setting
# COMP_WORDBREAKS to exclude those characters. The intent is to
# make option types like --date=<type> and <rev>:<path> easy to
# recognize by treating each shell word as a single token.
#
# It is best not to set COMP_WORDBREAKS directly because the value is
# shared with other completion scripts. By the time the completion
# function gets called, COMP_WORDS has already been populated so local
# changes to COMP_WORDBREAKS have no effect.
#
# Output: words_, cword_, cur_.
__git_reassemble_comp_words_by_ref()
{
local exclude i j first
# Which word separators to exclude?
exclude="${1//[^$COMP_WORDBREAKS]}"
cword_=$COMP_CWORD
if [ -z "$exclude" ]; then
words_=("${COMP_WORDS[@]}")
return
fi
# List of word completion separators has shrunk;
# re-assemble words to complete.
for ((i=0, j=0; i < ${#COMP_WORDS[@]}; i++, j++)); do
# Append each nonempty word consisting of just
# word separator characters to the current word.
first=t
while
[ $i -gt 0 ] &&
[ -n "${COMP_WORDS[$i]}" ] &&
# word consists of excluded word separators
[ "${COMP_WORDS[$i]//[^$exclude]}" = "${COMP_WORDS[$i]}" ]
do
# Attach to the previous token,
# unless the previous token is the command name.
if [ $j -ge 2 ] && [ -n "$first" ]; then
((j--))
fi
first=
words_[$j]=${words_[j]}${COMP_WORDS[i]}
if [ $i = $COMP_CWORD ]; then
cword_=$j
fi
if (($i < ${#COMP_WORDS[@]} - 1)); then
((i++))
else
# Done.
return
fi
done
words_[$j]=${words_[j]}${COMP_WORDS[i]}
if [ $i = $COMP_CWORD ]; then
cword_=$j
fi
done
}
if ! type _get_comp_words_by_ref >/dev/null 2>&1; then
_get_comp_words_by_ref ()
{
local exclude cur_ words_ cword_
if [ "$1" = "-n" ]; then
exclude=$2
shift 2
fi
__git_reassemble_comp_words_by_ref "$exclude"
cur_=${words_[cword_]}
while [ $# -gt 0 ]; do
case "$1" in
cur)
cur=$cur_
;;
prev)
prev=${words_[$cword_-1]}
;;
words)
words=("${words_[@]}")
;;
cword)
cword=$cword_
;;
esac
shift
done
}
fi
__gitcompadd ()
{
local i=0
for x in $1; do
if [[ "$x" == "$3"* ]]; then
COMPREPLY[i++]="$2$x$4"
fi
done
}
# Generates completion reply, appending a space to possible completion words,
# if necessary.
# It accepts 1 to 4 arguments:
# 1: List of possible completion words.
# 2: A prefix to be added to each possible completion word (optional).
# 3: Generate possible completion matches for this word (optional).
# 4: A suffix to be appended to each possible completion word (optional).
__gitcomp ()
{
local cur_="${3-$cur}"
case "$cur_" in
--*=)
;;
*)
local c i=0 IFS=$' \t\n'
for c in $1; do
c="$c${4-}"
if [[ $c == "$cur_"* ]]; then
case $c in
--*=*|*.) ;;
*) c="$c " ;;
esac
COMPREPLY[i++]="${2-}$c"
fi
done
;;
esac
}
# Generates completion reply from newline-separated possible completion words
# by appending a space to all of them.
# It accepts 1 to 4 arguments:
# 1: List of possible completion words, separated by a single newline.
# 2: A prefix to be added to each possible completion word (optional).
# 3: Generate possible completion matches for this word (optional).
# 4: A suffix to be appended to each possible completion word instead of
# the default space (optional). If specified but empty, nothing is
# appended.
__gitcomp_nl ()
{
local IFS=$'\n'
__gitcompadd "$1" "${2-}" "${3-$cur}" "${4- }"
}
# Generates completion reply with compgen from newline-separated possible
# completion filenames.
# It accepts 1 to 3 arguments:
# 1: List of possible completion filenames, separated by a single newline.
# 2: A directory prefix to be added to each possible completion filename
# (optional).
# 3: Generate possible completion matches for this word (optional).
__gitcomp_file ()
{
local IFS=$'\n'
# XXX does not work when the directory prefix contains a tilde,
# since tilde expansion is not applied.
# This means that COMPREPLY will be empty and Bash default
# completion will be used.
__gitcompadd "$1" "${2-}" "${3-$cur}" ""
# use a hack to enable file mode in bash < 4
compopt -o filenames +o nospace 2>/dev/null ||
compgen -f /non-existing-dir/ > /dev/null
}
# Execute 'git ls-files', unless the --committable option is specified, in
# which case it runs 'git diff-index' to find out the files that can be
# committed. It returns paths relative to the directory specified in the first
# argument, and using the options specified in the second argument.
__git_ls_files_helper ()
{
(
test -n "${CDPATH+set}" && unset CDPATH
cd "$1"
if [ "$2" == "--committable" ]; then
git diff-index --name-only --relative HEAD
else
# NOTE: $2 is not quoted in order to support multiple options
git ls-files --exclude-standard $2
fi
) 2>/dev/null
}
# __git_index_files accepts 1 or 2 arguments:
# 1: Options to pass to ls-files (required).
# 2: A directory path (optional).
# If provided, only files within the specified directory are listed.
# Sub directories are never recursed. Path must have a trailing
# slash.
__git_index_files ()
{
local dir="$(__gitdir)" root="${2-.}" file
if [ -d "$dir" ]; then
__git_ls_files_helper "$root" "$1" |
while read -r file; do
case "$file" in
?*/*) echo "${file%%/*}" ;;
*) echo "$file" ;;
esac
done | sort | uniq
fi
}
__git_heads ()
{
local dir="$(__gitdir)"
if [ -d "$dir" ]; then
git --git-dir="$dir" for-each-ref --format='%(refname:short)' \
refs/heads
return
fi
}
__git_tags ()
{
local dir="$(__gitdir)"
if [ -d "$dir" ]; then
git --git-dir="$dir" for-each-ref --format='%(refname:short)' \
refs/tags
return
fi
}
# __git_refs accepts 0, 1 (to pass to __gitdir), or 2 arguments
# presence of 2nd argument means use the guess heuristic employed
# by checkout for tracking branches
__git_refs ()
{
local i hash dir="$(__gitdir "${1-}")" track="${2-}"
local format refs
if [ -d "$dir" ]; then
case "$cur" in
refs|refs/*)
format="refname"
refs="${cur%/*}"
track=""
;;
*)
for i in HEAD FETCH_HEAD ORIG_HEAD MERGE_HEAD; do
if [ -e "$dir/$i" ]; then echo $i; fi
done
format="refname:short"
refs="refs/tags refs/heads refs/remotes"
;;
esac
git --git-dir="$dir" for-each-ref --format="%($format)" \
$refs
if [ -n "$track" ]; then
# employ the heuristic used by git checkout
# Try to find a remote branch that matches the completion word
# but only output if the branch name is unique
local ref entry
git --git-dir="$dir" for-each-ref --shell --format="ref=%(refname:short)" \
"refs/remotes/" | \
while read -r entry; do
eval "$entry"
ref="${ref#*/}"
if [[ "$ref" == "$cur"* ]]; then
echo "$ref"
fi
done | sort | uniq -u
fi
return
fi
case "$cur" in
refs|refs/*)
git ls-remote "$dir" "$cur*" 2>/dev/null | \
while read -r hash i; do
case "$i" in
*^{}) ;;
*) echo "$i" ;;
esac
done
;;
*)
echo "HEAD"
git for-each-ref --format="%(refname:short)" -- "refs/remotes/$dir/" | sed -e "s#^$dir/##"
;;
esac
}
# __git_refs2 requires 1 argument (to pass to __git_refs)
__git_refs2 ()
{
local i
for i in $(__git_refs "$1"); do
echo "$i:$i"
done
}
# __git_refs_remotes requires 1 argument (to pass to ls-remote)
__git_refs_remotes ()
{
local i hash
git ls-remote "$1" 'refs/heads/*' 2>/dev/null | \
while read -r hash i; do
echo "$i:refs/remotes/$1/${i#refs/heads/}"
done
}
__git_remotes ()
{
local i IFS=$'\n' d="$(__gitdir)"
test -d "$d/remotes" && ls -1 "$d/remotes"
for i in $(git --git-dir="$d" config --get-regexp 'remote\..*\.url' 2>/dev/null); do
i="${i#remote.}"
echo "${i/.url*/}"
done
}
__git_list_merge_strategies ()
{
git merge -s help 2>&1 |
sed -n -e '/[Aa]vailable strategies are: /,/^$/{
s/\.$//
s/.*://
s/^[ ]*//
s/[ ]*$//
p
}'
}
__git_merge_strategies=
# 'git merge -s help' (and thus detection of the merge strategy
# list) fails, unfortunately, if run outside of any git working
# tree. __git_merge_strategies is set to the empty string in
# that case, and the detection will be repeated the next time it
# is needed.
__git_compute_merge_strategies ()
{
test -n "$__git_merge_strategies" ||
__git_merge_strategies=$(__git_list_merge_strategies)
}
__git_complete_revlist_file ()
{
local pfx ls ref cur_="$cur"
case "$cur_" in
*..?*:*)
return
;;
?*:*)
ref="${cur_%%:*}"
cur_="${cur_#*:}"
case "$cur_" in
?*/*)
pfx="${cur_%/*}"
cur_="${cur_##*/}"
ls="$ref:$pfx"
pfx="$pfx/"
;;
*)
ls="$ref"
;;
esac
case "$COMP_WORDBREAKS" in
*:*) : great ;;
*) pfx="$ref:$pfx" ;;
esac
__gitcomp_nl "$(git --git-dir="$(__gitdir)" ls-tree "$ls" 2>/dev/null \
| sed '/^100... blob /{
s,^.* ,,
s,$, ,
}
/^120000 blob /{
s,^.* ,,
s,$, ,
}
/^040000 tree /{
s,^.* ,,
s,$,/,
}
s/^.* //')" \
"$pfx" "$cur_" ""
;;
*...*)
pfx="${cur_%...*}..."
cur_="${cur_#*...}"
__gitcomp_nl "$(__git_refs)" "$pfx" "$cur_"
;;
*..*)
pfx="${cur_%..*}.."
cur_="${cur_#*..}"
__gitcomp_nl "$(__git_refs)" "$pfx" "$cur_"
;;
*)
__gitcomp_nl "$(__git_refs)"
;;
esac
}
# __git_complete_index_file requires 1 argument:
# 1: the options to pass to ls-file
#
# The exception is --committable, which finds the files appropriate for commit.
__git_complete_index_file ()
{
local pfx="" cur_="$cur"
case "$cur_" in
?*/*)
pfx="${cur_%/*}"
cur_="${cur_##*/}"
pfx="${pfx}/"
;;
esac
__gitcomp_file "$(__git_index_files "$1" "$pfx")" "$pfx" "$cur_"
}
__git_complete_file ()
{
__git_complete_revlist_file
}
__git_complete_revlist ()
{
__git_complete_revlist_file
}
__git_complete_remote_or_refspec ()
{
local cur_="$cur" cmd="${words[1]}"
local i c=2 remote="" pfx="" lhs=1 no_complete_refspec=0
if [ "$cmd" = "remote" ]; then
((c++))
fi
while [ $c -lt $cword ]; do
i="${words[c]}"
case "$i" in
--mirror) [ "$cmd" = "push" ] && no_complete_refspec=1 ;;
--all)
case "$cmd" in
push) no_complete_refspec=1 ;;
fetch)
return
;;
*) ;;
esac
;;
-*) ;;
*) remote="$i"; break ;;
esac
((c++))
done
if [ -z "$remote" ]; then
__gitcomp_nl "$(__git_remotes)"
return
fi
if [ $no_complete_refspec = 1 ]; then
return
fi
[ "$remote" = "." ] && remote=
case "$cur_" in
*:*)
case "$COMP_WORDBREAKS" in
*:*) : great ;;
*) pfx="${cur_%%:*}:" ;;
esac
cur_="${cur_#*:}"
lhs=0
;;
+*)
pfx="+"
cur_="${cur_#+}"
;;
esac
case "$cmd" in
fetch)
if [ $lhs = 1 ]; then
__gitcomp_nl "$(__git_refs2 "$remote")" "$pfx" "$cur_"
else
__gitcomp_nl "$(__git_refs)" "$pfx" "$cur_"
fi
;;
pull|remote)
if [ $lhs = 1 ]; then
__gitcomp_nl "$(__git_refs "$remote")" "$pfx" "$cur_"
else
__gitcomp_nl "$(__git_refs)" "$pfx" "$cur_"
fi
;;
push)
if [ $lhs = 1 ]; then
__gitcomp_nl "$(__git_refs)" "$pfx" "$cur_"
else
__gitcomp_nl "$(__git_refs "$remote")" "$pfx" "$cur_"
fi
;;
esac
}
__git_complete_strategy ()
{
__git_compute_merge_strategies
case "$prev" in
-s|--strategy)
__gitcomp "$__git_merge_strategies"
return 0
esac
case "$cur" in
--strategy=*)
__gitcomp "$__git_merge_strategies" "" "${cur##--strategy=}"
return 0
;;
esac
return 1
}
__git_commands () {
if test -n "${GIT_TESTING_COMMAND_COMPLETION:-}"
then
printf "%s" "${GIT_TESTING_COMMAND_COMPLETION}"
else
git help -a|egrep '^ [a-zA-Z0-9]'
fi
}
__git_list_all_commands ()
{
local i IFS=" "$'\n'
for i in $(__git_commands)
do
case $i in
*--*) : helper pattern;;
*) echo $i;;
esac
done
}
__git_all_commands=
__git_compute_all_commands ()
{
test -n "$__git_all_commands" ||
__git_all_commands=$(__git_list_all_commands)
}
__git_list_porcelain_commands ()
{
local i IFS=" "$'\n'
__git_compute_all_commands
for i in $__git_all_commands
do
case $i in
*--*) : helper pattern;;
applymbox) : ask gittus;;
applypatch) : ask gittus;;
archimport) : import;;
cat-file) : plumbing;;
check-attr) : plumbing;;
check-ignore) : plumbing;;
check-mailmap) : plumbing;;
check-ref-format) : plumbing;;
checkout-index) : plumbing;;
commit-tree) : plumbing;;
count-objects) : infrequent;;
credential-cache) : credentials helper;;
credential-store) : credentials helper;;
cvsexportcommit) : export;;
cvsimport) : import;;
cvsserver) : daemon;;
daemon) : daemon;;
diff-files) : plumbing;;
diff-index) : plumbing;;
diff-tree) : plumbing;;
fast-import) : import;;
fast-export) : export;;
fsck-objects) : plumbing;;
fetch-pack) : plumbing;;
fmt-merge-msg) : plumbing;;
for-each-ref) : plumbing;;
hash-object) : plumbing;;
http-*) : transport;;
index-pack) : plumbing;;
init-db) : deprecated;;
local-fetch) : plumbing;;
lost-found) : infrequent;;
ls-files) : plumbing;;
ls-remote) : plumbing;;
ls-tree) : plumbing;;
mailinfo) : plumbing;;
mailsplit) : plumbing;;
merge-*) : plumbing;;
mktree) : plumbing;;
mktag) : plumbing;;
pack-objects) : plumbing;;
pack-redundant) : plumbing;;
pack-refs) : plumbing;;
parse-remote) : plumbing;;
patch-id) : plumbing;;
peek-remote) : plumbing;;
prune) : plumbing;;
prune-packed) : plumbing;;
quiltimport) : import;;
read-tree) : plumbing;;
receive-pack) : plumbing;;
remote-*) : transport;;
repo-config) : deprecated;;
rerere) : plumbing;;
rev-list) : plumbing;;
rev-parse) : plumbing;;
runstatus) : plumbing;;
sh-setup) : internal;;
shell) : daemon;;
show-ref) : plumbing;;
send-pack) : plumbing;;
show-index) : plumbing;;
ssh-*) : transport;;
stripspace) : plumbing;;
symbolic-ref) : plumbing;;
tar-tree) : deprecated;;
unpack-file) : plumbing;;
unpack-objects) : plumbing;;
update-index) : plumbing;;
update-ref) : plumbing;;
update-server-info) : daemon;;
upload-archive) : plumbing;;
upload-pack) : plumbing;;
write-tree) : plumbing;;
var) : infrequent;;
verify-pack) : infrequent;;
verify-tag) : plumbing;;
*) echo $i;;
esac
done
}
__git_porcelain_commands=
__git_compute_porcelain_commands ()
{
__git_compute_all_commands
test -n "$__git_porcelain_commands" ||
__git_porcelain_commands=$(__git_list_porcelain_commands)
}
__git_pretty_aliases ()
{
local i IFS=$'\n'
for i in $(git --git-dir="$(__gitdir)" config --get-regexp "pretty\..*" 2>/dev/null); do
case "$i" in
pretty.*)
i="${i#pretty.}"
echo "${i/ */}"
;;
esac
done
}
__git_aliases ()
{
local i IFS=$'\n'
for i in $(git --git-dir="$(__gitdir)" config --get-regexp "alias\..*" 2>/dev/null); do
case "$i" in
alias.*)
i="${i#alias.}"
echo "${i/ */}"
;;
esac
done
}
# __git_aliased_command requires 1 argument
__git_aliased_command ()
{
local word cmdline=$(git --git-dir="$(__gitdir)" \
config --get "alias.$1")
for word in $cmdline; do
case "$word" in
\!gitk|gitk)
echo "gitk"
return
;;
\!*) : shell command alias ;;
-*) : option ;;
*=*) : setting env ;;
git) : git itself ;;
*)
echo "$word"
return
esac
done
}
# __git_find_on_cmdline requires 1 argument
__git_find_on_cmdline ()
{
local word subcommand c=1
while [ $c -lt $cword ]; do
word="${words[c]}"
for subcommand in $1; do
if [ "$subcommand" = "$word" ]; then
echo "$subcommand"
return
fi
done
((c++))
done
}
__git_has_doubledash ()
{
local c=1
while [ $c -lt $cword ]; do
if [ "--" = "${words[c]}" ]; then
return 0
fi
((c++))
done
return 1
}
# Try to count non option arguments passed on the command line for the
# specified git command.
# When options are used, it is necessary to use the special -- option to
# tell the implementation where non option arguments begin.
# XXX this can not be improved, since options can appear everywhere, as
# an example:
# git mv x -n y
#
# __git_count_arguments requires 1 argument: the git command executed.
__git_count_arguments ()
{
local word i c=0
# Skip "git" (first argument)
for ((i=1; i < ${#words[@]}; i++)); do
word="${words[i]}"
case "$word" in
--)
# Good; we can assume that the following are only non
# option arguments.
((c = 0))
;;
"$1")
# Skip the specified git command and discard git
# main options
((c = 0))
;;
?*)
((c++))
;;
esac
done
printf "%d" $c
}
__git_whitespacelist="nowarn warn error error-all fix"
_git_am ()
{
local dir="$(__gitdir)"
if [ -d "$dir"/rebase-apply ]; then
__gitcomp "--skip --continue --resolved --abort"
return
fi
case "$cur" in
--whitespace=*)
__gitcomp "$__git_whitespacelist" "" "${cur##--whitespace=}"
return
;;
--*)
__gitcomp "
--3way --committer-date-is-author-date --ignore-date
--ignore-whitespace --ignore-space-change
--interactive --keep --no-utf8 --signoff --utf8
--whitespace= --scissors
"
return
esac
}
_git_apply ()
{
case "$cur" in
--whitespace=*)
__gitcomp "$__git_whitespacelist" "" "${cur##--whitespace=}"
return
;;
--*)
__gitcomp "
--stat --numstat --summary --check --index
--cached --index-info --reverse --reject --unidiff-zero
--apply --no-add --exclude=
--ignore-whitespace --ignore-space-change
--whitespace= --inaccurate-eof --verbose
"
return
esac
}
_git_add ()
{
case "$cur" in
--*)
__gitcomp "
--interactive --refresh --patch --update --dry-run
--ignore-errors --intent-to-add
"
return
esac
# XXX should we check for --update and --all options ?
__git_complete_index_file "--others --modified"
}
_git_archive ()
{
case "$cur" in
--format=*)
__gitcomp "$(git archive --list)" "" "${cur##--format=}"
return
;;
--remote=*)
__gitcomp_nl "$(__git_remotes)" "" "${cur##--remote=}"
return
;;
--*)
__gitcomp "
--format= --list --verbose
--prefix= --remote= --exec=
"
return
;;
esac
__git_complete_file
}
_git_bisect ()
{
__git_has_doubledash && return
local subcommands="start bad good skip reset visualize replay log run"
local subcommand="$(__git_find_on_cmdline "$subcommands")"
if [ -z "$subcommand" ]; then
if [ -f "$(__gitdir)"/BISECT_START ]; then
__gitcomp "$subcommands"
else
__gitcomp "replay start"
fi
return
fi
case "$subcommand" in
bad|good|reset|skip|start)
__gitcomp_nl "$(__git_refs)"
;;
*)
;;
esac
}
_git_branch ()
{
local i c=1 only_local_ref="n" has_r="n"
while [ $c -lt $cword ]; do
i="${words[c]}"
case "$i" in
-d|-m) only_local_ref="y" ;;
-r) has_r="y" ;;
esac
((c++))
done
case "$cur" in
--set-upstream-to=*)
__gitcomp "$(__git_refs)" "" "${cur##--set-upstream-to=}"
;;
--*)
__gitcomp "
--color --no-color --verbose --abbrev= --no-abbrev
--track --no-track --contains --merged --no-merged
--set-upstream-to= --edit-description --list
--unset-upstream
"
;;
*)
if [ $only_local_ref = "y" -a $has_r = "n" ]; then
__gitcomp_nl "$(__git_heads)"
else
__gitcomp_nl "$(__git_refs)"
fi
;;
esac
}
_git_bundle ()
{
local cmd="${words[2]}"
case "$cword" in
2)
__gitcomp "create list-heads verify unbundle"
;;
3)
# looking for a file
;;
*)
case "$cmd" in
create)
__git_complete_revlist
;;
esac
;;
esac
}
_git_checkout ()
{
__git_has_doubledash && return
case "$cur" in
--conflict=*)
__gitcomp "diff3 merge" "" "${cur##--conflict=}"
;;
--*)
__gitcomp "
--quiet --ours --theirs --track --no-track --merge
--conflict= --orphan --patch
"
;;
*)
# check if --track, --no-track, or --no-guess was specified
# if so, disable DWIM mode
local flags="--track --no-track --no-guess" track=1
if [ -n "$(__git_find_on_cmdline "$flags")" ]; then
track=''
fi
__gitcomp_nl "$(__git_refs '' $track)"
;;
esac
}
_git_cherry ()
{
__gitcomp "$(__git_refs)"
}
_git_cherry_pick ()
{
local dir="$(__gitdir)"
if [ -f "$dir"/CHERRY_PICK_HEAD ]; then
__gitcomp "--continue --quit --abort"
return
fi
case "$cur" in
--*)
__gitcomp "--edit --no-commit --signoff --strategy= --mainline"
;;
*)
__gitcomp_nl "$(__git_refs)"
;;
esac
}
_git_clean ()
{
case "$cur" in
--*)
__gitcomp "--dry-run --quiet"
return
;;
esac
# XXX should we check for -x option ?
__git_complete_index_file "--others"
}
_git_clone ()
{
case "$cur" in
--*)
__gitcomp "
--local
--no-hardlinks
--shared
--reference
--quiet
--no-checkout
--bare
--mirror
--origin
--upload-pack
--template=
--depth
--single-branch
--branch
"
return
;;
esac
}
_git_commit ()
{
case "$prev" in
-c|-C)
__gitcomp_nl "$(__git_refs)" "" "${cur}"
return
;;
esac
case "$cur" in
--cleanup=*)
__gitcomp "default strip verbatim whitespace
" "" "${cur##--cleanup=}"
return
;;
--reuse-message=*|--reedit-message=*|\
--fixup=*|--squash=*)
__gitcomp_nl "$(__git_refs)" "" "${cur#*=}"
return
;;
--untracked-files=*)
__gitcomp "all no normal" "" "${cur##--untracked-files=}"
return
;;
--*)
__gitcomp "
--all --author= --signoff --verify --no-verify
--edit --no-edit
--amend --include --only --interactive
--dry-run --reuse-message= --reedit-message=
--reset-author --file= --message= --template=
--cleanup= --untracked-files --untracked-files=
--verbose --quiet --fixup= --squash=
"
return
esac
if git rev-parse --verify --quiet HEAD >/dev/null; then
__git_complete_index_file "--committable"
else
# This is the first commit
__git_complete_index_file "--cached"
fi
}
_git_describe ()
{
case "$cur" in
--*)
__gitcomp "
--all --tags --contains --abbrev= --candidates=
--exact-match --debug --long --match --always
"
return
esac
__gitcomp_nl "$(__git_refs)"
}
__git_diff_algorithms="myers minimal patience histogram"
__git_diff_common_options="--stat --numstat --shortstat --summary
--patch-with-stat --name-only --name-status --color
--no-color --color-words --no-renames --check
--full-index --binary --abbrev --diff-filter=
--find-copies-harder
--text --ignore-space-at-eol --ignore-space-change
--ignore-all-space --exit-code --quiet --ext-diff
--no-ext-diff
--no-prefix --src-prefix= --dst-prefix=
--inter-hunk-context=
--patience --histogram --minimal
--raw --word-diff
--dirstat --dirstat= --dirstat-by-file
--dirstat-by-file= --cumulative
--diff-algorithm=
"
_git_diff ()
{
__git_has_doubledash && return
case "$cur" in
--diff-algorithm=*)
__gitcomp "$__git_diff_algorithms" "" "${cur##--diff-algorithm=}"
return
;;
--*)
__gitcomp "--cached --staged --pickaxe-all --pickaxe-regex
--base --ours --theirs --no-index
$__git_diff_common_options
"
return
;;
esac
__git_complete_revlist_file
}
__git_mergetools_common="diffuse ecmerge emerge kdiff3 meld opendiff
tkdiff vimdiff gvimdiff xxdiff araxis p4merge bc3 codecompare
"
_git_difftool ()
{
__git_has_doubledash && return
case "$cur" in
--tool=*)
__gitcomp "$__git_mergetools_common kompare" "" "${cur##--tool=}"
return
;;
--*)
__gitcomp "--cached --staged --pickaxe-all --pickaxe-regex
--base --ours --theirs
--no-renames --diff-filter= --find-copies-harder
--relative --ignore-submodules
--tool="
return
;;
esac
__git_complete_revlist_file
}
__git_fetch_options="
--quiet --verbose --append --upload-pack --force --keep --depth=
--tags --no-tags --all --prune --dry-run
"
_git_fetch ()
{
case "$cur" in
--*)
__gitcomp "$__git_fetch_options"
return
;;
esac
__git_complete_remote_or_refspec
}
__git_format_patch_options="
--stdout --attach --no-attach --thread --thread= --no-thread
--numbered --start-number --numbered-files --keep-subject --signoff
--signature --no-signature --in-reply-to= --cc= --full-index --binary
--not --all --cover-letter --no-prefix --src-prefix= --dst-prefix=
--inline --suffix= --ignore-if-in-upstream --subject-prefix=
--output-directory --reroll-count --to= --quiet --notes
"
_git_format_patch ()
{
case "$cur" in
--thread=*)
__gitcomp "
deep shallow
" "" "${cur##--thread=}"
return
;;
--*)
__gitcomp "$__git_format_patch_options"
return
;;
esac
__git_complete_revlist
}
_git_fsck ()
{
case "$cur" in
--*)
__gitcomp "
--tags --root --unreachable --cache --no-reflogs --full
--strict --verbose --lost-found
"
return
;;
esac
}
_git_gc ()
{
case "$cur" in
--*)
__gitcomp "--prune --aggressive"
return
;;
esac
}
_git_gitk ()
{
_gitk
}
__git_match_ctag() {
awk "/^${1////\\/}/ { print \$1 }" "$2"
}
_git_grep ()
{
__git_has_doubledash && return
case "$cur" in
--*)
__gitcomp "
--cached
--text --ignore-case --word-regexp --invert-match
--full-name --line-number
--extended-regexp --basic-regexp --fixed-strings
--perl-regexp
--files-with-matches --name-only
--files-without-match
--max-depth
--count
--and --or --not --all-match
"
return
;;
esac
case "$cword,$prev" in
2,*|*,-*)
if test -r tags; then
__gitcomp_nl "$(__git_match_ctag "$cur" tags)"
return
fi
;;
esac
__gitcomp_nl "$(__git_refs)"
}
_git_help ()
{
case "$cur" in
--*)
__gitcomp "--all --info --man --web"
return
;;
esac
__git_compute_all_commands
__gitcomp "$__git_all_commands $(__git_aliases)
attributes cli core-tutorial cvs-migration
diffcore gitk glossary hooks ignore modules
namespaces repository-layout tutorial tutorial-2
workflows
"
}
_git_init ()
{
case "$cur" in
--shared=*)
__gitcomp "
false true umask group all world everybody
" "" "${cur##--shared=}"
return
;;
--*)
__gitcomp "--quiet --bare --template= --shared --shared="
return
;;
esac
}
_git_ls_files ()
{
case "$cur" in
--*)
__gitcomp "--cached --deleted --modified --others --ignored
--stage --directory --no-empty-directory --unmerged
--killed --exclude= --exclude-from=
--exclude-per-directory= --exclude-standard
--error-unmatch --with-tree= --full-name
--abbrev --ignored --exclude-per-directory
"
return
;;
esac
# XXX ignore options like --modified and always suggest all cached
# files.
__git_complete_index_file "--cached"
}
_git_ls_remote ()
{
__gitcomp_nl "$(__git_remotes)"
}
_git_ls_tree ()
{
__git_complete_file
}
# Options that go well for log, shortlog and gitk
__git_log_common_options="
--not --all
--branches --tags --remotes
--first-parent --merges --no-merges
--max-count=
--max-age= --since= --after=
--min-age= --until= --before=
--min-parents= --max-parents=
--no-min-parents --no-max-parents
"
# Options that go well for log and gitk (not shortlog)
__git_log_gitk_options="
--dense --sparse --full-history
--simplify-merges --simplify-by-decoration
--left-right --notes --no-notes
"
# Options that go well for log and shortlog (not gitk)
__git_log_shortlog_options="
--author= --committer= --grep=
--all-match
"
__git_log_pretty_formats="oneline short medium full fuller email raw format:"
__git_log_date_formats="relative iso8601 rfc2822 short local default raw"
_git_log ()
{
__git_has_doubledash && return
local g="$(git rev-parse --git-dir 2>/dev/null)"
local merge=""
if [ -f "$g/MERGE_HEAD" ]; then
merge="--merge"
fi
case "$cur" in
--pretty=*|--format=*)
__gitcomp "$__git_log_pretty_formats $(__git_pretty_aliases)
" "" "${cur#*=}"
return
;;
--date=*)
__gitcomp "$__git_log_date_formats" "" "${cur##--date=}"
return
;;
--decorate=*)
__gitcomp "long short" "" "${cur##--decorate=}"
return
;;
--*)
__gitcomp "
$__git_log_common_options
$__git_log_shortlog_options
$__git_log_gitk_options
--root --topo-order --date-order --reverse
--follow --full-diff
--abbrev-commit --abbrev=
--relative-date --date=
--pretty= --format= --oneline
--cherry-pick
--graph
--decorate --decorate=
--walk-reflogs
--parents --children
$merge
$__git_diff_common_options
--pickaxe-all --pickaxe-regex
"
return
;;
esac
__git_complete_revlist
}
__git_merge_options="
--no-commit --no-stat --log --no-log --squash --strategy
--commit --stat --no-squash --ff --no-ff --ff-only --edit --no-edit
"
_git_merge ()
{
__git_complete_strategy && return
case "$cur" in
--*)
__gitcomp "$__git_merge_options"
return
esac
__gitcomp_nl "$(__git_refs)"
}
_git_mergetool ()
{
case "$cur" in
--tool=*)
__gitcomp "$__git_mergetools_common tortoisemerge" "" "${cur##--tool=}"
return
;;
--*)
__gitcomp "--tool="
return
;;
esac
}
_git_merge_base ()
{
__gitcomp_nl "$(__git_refs)"
}
_git_mv ()
{
case "$cur" in
--*)
__gitcomp "--dry-run"
return
;;
esac
if [ $(__git_count_arguments "mv") -gt 0 ]; then
# We need to show both cached and untracked files (including
# empty directories) since this may not be the last argument.
__git_complete_index_file "--cached --others --directory"
else
__git_complete_index_file "--cached"
fi
}
_git_name_rev ()
{
__gitcomp "--tags --all --stdin"
}
_git_notes ()
{
local subcommands='add append copy edit list prune remove show'
local subcommand="$(__git_find_on_cmdline "$subcommands")"
case "$subcommand,$cur" in
,--*)
__gitcomp '--ref'
;;
,*)
case "$prev" in
--ref)
__gitcomp_nl "$(__git_refs)"
;;
*)
__gitcomp "$subcommands --ref"
;;
esac
;;
add,--reuse-message=*|append,--reuse-message=*|\
add,--reedit-message=*|append,--reedit-message=*)
__gitcomp_nl "$(__git_refs)" "" "${cur#*=}"
;;
add,--*|append,--*)
__gitcomp '--file= --message= --reedit-message=
--reuse-message='
;;
copy,--*)
__gitcomp '--stdin'
;;
prune,--*)
__gitcomp '--dry-run --verbose'
;;
prune,*)
;;
*)
case "$prev" in
-m|-F)
;;
*)
__gitcomp_nl "$(__git_refs)"
;;
esac
;;
esac
}
_git_pull ()
{
__git_complete_strategy && return
case "$cur" in
--*)
__gitcomp "
--rebase --no-rebase
$__git_merge_options
$__git_fetch_options
"
return
;;
esac
__git_complete_remote_or_refspec
}
_git_push ()
{
case "$prev" in
--repo)
__gitcomp_nl "$(__git_remotes)"
return
esac
case "$cur" in
--repo=*)
__gitcomp_nl "$(__git_remotes)" "" "${cur##--repo=}"
return
;;
--*)
__gitcomp "
--all --mirror --tags --dry-run --force --verbose
--receive-pack= --repo= --set-upstream
"
return
;;
esac
__git_complete_remote_or_refspec
}
_git_rebase ()
{
local dir="$(__gitdir)"
if [ -d "$dir"/rebase-apply ] || [ -d "$dir"/rebase-merge ]; then
__gitcomp "--continue --skip --abort"
return
fi
__git_complete_strategy && return
case "$cur" in
--whitespace=*)
__gitcomp "$__git_whitespacelist" "" "${cur##--whitespace=}"
return
;;
--*)
__gitcomp "
--onto --merge --strategy --interactive
--preserve-merges --stat --no-stat
--committer-date-is-author-date --ignore-date
--ignore-whitespace --whitespace=
--autosquash
"
return
esac
__gitcomp_nl "$(__git_refs)"
}
_git_reflog ()
{
local subcommands="show delete expire"
local subcommand="$(__git_find_on_cmdline "$subcommands")"
if [ -z "$subcommand" ]; then
__gitcomp "$subcommands"
else
__gitcomp_nl "$(__git_refs)"
fi
}
__git_send_email_confirm_options="always never auto cc compose"
__git_send_email_suppresscc_options="author self cc bodycc sob cccmd body all"
_git_send_email ()
{
case "$cur" in
--confirm=*)
__gitcomp "
$__git_send_email_confirm_options
" "" "${cur##--confirm=}"
return
;;
--suppress-cc=*)
__gitcomp "
$__git_send_email_suppresscc_options
" "" "${cur##--suppress-cc=}"
return
;;
--smtp-encryption=*)
__gitcomp "ssl tls" "" "${cur##--smtp-encryption=}"
return
;;
--thread=*)
__gitcomp "
deep shallow
" "" "${cur##--thread=}"
return
;;
--*)
__gitcomp "--annotate --bcc --cc --cc-cmd --chain-reply-to
--compose --confirm= --dry-run --envelope-sender
--from --identity
--in-reply-to --no-chain-reply-to --no-signed-off-by-cc
--no-suppress-from --no-thread --quiet
--signed-off-by-cc --smtp-pass --smtp-server
--smtp-server-port --smtp-encryption= --smtp-user
--subject --suppress-cc= --suppress-from --thread --to
--validate --no-validate
$__git_format_patch_options"
return
;;
esac
__git_complete_revlist
}
_git_stage ()
{
_git_add
}
__git_config_get_set_variables ()
{
local prevword word config_file= c=$cword
while [ $c -gt 1 ]; do
word="${words[c]}"
case "$word" in
--system|--global|--local|--file=*)
config_file="$word"
break
;;
-f|--file)
config_file="$word $prevword"
break
;;
esac
prevword=$word
c=$((--c))
done
git --git-dir="$(__gitdir)" config $config_file --list 2>/dev/null |
while read -r line
do
case "$line" in
*.*=*)
echo "${line/=*/}"
;;
esac
done
}
_git_config ()
{
case "$prev" in
branch.*.remote|branch.*.pushremote)
__gitcomp_nl "$(__git_remotes)"
return
;;
branch.*.merge)
__gitcomp_nl "$(__git_refs)"
return
;;
branch.*.rebase)
__gitcomp "false true"
return
;;
remote.pushdefault)
__gitcomp_nl "$(__git_remotes)"
return
;;
remote.*.fetch)
local remote="${prev#remote.}"
remote="${remote%.fetch}"
if [ -z "$cur" ]; then
__gitcomp_nl "refs/heads/" "" "" ""
return
fi
__gitcomp_nl "$(__git_refs_remotes "$remote")"
return
;;
remote.*.push)
local remote="${prev#remote.}"
remote="${remote%.push}"
__gitcomp_nl "$(git --git-dir="$(__gitdir)" \
for-each-ref --format='%(refname):%(refname)' \
refs/heads)"
return
;;
pull.twohead|pull.octopus)
__git_compute_merge_strategies
__gitcomp "$__git_merge_strategies"
return
;;
color.branch|color.diff|color.interactive|\
color.showbranch|color.status|color.ui)
__gitcomp "always never auto"
return
;;
color.pager)
__gitcomp "false true"
return
;;
color.*.*)
__gitcomp "
normal black red green yellow blue magenta cyan white
bold dim ul blink reverse
"
return
;;
diff.submodule)
__gitcomp "log short"
return
;;
help.format)
__gitcomp "man info web html"
return
;;
log.date)
__gitcomp "$__git_log_date_formats"
return
;;
sendemail.aliasesfiletype)
__gitcomp "mutt mailrc pine elm gnus"
return
;;
sendemail.confirm)
__gitcomp "$__git_send_email_confirm_options"
return
;;
sendemail.suppresscc)
__gitcomp "$__git_send_email_suppresscc_options"
return
;;
--get|--get-all|--unset|--unset-all)
__gitcomp_nl "$(__git_config_get_set_variables)"
return
;;
*.*)
return
;;
esac
case "$cur" in
--*)
__gitcomp "
--system --global --local --file=
--list --replace-all
--get --get-all --get-regexp
--add --unset --unset-all
--remove-section --rename-section
"
return
;;
branch.*.*)
local pfx="${cur%.*}." cur_="${cur##*.}"
__gitcomp "remote pushremote merge mergeoptions rebase" "$pfx" "$cur_"
return
;;
branch.*)
local pfx="${cur%.*}." cur_="${cur#*.}"
__gitcomp_nl "$(__git_heads)" "$pfx" "$cur_" "."
return
;;
guitool.*.*)
local pfx="${cur%.*}." cur_="${cur##*.}"
__gitcomp "
argprompt cmd confirm needsfile noconsole norescan
prompt revprompt revunmerged title
" "$pfx" "$cur_"
return
;;
difftool.*.*)
local pfx="${cur%.*}." cur_="${cur##*.}"
__gitcomp "cmd path" "$pfx" "$cur_"
return
;;
man.*.*)
local pfx="${cur%.*}." cur_="${cur##*.}"
__gitcomp "cmd path" "$pfx" "$cur_"
return
;;
mergetool.*.*)
local pfx="${cur%.*}." cur_="${cur##*.}"
__gitcomp "cmd path trustExitCode" "$pfx" "$cur_"
return
;;
pager.*)
local pfx="${cur%.*}." cur_="${cur#*.}"
__git_compute_all_commands
__gitcomp_nl "$__git_all_commands" "$pfx" "$cur_"
return
;;
remote.*.*)
local pfx="${cur%.*}." cur_="${cur##*.}"
__gitcomp "
url proxy fetch push mirror skipDefaultUpdate
receivepack uploadpack tagopt pushurl
" "$pfx" "$cur_"
return
;;
remote.*)
local pfx="${cur%.*}." cur_="${cur#*.}"
__gitcomp_nl "$(__git_remotes)" "$pfx" "$cur_" "."
return
;;
url.*.*)
local pfx="${cur%.*}." cur_="${cur##*.}"
__gitcomp "insteadOf pushInsteadOf" "$pfx" "$cur_"
return
;;
esac
__gitcomp "
add.ignoreErrors
advice.commitBeforeMerge
advice.detachedHead
advice.implicitIdentity
advice.pushNonFastForward
advice.resolveConflict
advice.statusHints
alias.
am.keepcr
apply.ignorewhitespace
apply.whitespace
branch.autosetupmerge
branch.autosetuprebase
browser.
clean.requireForce
color.branch
color.branch.current
color.branch.local
color.branch.plain
color.branch.remote
color.decorate.HEAD
color.decorate.branch
color.decorate.remoteBranch
color.decorate.stash
color.decorate.tag
color.diff
color.diff.commit
color.diff.frag
color.diff.func
color.diff.meta
color.diff.new
color.diff.old
color.diff.plain
color.diff.whitespace
color.grep
color.grep.context
color.grep.filename
color.grep.function
color.grep.linenumber
color.grep.match
color.grep.selected
color.grep.separator
color.interactive
color.interactive.error
color.interactive.header
color.interactive.help
color.interactive.prompt
color.pager
color.showbranch
color.status
color.status.added
color.status.changed
color.status.header
color.status.nobranch
color.status.untracked
color.status.updated
color.ui
commit.status
commit.template
core.abbrev
core.askpass
core.attributesfile
core.autocrlf
core.bare
core.bigFileThreshold
core.compression
core.createObject
core.deltaBaseCacheLimit
core.editor
core.eol
core.excludesfile
core.fileMode
core.fsyncobjectfiles
core.gitProxy
core.ignoreStat
core.ignorecase
core.logAllRefUpdates
core.loosecompression
core.notesRef
core.packedGitLimit
core.packedGitWindowSize
core.pager
core.preferSymlinkRefs
core.preloadindex
core.quotepath
core.repositoryFormatVersion
core.safecrlf
core.sharedRepository
core.sparseCheckout
core.symlinks
core.trustctime
core.warnAmbiguousRefs
core.whitespace
core.worktree
diff.autorefreshindex
diff.external
diff.ignoreSubmodules
diff.mnemonicprefix
diff.noprefix
diff.renameLimit
diff.renames
diff.statGraphWidth
diff.submodule
diff.suppressBlankEmpty
diff.tool
diff.wordRegex
diff.algorithm
difftool.
difftool.prompt
fetch.recurseSubmodules
fetch.unpackLimit
format.attach
format.cc
format.headers
format.numbered
format.pretty
format.signature
format.signoff
format.subjectprefix
format.suffix
format.thread
format.to
gc.
gc.aggressiveWindow
gc.auto
gc.autopacklimit
gc.packrefs
gc.pruneexpire
gc.reflogexpire
gc.reflogexpireunreachable
gc.rerereresolved
gc.rerereunresolved
gitcvs.allbinary
gitcvs.commitmsgannotation
gitcvs.dbTableNamePrefix
gitcvs.dbdriver
gitcvs.dbname
gitcvs.dbpass
gitcvs.dbuser
gitcvs.enabled
gitcvs.logfile
gitcvs.usecrlfattr
guitool.
gui.blamehistoryctx
gui.commitmsgwidth
gui.copyblamethreshold
gui.diffcontext
gui.encoding
gui.fastcopyblame
gui.matchtrackingbranch
gui.newbranchtemplate
gui.pruneduringfetch
gui.spellingdictionary
gui.trustmtime
help.autocorrect
help.browser
help.format
http.lowSpeedLimit
http.lowSpeedTime
http.maxRequests
http.minSessions
http.noEPSV
http.postBuffer
http.proxy
http.sslCAInfo
http.sslCAPath
http.sslCert
http.sslCertPasswordProtected
http.sslKey
http.sslVerify
http.useragent
i18n.commitEncoding
i18n.logOutputEncoding
imap.authMethod
imap.folder
imap.host
imap.pass
imap.port
imap.preformattedHTML
imap.sslverify
imap.tunnel
imap.user
init.templatedir
instaweb.browser
instaweb.httpd
instaweb.local
instaweb.modulepath
instaweb.port
interactive.singlekey
log.date
log.decorate
log.showroot
mailmap.file
man.
man.viewer
merge.
merge.conflictstyle
merge.log
merge.renameLimit
merge.renormalize
merge.stat
merge.tool
merge.verbosity
mergetool.
mergetool.keepBackup
mergetool.keepTemporaries
mergetool.prompt
notes.displayRef
notes.rewrite.
notes.rewrite.amend
notes.rewrite.rebase
notes.rewriteMode
notes.rewriteRef
pack.compression
pack.deltaCacheLimit
pack.deltaCacheSize
pack.depth
pack.indexVersion
pack.packSizeLimit
pack.threads
pack.window
pack.windowMemory
pager.
pretty.
pull.octopus
pull.twohead
push.default
rebase.autosquash
rebase.stat
receive.autogc
receive.denyCurrentBranch
receive.denyDeleteCurrent
receive.denyDeletes
receive.denyNonFastForwards
receive.fsckObjects
receive.unpackLimit
receive.updateserverinfo
remote.pushdefault
remotes.
repack.usedeltabaseoffset
rerere.autoupdate
rerere.enabled
sendemail.
sendemail.aliasesfile
sendemail.aliasfiletype
sendemail.bcc
sendemail.cc
sendemail.cccmd
sendemail.chainreplyto
sendemail.confirm
sendemail.envelopesender
sendemail.from
sendemail.identity
sendemail.multiedit
sendemail.signedoffbycc
sendemail.smtpdomain
sendemail.smtpencryption
sendemail.smtppass
sendemail.smtpserver
sendemail.smtpserveroption
sendemail.smtpserverport
sendemail.smtpuser
sendemail.suppresscc
sendemail.suppressfrom
sendemail.thread
sendemail.to
sendemail.validate
showbranch.default
status.relativePaths
status.showUntrackedFiles
status.submodulesummary
submodule.
tar.umask
transfer.unpackLimit
url.
user.email
user.name
user.signingkey
web.browser
branch. remote.
"
}
_git_remote ()
{
local subcommands="add rename remove set-head set-branches set-url show prune update"
local subcommand="$(__git_find_on_cmdline "$subcommands")"
if [ -z "$subcommand" ]; then
__gitcomp "$subcommands"
return
fi
case "$subcommand" in
rename|remove|set-url|show|prune)
__gitcomp_nl "$(__git_remotes)"
;;
set-head|set-branches)
__git_complete_remote_or_refspec
;;
update)
local i c='' IFS=$'\n'
for i in $(git --git-dir="$(__gitdir)" config --get-regexp "remotes\..*" 2>/dev/null); do
i="${i#remotes.}"
c="$c ${i/ */}"
done
__gitcomp "$c"
;;
*)
;;
esac
}
_git_replace ()
{
__gitcomp_nl "$(__git_refs)"
}
_git_reset ()
{
__git_has_doubledash && return
case "$cur" in
--*)
__gitcomp "--merge --mixed --hard --soft --patch"
return
;;
esac
__gitcomp_nl "$(__git_refs)"
}
_git_revert ()
{
case "$cur" in
--*)
__gitcomp "--edit --mainline --no-edit --no-commit --signoff"
return
;;
esac
__gitcomp_nl "$(__git_refs)"
}
_git_rm ()
{
case "$cur" in
--*)
__gitcomp "--cached --dry-run --ignore-unmatch --quiet"
return
;;
esac
__git_complete_index_file "--cached"
}
_git_shortlog ()
{
__git_has_doubledash && return
case "$cur" in
--*)
__gitcomp "
$__git_log_common_options
$__git_log_shortlog_options
--numbered --summary
"
return
;;
esac
__git_complete_revlist
}
_git_show ()
{
__git_has_doubledash && return
case "$cur" in
--pretty=*|--format=*)
__gitcomp "$__git_log_pretty_formats $(__git_pretty_aliases)
" "" "${cur#*=}"
return
;;
--diff-algorithm=*)
__gitcomp "$__git_diff_algorithms" "" "${cur##--diff-algorithm=}"
return
;;
--*)
__gitcomp "--pretty= --format= --abbrev-commit --oneline
$__git_diff_common_options
"
return
;;
esac
__git_complete_revlist_file
}
_git_show_branch ()
{
case "$cur" in
--*)
__gitcomp "
--all --remotes --topo-order --current --more=
--list --independent --merge-base --no-name
--color --no-color
--sha1-name --sparse --topics --reflog
"
return
;;
esac
__git_complete_revlist
}
_git_stash ()
{
local save_opts='--keep-index --no-keep-index --quiet --patch'
local subcommands='save list show apply clear drop pop create branch'
local subcommand="$(__git_find_on_cmdline "$subcommands")"
if [ -z "$subcommand" ]; then
case "$cur" in
--*)
__gitcomp "$save_opts"
;;
*)
if [ -z "$(__git_find_on_cmdline "$save_opts")" ]; then
__gitcomp "$subcommands"
fi
;;
esac
else
case "$subcommand,$cur" in
save,--*)
__gitcomp "$save_opts"
;;
apply,--*|pop,--*)
__gitcomp "--index --quiet"
;;
show,--*|drop,--*|branch,--*)
;;
show,*|apply,*|drop,*|pop,*|branch,*)
__gitcomp_nl "$(git --git-dir="$(__gitdir)" stash list \
| sed -n -e 's/:.*//p')"
;;
*)
;;
esac
fi
}
_git_submodule ()
{
__git_has_doubledash && return
local subcommands="add status init deinit update summary foreach sync"
if [ -z "$(__git_find_on_cmdline "$subcommands")" ]; then
case "$cur" in
--*)
__gitcomp "--quiet --cached"
;;
*)
__gitcomp "$subcommands"
;;
esac
return
fi
}
_git_svn ()
{
local subcommands="
init fetch clone rebase dcommit log find-rev
set-tree commit-diff info create-ignore propget
proplist show-ignore show-externals branch tag blame
migrate mkdirs reset gc
"
local subcommand="$(__git_find_on_cmdline "$subcommands")"
if [ -z "$subcommand" ]; then
__gitcomp "$subcommands"
else
local remote_opts="--username= --config-dir= --no-auth-cache"
local fc_opts="
--follow-parent --authors-file= --repack=
--no-metadata --use-svm-props --use-svnsync-props
--log-window-size= --no-checkout --quiet
--repack-flags --use-log-author --localtime
--ignore-paths= --include-paths= $remote_opts
"
local init_opts="
--template= --shared= --trunk= --tags=
--branches= --stdlayout --minimize-url
--no-metadata --use-svm-props --use-svnsync-props
--rewrite-root= --prefix= --use-log-author
--add-author-from $remote_opts
"
local cmt_opts="
--edit --rmdir --find-copies-harder --copy-similarity=
"
case "$subcommand,$cur" in
fetch,--*)
__gitcomp "--revision= --fetch-all $fc_opts"
;;
clone,--*)
__gitcomp "--revision= $fc_opts $init_opts"
;;
init,--*)
__gitcomp "$init_opts"
;;
dcommit,--*)
__gitcomp "
--merge --strategy= --verbose --dry-run
--fetch-all --no-rebase --commit-url
--revision --interactive $cmt_opts $fc_opts
"
;;
set-tree,--*)
__gitcomp "--stdin $cmt_opts $fc_opts"
;;
create-ignore,--*|propget,--*|proplist,--*|show-ignore,--*|\
show-externals,--*|mkdirs,--*)
__gitcomp "--revision="
;;
log,--*)
__gitcomp "
--limit= --revision= --verbose --incremental
--oneline --show-commit --non-recursive
--authors-file= --color
"
;;
rebase,--*)
__gitcomp "
--merge --verbose --strategy= --local
--fetch-all --dry-run $fc_opts
"
;;
commit-diff,--*)
__gitcomp "--message= --file= --revision= $cmt_opts"
;;
info,--*)
__gitcomp "--url"
;;
branch,--*)
__gitcomp "--dry-run --message --tag"
;;
tag,--*)
__gitcomp "--dry-run --message"
;;
blame,--*)
__gitcomp "--git-format"
;;
migrate,--*)
__gitcomp "
--config-dir= --ignore-paths= --minimize
--no-auth-cache --username=
"
;;
reset,--*)
__gitcomp "--revision= --parent"
;;
*)
;;
esac
fi
}
_git_tag ()
{
local i c=1 f=0
while [ $c -lt $cword ]; do
i="${words[c]}"
case "$i" in
-d|-v)
__gitcomp_nl "$(__git_tags)"
return
;;
-f)
f=1
;;
esac
((c++))
done
case "$prev" in
-m|-F)
;;
-*|tag)
if [ $f = 1 ]; then
__gitcomp_nl "$(__git_tags)"
fi
;;
*)
__gitcomp_nl "$(__git_refs)"
;;
esac
}
_git_whatchanged ()
{
_git_log
}
__git_main ()
{
local i c=1 command __git_dir
while [ $c -lt $cword ]; do
i="${words[c]}"
case "$i" in
--git-dir=*) __git_dir="${i#--git-dir=}" ;;
--git-dir) ((c++)) ; __git_dir="${words[c]}" ;;
--bare) __git_dir="." ;;
--help) command="help"; break ;;
-c|--work-tree|--namespace) ((c++)) ;;
-*) ;;
*) command="$i"; break ;;
esac
((c++))
done
if [ -z "$command" ]; then
case "$cur" in
--*) __gitcomp "
--paginate
--no-pager
--git-dir=
--bare
--version
--exec-path
--exec-path=
--html-path
--man-path
--info-path
--work-tree=
--namespace=
--no-replace-objects
--help
"
;;
*) __git_compute_porcelain_commands
__gitcomp "$__git_porcelain_commands $(__git_aliases)" ;;
esac
return
fi
local completion_func="_git_${command//-/_}"
declare -f $completion_func >/dev/null && $completion_func && return
local expansion=$(__git_aliased_command "$command")
if [ -n "$expansion" ]; then
completion_func="_git_${expansion//-/_}"
declare -f $completion_func >/dev/null && $completion_func
fi
}
__gitk_main ()
{
__git_has_doubledash && return
local g="$(__gitdir)"
local merge=""
if [ -f "$g/MERGE_HEAD" ]; then
merge="--merge"
fi
case "$cur" in
--*)
__gitcomp "
$__git_log_common_options
$__git_log_gitk_options
$merge
"
return
;;
esac
__git_complete_revlist
}
if [[ -n ${ZSH_VERSION-} ]]; then
echo "WARNING: this script is deprecated, please see git-completion.zsh" 1>&2
autoload -U +X compinit && compinit
__gitcomp ()
{
emulate -L zsh
local cur_="${3-$cur}"
case "$cur_" in
--*=)
;;
*)
local c IFS=$' \t\n'
local -a array
for c in ${=1}; do
c="$c${4-}"
case $c in
--*=*|*.) ;;
*) c="$c " ;;
esac
array[$#array+1]="$c"
done
compset -P '*[=:]'
compadd -Q -S '' -p "${2-}" -a -- array && _ret=0
;;
esac
}
__gitcomp_nl ()
{
emulate -L zsh
local IFS=$'\n'
compset -P '*[=:]'
compadd -Q -S "${4- }" -p "${2-}" -- ${=1} && _ret=0
}
__gitcomp_file ()
{
emulate -L zsh
local IFS=$'\n'
compset -P '*[=:]'
compadd -Q -p "${2-}" -f -- ${=1} && _ret=0
}
_git ()
{
local _ret=1 cur cword prev
cur=${words[CURRENT]}
prev=${words[CURRENT-1]}
let cword=CURRENT-1
emulate ksh -c __${service}_main
let _ret && _default && _ret=0
return _ret
}
compdef _git git gitk
return
fi
__git_func_wrap ()
{
local cur words cword prev
_get_comp_words_by_ref -n =: cur words cword prev
$1
}
# Setup completion for certain functions defined above by setting common
# variables and workarounds.
# This is NOT a public function; use at your own risk.
__git_complete ()
{
local wrapper="__git_wrap${2}"
eval "$wrapper () { __git_func_wrap $2 ; }"
complete -o bashdefault -o default -o nospace -F $wrapper $1 2>/dev/null \
|| complete -o default -o nospace -F $wrapper $1
}
# wrapper for backwards compatibility
_git ()
{
__git_wrap__git_main
}
# wrapper for backwards compatibility
_gitk ()
{
__git_wrap__gitk_main
}
__git_complete git __git_main
__git_complete gitk __gitk_main
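# Illustrative note (not part of the upstream script): the same wrapper can be
# reused for a shell alias of git, e.g. with `alias g=git` in your shell rc:
#
#   __git_complete g __git_main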
# The following are necessary only for Cygwin, and only are needed
# when the user has tab-completed the executable name and consequently
# included the '.exe' suffix.
#
if [ Cygwin = "$(uname -o 2>/dev/null)" ]; then
__git_complete git.exe __git_main
fi
# bash/zsh git prompt support
#
# Copyright (C) 2006,2007 Shawn O. Pearce <spearce@spearce.org>
# Distributed under the GNU General Public License, version 2.0.
#
# This script allows you to see repository status in your prompt.
#
# To enable:
#
# 1) Copy this file to somewhere (e.g. ~/.git-prompt.sh).
# 2) Add the following line to your .bashrc/.zshrc:
# source ~/.git-prompt.sh
# 3a) Change your PS1 to call __git_ps1 as
# command-substitution:
# Bash: PS1='[\u@\h \W$(__git_ps1 " (%s)")]\$ '
# ZSH: setopt PROMPT_SUBST ; PS1='[%n@%m %c$(__git_ps1 " (%s)")]\$ '
# the optional argument will be used as format string.
# 3b) Alternatively, for a slightly faster prompt, __git_ps1 can
# be used for PROMPT_COMMAND in Bash or for precmd() in Zsh
# with two parameters, <pre> and <post>, which are strings
# you would put in $PS1 before and after the status string
# generated by the git-prompt machinery. e.g.
# Bash: PROMPT_COMMAND='__git_ps1 "\u@\h:\w" "\\\$ "'
# will show username, at-sign, host, colon, cwd, then
# various status string, followed by dollar and SP, as
# your prompt.
# ZSH: precmd () { __git_ps1 "%n" ":%~$ " "|%s" }
# will show username, pipe, then various status string,
# followed by colon, cwd, dollar and SP, as your prompt.
# Optionally, you can supply a third argument with a printf
# format string to finetune the output of the branch status
#
# The repository status will be displayed only if you are currently in a
# git repository. The %s token is the placeholder for the shown status.
#
# The prompt status always includes the current branch name.
#
# In addition, if you set GIT_PS1_SHOWDIRTYSTATE to a nonempty value,
# unstaged (*) and staged (+) changes will be shown next to the branch
# name. You can configure this per-repository with the
# bash.showDirtyState variable, which defaults to true once
# GIT_PS1_SHOWDIRTYSTATE is enabled.
#
# You can also see whether something is currently stashed by setting
# GIT_PS1_SHOWSTASHSTATE to a nonempty value. If something is stashed,
# then a '$' will be shown next to the branch name.
#
# If you would like to see if there are untracked files, then you can set
# GIT_PS1_SHOWUNTRACKEDFILES to a nonempty value. If there are untracked
# files, then a '%' will be shown next to the branch name. You can
# configure this per-repository with the bash.showUntrackedFiles
# variable, which defaults to true once GIT_PS1_SHOWUNTRACKEDFILES is
# enabled.
#
# If you would like to see the difference between HEAD and its upstream,
# set GIT_PS1_SHOWUPSTREAM="auto". A "<" indicates you are behind, ">"
# indicates you are ahead, "<>" indicates you have diverged and "="
# indicates that there is no difference. You can further control
# behaviour by setting GIT_PS1_SHOWUPSTREAM to a space-separated list
# of values:
#
# verbose show number of commits ahead/behind (+/-) upstream
# legacy don't use the '--count' option available in recent
# versions of git-rev-list
# git always compare HEAD to @{upstream}
# svn always compare HEAD to your SVN upstream
#
# By default, __git_ps1 will compare HEAD to your SVN upstream if it can
# find one, or @{upstream} otherwise. Once you have set
# GIT_PS1_SHOWUPSTREAM, you can override it on a per-repository basis by
# setting the bash.showUpstream config variable.
#
# If you would like to see more information about the identity of
# commits checked out as a detached HEAD, set GIT_PS1_DESCRIBE_STYLE
# to one of these values:
#
# contains relative to newer annotated tag (v1.6.3.2~35)
# branch relative to newer tag or branch (master~4)
# describe relative to older annotated tag (v1.6.3.1-13-gdd42c2f)
# default exactly matching tag
#
# If you would like a colored hint about the current dirty state, set
# GIT_PS1_SHOWCOLORHINTS to a nonempty value. The colors are based on
# the colored output of "git status -sb" and are available only when
# using __git_ps1 for PROMPT_COMMAND or precmd.
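#
# A minimal illustrative .bashrc configuration combining the options above
# (pick only the variables you actually want; names are taken from this file):
#
#   source ~/.git-prompt.sh
#   GIT_PS1_SHOWDIRTYSTATE=1
#   GIT_PS1_SHOWSTASHSTATE=1
#   GIT_PS1_SHOWUNTRACKEDFILES=1
#   GIT_PS1_SHOWUPSTREAM="auto verbose"
#   GIT_PS1_SHOWCOLORHINTS=1
#   PROMPT_COMMAND='__git_ps1 "\u@\h:\w" "\\\$ "'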
# stores the divergence from upstream in $p
# used by GIT_PS1_SHOWUPSTREAM
__git_ps1_show_upstream ()
{
local key value
local svn_remote svn_url_pattern count n
local upstream=git legacy="" verbose=""
svn_remote=()
# get some config options from git-config
local output="$(git config -z --get-regexp '^(svn-remote\..*\.url|bash\.showupstream)$' 2>/dev/null | tr '\0\n' '\n ')"
while read -r key value; do
case "$key" in
bash.showupstream)
GIT_PS1_SHOWUPSTREAM="$value"
if [[ -z "${GIT_PS1_SHOWUPSTREAM}" ]]; then
p=""
return
fi
;;
svn-remote.*.url)
svn_remote[$((${#svn_remote[@]} + 1))]="$value"
svn_url_pattern+="\\|$value"
upstream=svn+git # default upstream is SVN if available, else git
;;
esac
done <<< "$output"
# parse configuration values
for option in ${GIT_PS1_SHOWUPSTREAM}; do
case "$option" in
git|svn) upstream="$option" ;;
verbose) verbose=1 ;;
legacy) legacy=1 ;;
esac
done
# Find our upstream
case "$upstream" in
git) upstream="@{upstream}" ;;
svn*)
# get the upstream from the "git-svn-id: ..." in a commit message
# (git-svn uses essentially the same procedure internally)
local -a svn_upstream
svn_upstream=($(git log --first-parent -1 \
--grep="^git-svn-id: \(${svn_url_pattern#??}\)" 2>/dev/null))
if [[ 0 -ne ${#svn_upstream[@]} ]]; then
svn_upstream=${svn_upstream[${#svn_upstream[@]} - 2]}
svn_upstream=${svn_upstream%@*}
local n_stop="${#svn_remote[@]}"
for ((n=1; n <= n_stop; n++)); do
svn_upstream=${svn_upstream#${svn_remote[$n]}}
done
if [[ -z "$svn_upstream" ]]; then
# default branch name for checkouts with no layout:
upstream=${GIT_SVN_ID:-git-svn}
else
upstream=${svn_upstream#/}
fi
elif [[ "svn+git" = "$upstream" ]]; then
upstream="@{upstream}"
fi
;;
esac
# Find how many commits we are ahead/behind our upstream
if [[ -z "$legacy" ]]; then
count="$(git rev-list --count --left-right \
"$upstream"...HEAD 2>/dev/null)"
else
# produce equivalent output to --count for older versions of git
local commits
if commits="$(git rev-list --left-right "$upstream"...HEAD 2>/dev/null)"
then
local commit behind=0 ahead=0
for commit in $commits
do
case "$commit" in
"<"*) ((behind++)) ;;
*) ((ahead++)) ;;
esac
done
count="$behind $ahead"
else
count=""
fi
fi
# calculate the result
if [[ -z "$verbose" ]]; then
case "$count" in
"") # no upstream
p="" ;;
"0 0") # equal to upstream
p="=" ;;
"0 "*) # ahead of upstream
p=">" ;;
*" 0") # behind upstream
p="<" ;;
*) # diverged from upstream
p="<>" ;;
esac
else
case "$count" in
"") # no upstream
p="" ;;
"0 0") # equal to upstream
p=" u=" ;;
"0 "*) # ahead of upstream
p=" u+${count#0 }" ;;
*" 0") # behind upstream
p=" u-${count% 0}" ;;
*) # diverged from upstream
p=" u+${count#* }-${count% *}" ;;
esac
fi
}
# Helper function that is meant to be called from __git_ps1. It
# injects color codes into the appropriate gitstring variables used
# to build a gitstring.
__git_ps1_colorize_gitstring ()
{
if [[ -n ${ZSH_VERSION-} ]]; then
local c_red='%F{red}'
local c_green='%F{green}'
local c_lblue='%F{blue}'
local c_clear='%f'
else
# Using \[ and \] around colors is necessary to prevent
# issues with command line editing/browsing/completion!
local c_red='\[\e[31m\]'
local c_green='\[\e[32m\]'
local c_lblue='\[\e[1;34m\]'
local c_clear='\[\e[0m\]'
fi
local bad_color=$c_red
local ok_color=$c_green
local flags_color="$c_lblue"
local branch_color=""
if [ $detached = no ]; then
branch_color="$ok_color"
else
branch_color="$bad_color"
fi
c="$branch_color$c"
z="$c_clear$z"
if [ "$w" = "*" ]; then
w="$bad_color$w"
fi
if [ -n "$i" ]; then
i="$ok_color$i"
fi
if [ -n "$s" ]; then
s="$flags_color$s"
fi
if [ -n "$u" ]; then
u="$bad_color$u"
fi
r="$c_clear$r"
}
# __git_ps1 accepts 0 or 1 arguments (i.e., format string)
# when called from PS1 using command substitution
# in this mode it prints text to add to bash PS1 prompt (includes branch name)
#
# __git_ps1 requires 2 or 3 arguments when called from PROMPT_COMMAND (pc)
# in that case it _sets_ PS1. The arguments are parts of a PS1 string.
# when two arguments are given, the first is prepended and the second appended
# to the state string when assigned to PS1.
# The optional third parameter will be used as printf format string to further
# customize the output of the git-status string.
# In this mode you can request colored hints using GIT_PS1_SHOWCOLORHINTS=true
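# For example (illustrative only):
#   Bash, command substitution:  PS1='[\u@\h \W$(__git_ps1 " (%s)")]\$ '
#   Bash, PROMPT_COMMAND mode:   PROMPT_COMMAND='__git_ps1 "\u@\h:\w" "\\\$ " "|%s"'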
__git_ps1 ()
{
local pcmode=no
local detached=no
local ps1pc_start='\u@\h:\w '
local ps1pc_end='\$ '
local printf_format=' (%s)'
case "$#" in
2|3) pcmode=yes
ps1pc_start="$1"
ps1pc_end="$2"
printf_format="${3:-$printf_format}"
;;
0|1) printf_format="${1:-$printf_format}"
;;
*) return
;;
esac
local repo_info rev_parse_exit_code
repo_info="$(git rev-parse --git-dir --is-inside-git-dir \
--is-bare-repository --is-inside-work-tree \
--short HEAD 2>/dev/null)"
rev_parse_exit_code="$?"
if [ -z "$repo_info" ]; then
if [ $pcmode = yes ]; then
# In PC mode PS1 always needs to be set
PS1="$ps1pc_start$ps1pc_end"
fi
return
fi
local short_sha
if [ "$rev_parse_exit_code" = "0" ]; then
short_sha="${repo_info##*$'\n'}"
repo_info="${repo_info%$'\n'*}"
fi
local inside_worktree="${repo_info##*$'\n'}"
repo_info="${repo_info%$'\n'*}"
local bare_repo="${repo_info##*$'\n'}"
repo_info="${repo_info%$'\n'*}"
local inside_gitdir="${repo_info##*$'\n'}"
local g="${repo_info%$'\n'*}"
local r=""
local b=""
local step=""
local total=""
if [ -d "$g/rebase-merge" ]; then
read b 2>/dev/null <"$g/rebase-merge/head-name"
read step 2>/dev/null <"$g/rebase-merge/msgnum"
read total 2>/dev/null <"$g/rebase-merge/end"
if [ -f "$g/rebase-merge/interactive" ]; then
r="|REBASE-i"
else
r="|REBASE-m"
fi
else
if [ -d "$g/rebase-apply" ]; then
read step 2>/dev/null <"$g/rebase-apply/next"
read total 2>/dev/null <"$g/rebase-apply/last"
if [ -f "$g/rebase-apply/rebasing" ]; then
read b 2>/dev/null <"$g/rebase-apply/head-name"
r="|REBASE"
elif [ -f "$g/rebase-apply/applying" ]; then
r="|AM"
else
r="|AM/REBASE"
fi
elif [ -f "$g/MERGE_HEAD" ]; then
r="|MERGING"
elif [ -f "$g/CHERRY_PICK_HEAD" ]; then
r="|CHERRY-PICKING"
elif [ -f "$g/REVERT_HEAD" ]; then
r="|REVERTING"
elif [ -f "$g/BISECT_LOG" ]; then
r="|BISECTING"
fi
if [ -n "$b" ]; then
:
elif [ -h "$g/HEAD" ]; then
# symlink symbolic ref
b="$(git symbolic-ref HEAD 2>/dev/null)"
else
local head=""
if ! read head 2>/dev/null <"$g/HEAD"; then
if [ $pcmode = yes ]; then
PS1="$ps1pc_start$ps1pc_end"
fi
return
fi
# is it a symbolic ref?
b="${head#ref: }"
if [ "$head" = "$b" ]; then
detached=yes
b="$(
case "${GIT_PS1_DESCRIBE_STYLE-}" in
(contains)
git describe --contains HEAD ;;
(branch)
git describe --contains --all HEAD ;;
(describe)
git describe HEAD ;;
(* | default)
git describe --tags --exact-match HEAD ;;
esac 2>/dev/null)" ||
b="$short_sha..."
b="($b)"
fi
fi
fi
if [ -n "$step" ] && [ -n "$total" ]; then
r="$r $step/$total"
fi
local w=""
local i=""
local s=""
local u=""
local c=""
local p=""
if [ "true" = "$inside_gitdir" ]; then
if [ "true" = "$bare_repo" ]; then
c="BARE:"
else
b="GIT_DIR!"
fi
elif [ "true" = "$inside_worktree" ]; then
if [ -n "${GIT_PS1_SHOWDIRTYSTATE-}" ] &&
[ "$(git config --bool bash.showDirtyState)" != "false" ]
then
git diff --no-ext-diff --quiet --exit-code || w="*"
if [ -n "$short_sha" ]; then
git diff-index --cached --quiet HEAD -- || i="+"
else
i="#"
fi
fi
if [ -n "${GIT_PS1_SHOWSTASHSTATE-}" ] &&
[ -r "$g/refs/stash" ]; then
s="$"
fi
if [ -n "${GIT_PS1_SHOWUNTRACKEDFILES-}" ] &&
[ "$(git config --bool bash.showUntrackedFiles)" != "false" ] &&
git ls-files --others --exclude-standard --error-unmatch -- '*' >/dev/null 2>/dev/null
then
u="%${ZSH_VERSION+%}"
fi
if [ -n "${GIT_PS1_SHOWUPSTREAM-}" ]; then
__git_ps1_show_upstream
fi
fi
local z="${GIT_PS1_STATESEPARATOR-" "}"
# NO color option unless in PROMPT_COMMAND mode
if [ $pcmode = yes ] && [ -n "${GIT_PS1_SHOWCOLORHINTS-}" ]; then
__git_ps1_colorize_gitstring
fi
local f="$w$i$s$u"
local gitstring="$c${b##refs/heads/}${f:+$z$f}$r$p"
if [ $pcmode = yes ]; then
if [[ -n ${ZSH_VERSION-} ]]; then
gitstring=$(printf -- "$printf_format" "$gitstring")
else
printf -v gitstring -- "$printf_format" "$gitstring"
fi
PS1="$ps1pc_start$gitstring$ps1pc_end"
else
printf -- "$printf_format" "$gitstring"
fi
}
#!/bin/bash
brew update
brew tap homebrew/science
brew install openblas swig md5sha1sum
...@@ -2,18 +2,11 @@ ...@@ -2,18 +2,11 @@
source ./common.sh source ./common.sh
NPROC=1 NPROC=1
if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages
export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages export PYTHONHOME=/opt/python/2.7.12
export PYTHONHOME=/opt/python/2.7.12 export PATH=/opt/python/2.7.12/bin:${PATH}
export PATH=/opt/python/2.7.12/bin:${PATH} cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} NRPOC=`nproc`
NRPOC=`nproc` make -j $NPROC
make -j $NPROC make coveralls
make coveralls sudo make install
sudo make install
elif [[ "$TRAVIS_OS_NAME" == "osx" ]]; then
export PYTHONPATH=/usr/local/lib/python2.7/site-packages
cmake .. -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
NPROC=`sysctl -n hw.ncpu`
make -j $NPROC
fi
...@@ -2,8 +2,12 @@ ...@@ -2,8 +2,12 @@
# Add set -e, cd to directory. # Add set -e, cd to directory.
source ./common.sh source ./common.sh
# Compile Documentation only. # Compile Documentation only.
cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF ${EXTRA_CMAKE_OPTS}
mkdir output
make DESTDIR=./output install -j `nproc`
pip install ./output/usr/local/opt/paddle/share/wheels/*
rm -rf *
cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS} cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS}
make paddle_docs paddle_docs_cn make paddle_docs paddle_docs_cn
...@@ -25,26 +29,41 @@ TARGET_BRANCH="gh-pages" ...@@ -25,26 +29,41 @@ TARGET_BRANCH="gh-pages"
# Only deploy master branch to build latest documentation. # Only deploy master branch to build latest documentation.
SOURCE_BRANCH="master" SOURCE_BRANCH="master"
# If is not a Github pull request, and in master branch.
if [ "$TRAVIS_PULL_REQUEST" != "false" -o "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then
exit 0
fi
# Clone the repo to output directory # Clone the repo to output directory
git clone $REPO output git clone $REPO output
cd output cd output
# checkout github page branch function deploy_docs() {
git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH SOURCE_BRANCH=$1
DIR=$2
# If is not a Github pull request
if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then
exit 0
fi
# If it is not watched branch.
if [ "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then
return
fi
# remove old docs. mv new docs. # checkout github page branch
rm -rf doc doc_cn git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH
mv ../doc/cn/html doc_cn
mv ../doc/en/html doc mkdir -p ${DIR}
# remove old docs. mv new docs.
set +e
rm -rf ${DIR}/doc ${DIR}/doc_cn
set -e
mv ../doc/cn/html ${DIR}/doc_cn
mv ../doc/en/html ${DIR}/doc
git add .
}
deploy_docs "master" "."
deploy_docs "develop" "./develop/"
# Check is there anything changed. # Check is there anything changed.
set +e set +e
git diff --exit-code >/dev/null git diff --cached --exit-code >/dev/null
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
echo "No changes to the output on this push; exiting." echo "No changes to the output on this push; exiting."
exit 0 exit 0
...@@ -57,7 +76,6 @@ if [ -n $SSL_KEY ]; then # Only push updated docs for github.com/PaddlePaddle/P ...@@ -57,7 +76,6 @@ if [ -n $SSL_KEY ]; then # Only push updated docs for github.com/PaddlePaddle/P
git config user.name "Travis CI" git config user.name "Travis CI"
git config user.email "paddle-dev@baidu.com" git config user.email "paddle-dev@baidu.com"
git commit -m "Deploy to GitHub Pages: ${SHA}" git commit -m "Deploy to GitHub Pages: ${SHA}"
# Set ssh private key # Set ssh private key
openssl aes-256-cbc -K $SSL_KEY -iv $SSL_IV -in ../../paddle/scripts/travis/deploy_key.enc -out deploy_key -d openssl aes-256-cbc -K $SSL_KEY -iv $SSL_IV -in ../../paddle/scripts/travis/deploy_key.enc -out deploy_key -d
chmod 600 deploy_key chmod 600 deploy_key
......
...@@ -72,6 +72,7 @@ setup(name="py_paddle", ...@@ -72,6 +72,7 @@ setup(name="py_paddle",
packages=['py_paddle'], packages=['py_paddle'],
include_dirs = include_dirs, include_dirs = include_dirs,
install_requires = [ install_requires = [
'nltk>=3.2.2',
'numpy>=1.8.0', # The numpy is required. 'numpy>=1.8.0', # The numpy is required.
'protobuf>=3.0.0' # The paddle protobuf version 'protobuf>=3.0.0' # The paddle protobuf version
], ],
......
...@@ -208,7 +208,7 @@ real Tester::forwardOneBatch(const DataBatch& dataBatch, ...@@ -208,7 +208,7 @@ real Tester::forwardOneBatch(const DataBatch& dataBatch,
return 0.0; // In this case, there is no meaning to calculate cost return 0.0; // In this case, there is no meaning to calculate cost
} }
return Argument::sumCosts(outArgs); return Argument::sum(outArgs);
} }
void Tester::testOnePassBatch(int passId) { void Tester::testOnePassBatch(int passId) {
......
...@@ -310,7 +310,7 @@ real Trainer::checkGradient() { ...@@ -310,7 +310,7 @@ real Trainer::checkGradient() {
std::vector<Argument> outArgs; std::vector<Argument> outArgs;
trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC);
real cost = Argument::sumCosts(outArgs); real cost = Argument::sum(outArgs);
LOG(INFO) << "original cost=" << cost; LOG(INFO) << "original cost=" << cost;
trainerInternal_.getGradientMachine()->backward(); trainerInternal_.getGradientMachine()->backward();
...@@ -340,7 +340,7 @@ real Trainer::checkGradient() { ...@@ -340,7 +340,7 @@ real Trainer::checkGradient() {
parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara); parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara);
parameter->setValueUpdated(); parameter->setValueUpdated();
trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC);
real newCost1 = Argument::sumCosts(outArgs); real newCost1 = Argument::sum(outArgs);
for (size_t i = 0; i < dim; ++i) { for (size_t i = 0; i < dim; ++i) {
newp[i] = oldp[i] - step * d[i]; newp[i] = oldp[i] - step * d[i];
...@@ -349,7 +349,7 @@ real Trainer::checkGradient() { ...@@ -349,7 +349,7 @@ real Trainer::checkGradient() {
parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara); parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara);
parameter->setValueUpdated(); parameter->setValueUpdated();
trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC);
real newCost2 = Argument::sumCosts(outArgs); real newCost2 = Argument::sum(outArgs);
real trueDelta = 0.5 * (newCost1 - newCost2); real trueDelta = 0.5 * (newCost1 - newCost2);
real diff = (1e-20 + trueDelta) / (1e-20 + delta) - 1; real diff = (1e-20 + trueDelta) / (1e-20 + delta) - 1;
...@@ -575,7 +575,7 @@ real Trainer::calcGradient(const DataBatch& dataBatch, ...@@ -575,7 +575,7 @@ real Trainer::calcGradient(const DataBatch& dataBatch,
trainerInternal_.getGradientMachine()->forwardBackward( trainerInternal_.getGradientMachine()->forwardBackward(
inArgs, &outArgs, PASS_TRAIN); inArgs, &outArgs, PASS_TRAIN);
real cost = Argument::sumCosts(outArgs); real cost = Argument::sum(outArgs);
offset = 0; offset = 0;
for (auto& para : parameters) { for (auto& para : parameters) {
......
...@@ -134,7 +134,7 @@ void TrainerInternal::trainOneBatch(int64_t batchId, ...@@ -134,7 +134,7 @@ void TrainerInternal::trainOneBatch(int64_t batchId,
real cost = 0; real cost = 0;
{ {
REGISTER_TIMER("sumCost"); REGISTER_TIMER("sumCost");
cost = Argument::sumCosts(*outArgs); cost = Argument::sum(*outArgs);
} }
if (batchId % intconfig_->log_period == 0) { if (batchId % intconfig_->log_period == 0) {
......
...@@ -45,6 +45,23 @@ class CacheType(object): ...@@ -45,6 +45,23 @@ class CacheType(object):
class InputType(object): class InputType(object):
"""
InputType is the base class for Paddle input types.
.. note::
This is a base class and should never be used directly by users.
:param dim: dimension of the input. If the input is an integer, it means the
value range. Otherwise, it means the size of the layer.
:type dim: int
:param seq_type: sequence type of input. 0 means it is not a sequence. 1
means it is a variable length sequence. 2 means it is a
nested sequence.
:type seq_type: int
:param type: data type of input.
:type type: int
"""
__slots__ = ['dim', 'seq_type', 'type'] __slots__ = ['dim', 'seq_type', 'type']
def __init__(self, dim, seq_type, tp): def __init__(self, dim, seq_type, tp):
...@@ -54,19 +71,63 @@ class InputType(object): ...@@ -54,19 +71,63 @@ class InputType(object):
def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE): def dense_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Dense Vector. It means the input feature is a dense float vector. For example,
if the input is an image with 28*28 pixels, the input to the Paddle neural
network should be a dense vector with dimension 784.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.Dense) return InputType(dim, seq_type, DataType.Dense)
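# Illustrative usage (not part of this change): a 28*28 grayscale image would be
# declared via the existing alias as dense_vector(784).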
def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE): def sparse_non_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Sparse binary vector. It means the input feature is a sparse vector in which
every element is either zero or one.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of this input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.SparseNonValue) return InputType(dim, seq_type, DataType.SparseNonValue)
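# Illustrative usage (not part of this change, assuming the existing
# sparse_binary_vector alias): a bag-of-words feature over a one-million-word
# vocabulary could be declared as sparse_binary_vector(1000000).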
def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE): def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
"""
Sparse vector. It means the input feature is a sparse vector. Most of the
elements in this vector are zero; the others can be any float value.
:param dim: dimension of this vector.
:type dim: int
:param seq_type: sequence type of this input.
:type seq_type: int
:return: An input type object.
:rtype: InputType
"""
return InputType(dim, seq_type, DataType.SparseValue) return InputType(dim, seq_type, DataType.SparseValue)
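# Illustrative usage (not part of this change, assuming the existing
# sparse_vector alias): a 1000-dim tf-idf style feature, fed as (index, value)
# pairs, could be declared as sparse_vector(1000).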
def index_slot(dim, seq_type=SequenceType.NO_SEQUENCE): def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE):
return InputType(dim, seq_type, DataType.Index) """
Data type of an integer value.
:param seq_type: sequence type of this input.
:type seq_type: int
:param value_range: range of this integer.
:type value_range: int
:return: An input type object
:rtype: InputType
"""
return InputType(value_range, seq_type, DataType.Index)
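# Illustrative usage (not part of this change): integer_value(10000) declares a
# single integer input whose values lie in [0, 10000), e.g. a word id drawn
# from a 10000-word vocabulary.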
dense_vector = dense_slot dense_vector = dense_slot
...@@ -76,6 +137,14 @@ integer_value = index_slot ...@@ -76,6 +137,14 @@ integer_value = index_slot
def dense_vector_sequence(dim): def dense_vector_sequence(dim):
"""
Data type of a sequence of dense vectors.
:param dim: dimension of dense vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return dense_vector(dim, seq_type=SequenceType.SEQUENCE) return dense_vector(dim, seq_type=SequenceType.SEQUENCE)
...@@ -84,6 +153,15 @@ def dense_vector_sub_sequence(dim): ...@@ -84,6 +153,15 @@ def dense_vector_sub_sequence(dim):
def sparse_binary_vector_sequence(dim): def sparse_binary_vector_sequence(dim):
"""
Data type of a sequence of sparse vectors, in which every element is either
zero or one.
:param dim: dimension of sparse vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE) return sparse_binary_vector(dim, seq_type=SequenceType.SEQUENCE)
...@@ -92,6 +170,15 @@ def sparse_binary_vector_sub_sequence(dim): ...@@ -92,6 +170,15 @@ def sparse_binary_vector_sub_sequence(dim):
def sparse_vector_sequence(dim): def sparse_vector_sequence(dim):
"""
Data type of a sequence of sparse vectors, in which most elements are zero and
the others can be any float value.
:param dim: dimension of sparse vector.
:type dim: int
:return: An input type object
:rtype: InputType
"""
return sparse_vector(dim, seq_type=SequenceType.SEQUENCE) return sparse_vector(dim, seq_type=SequenceType.SEQUENCE)
...@@ -99,8 +186,14 @@ def sparse_vector_sub_sequence(dim): ...@@ -99,8 +186,14 @@ def sparse_vector_sub_sequence(dim):
return sparse_vector(dim, seq_type=SequenceType.SUB_SEQUENCE) return sparse_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
def integer_value_sequence(dim): def integer_value_sequence(value_range):
return integer_value(dim, seq_type=SequenceType.SEQUENCE) """
Data type of a sequence of integers.
:param value_range: range of each element.
:type value_range: int
"""
return integer_value(value_range, seq_type=SequenceType.SEQUENCE)
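# Illustrative usage (not part of this change): a sentence encoded as word ids
# from a 10000-word vocabulary would be declared as
# integer_value_sequence(10000).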
def integer_value_sub_sequence(dim): def integer_value_sub_sequence(dim):
......
...@@ -2301,14 +2301,9 @@ def Generator( ...@@ -2301,14 +2301,9 @@ def Generator(
@config_layer('expand') @config_layer('expand')
class ExpandLayer(LayerBase): class ExpandLayer(LayerBase):
def __init__(self, def __init__(self, name, inputs, trans_type='non-seq', bias=False, **xargs):
name,
inputs,
trans_type='non-seq',
device=None,
bias=False):
super(ExpandLayer, self).__init__( super(ExpandLayer, self).__init__(
name, 'expand', 0, inputs=inputs, device=device) name, 'expand', 0, inputs=inputs, **xargs)
config_assert( config_assert(
len(self.inputs) == 2, 'ExpandLayer takes 2 and only 2 inputs') len(self.inputs) == 2, 'ExpandLayer takes 2 and only 2 inputs')
self.config.trans_type = trans_type self.config.trans_type = trans_type
...@@ -2339,11 +2334,10 @@ class MaxLayer(LayerBase): ...@@ -2339,11 +2334,10 @@ class MaxLayer(LayerBase):
inputs, inputs,
trans_type='non-seq', trans_type='non-seq',
active_type='linear', active_type='linear',
device=None,
bias=False, bias=False,
output_max_index=None): output_max_index=None,
super(MaxLayer, self).__init__( **xargs):
name, 'max', 0, inputs=inputs, device=device) super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs)
config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input') config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
self.config.trans_type = trans_type self.config.trans_type = trans_type
self.config.active_type = active_type self.config.active_type = active_type
...@@ -2390,15 +2384,15 @@ class SequenceLastInstanceLayer(LayerBase): ...@@ -2390,15 +2384,15 @@ class SequenceLastInstanceLayer(LayerBase):
inputs, inputs,
active_type='linear', active_type='linear',
trans_type='non-seq', trans_type='non-seq',
device=None, bias=False,
bias=False): **xargs):
super(SequenceLastInstanceLayer, self).__init__( super(SequenceLastInstanceLayer, self).__init__(
name, name,
'seqlastins', 'seqlastins',
0, 0,
inputs=inputs, inputs=inputs,
device=device, active_type=active_type,
active_type=active_type) **xargs)
config_assert( config_assert(
len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input') len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
self.config.trans_type = trans_type self.config.trans_type = trans_type
...@@ -2410,39 +2404,29 @@ class SequenceLastInstanceLayer(LayerBase): ...@@ -2410,39 +2404,29 @@ class SequenceLastInstanceLayer(LayerBase):
@config_layer('seqfirstins') @config_layer('seqfirstins')
class SequenceFirstInstanceLayer(SequenceLastInstanceLayer): class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
def __init__( def __init__(self,
self, name,
name, inputs,
inputs, active_type='linear',
active_type='linear', trans_type='non-seq',
trans_type='non-seq', bias=False,
device=None, **xargs):
bias=False, ):
super(SequenceFirstInstanceLayer, self).__init__( super(SequenceFirstInstanceLayer, self).__init__(
name, name, inputs=inputs, active_type=active_type, bias=bias, **xargs)
inputs=inputs,
active_type=active_type,
device=device,
bias=bias)
self.config.trans_type = trans_type self.config.trans_type = trans_type
self.config.select_first = True self.config.select_first = True
@config_layer('seqconcat') @config_layer('seqconcat')
class SequenceConcatLayer(LayerBase): class SequenceConcatLayer(LayerBase):
def __init__(self, def __init__(self, name, inputs, active_type='linear', bias=False, **xargs):
name,
inputs,
active_type='linear',
device=None,
bias=False):
super(SequenceConcatLayer, self).__init__( super(SequenceConcatLayer, self).__init__(
name, name,
'seqconcat', 'seqconcat',
0, 0,
inputs=inputs, inputs=inputs,
device=device, active_type=active_type,
active_type=active_type) **xargs)
config_assert( config_assert(
len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs') len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs')
for input_index in xrange(len(self.inputs)): for input_index in xrange(len(self.inputs)):
...@@ -2458,15 +2442,15 @@ class SequenceReshapeLayer(LayerBase): ...@@ -2458,15 +2442,15 @@ class SequenceReshapeLayer(LayerBase):
size, size,
inputs, inputs,
active_type='linear', active_type='linear',
device=None, bias=False,
bias=False): **xargs):
super(SequenceReshapeLayer, self).__init__( super(SequenceReshapeLayer, self).__init__(
name, name,
'seqreshape', 'seqreshape',
size, size,
inputs=inputs, inputs=inputs,
device=device, active_type=active_type,
active_type=active_type) **xargs)
config_assert( config_assert(
len(inputs) == 1, 'SequenceReshapeLayer must have 1 inputs') len(inputs) == 1, 'SequenceReshapeLayer must have 1 inputs')
self.set_layer_size(size) self.set_layer_size(size)
...@@ -2475,19 +2459,9 @@ class SequenceReshapeLayer(LayerBase): ...@@ -2475,19 +2459,9 @@ class SequenceReshapeLayer(LayerBase):
@config_layer('subseq') @config_layer('subseq')
class SubSequenceLayer(LayerBase): class SubSequenceLayer(LayerBase):
def __init__(self, def __init__(self, name, inputs, active_type='linear', bias=False, **xargs):
name,
inputs,
active_type='linear',
device=None,
bias=False):
super(SubSequenceLayer, self).__init__( super(SubSequenceLayer, self).__init__(
name, name, 'subseq', 0, inputs=inputs, active_type=active_type, **xargs)
'subseq',
0,
inputs=inputs,
device=device,
active_type=active_type)
config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs') config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs')
input_layer0 = self.get_input_layer(0) input_layer0 = self.get_input_layer(0)
size = input_layer0.size size = input_layer0.size
...@@ -2644,15 +2618,10 @@ class AverageLayer(LayerBase): ...@@ -2644,15 +2618,10 @@ class AverageLayer(LayerBase):
average_strategy='average', average_strategy='average',
trans_type='non-seq', trans_type='non-seq',
active_type='linear', active_type='linear',
device=None, bias=False,
bias=False): **xargs):
super(AverageLayer, self).__init__( super(AverageLayer, self).__init__(
name, name, 'average', 0, inputs=inputs, active_type=active_type, **xargs)
'average',
0,
inputs=inputs,
device=device,
active_type=active_type)
self.config.average_strategy = average_strategy self.config.average_strategy = average_strategy
self.config.trans_type = trans_type self.config.trans_type = trans_type
config_assert(len(inputs) == 1, 'AverageLayer must have 1 input') config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
...@@ -2676,9 +2645,9 @@ class CosSimLayer(LayerBase): ...@@ -2676,9 +2645,9 @@ class CosSimLayer(LayerBase):
@config_layer('tensor') @config_layer('tensor')
class TensorLayer(LayerBase): class TensorLayer(LayerBase):
def __init__(self, name, size, inputs, device=None, bias=True, **xargs): def __init__(self, name, size, inputs, bias=True, **xargs):
super(TensorLayer, self).__init__( super(TensorLayer, self).__init__(
name, 'tensor', size, inputs=inputs, device=device, **xargs) name, 'tensor', size, inputs=inputs, **xargs)
config_assert(len(self.inputs) == 2, 'TensorLayer must have 2 inputs') config_assert(len(self.inputs) == 2, 'TensorLayer must have 2 inputs')
config_assert(size > 0, 'size must be positive') config_assert(size > 0, 'size must be positive')
config_assert(inputs[1].parameter_name == None, config_assert(inputs[1].parameter_name == None,
...@@ -3029,7 +2998,7 @@ class CRFLayer(LayerBase): ...@@ -3029,7 +2998,7 @@ class CRFLayer(LayerBase):
super(CRFLayer, self).__init__(name, 'crf', size, inputs, device=device) super(CRFLayer, self).__init__(name, 'crf', size, inputs, device=device)
config_assert(2 <= len(self.inputs) <= 3, config_assert(2 <= len(self.inputs) <= 3,
'CRFLayer must have 2 or 3 inputs') 'CRFLayer must have 2 or 3 inputs')
self.create_input_parameter(0, size * (size + 2), [size, size + 2]) self.create_input_parameter(0, size * (size + 2), [size + 2, size])
self.config.coeff = coeff self.config.coeff = coeff
...@@ -3051,7 +3020,7 @@ class CRFDecodingLayer(LayerBase): ...@@ -3051,7 +3020,7 @@ class CRFDecodingLayer(LayerBase):
config_assert( config_assert(
len(self.inputs) <= 2, len(self.inputs) <= 2,
'CRFDecodingLayer cannot have more than 2 inputs') 'CRFDecodingLayer cannot have more than 2 inputs')
self.create_input_parameter(0, size * (size + 2), [size, size + 2]) self.create_input_parameter(0, size * (size + 2), [size + 2, size])
@config_layer('ctc') @config_layer('ctc')
......
...@@ -39,6 +39,7 @@ register_unary_math_op('abs', act.AbsActivation()) ...@@ -39,6 +39,7 @@ register_unary_math_op('abs', act.AbsActivation())
register_unary_math_op('sigmoid', act.SigmoidActivation()) register_unary_math_op('sigmoid', act.SigmoidActivation())
register_unary_math_op('tanh', act.TanhActivation()) register_unary_math_op('tanh', act.TanhActivation())
register_unary_math_op('square', act.SquareActivation()) register_unary_math_op('square', act.SquareActivation())
register_unary_math_op('relu', act.ReluActivation())
def add(layeroutput, other): def add(layeroutput, other):
......
...@@ -795,17 +795,16 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): ...@@ -795,17 +795,16 @@ def data_layer(name, size, height=None, width=None, layer_attr=None):
.. code-block:: python .. code-block:: python
data = data_layer(name="input", data = data_layer(name="input", size=1000)
size=1000)
:param name: Name of this data layer. :param name: Name of this data layer.
:type name: basestring :type name: basestring
:param size: Size of this data layer. :param size: Size of this data layer.
:type size: int :type size: int
:param height: Height of this data layer, used for image :param height: Height of this data layer, used for image
:type size: int|None :type height: int|None
:param width: Width of this data layer, used for image :param width: Width of this data layer, used for image
:type size: int|None :type width: int|None
:param layer_attr: Extra Layer Attribute. :param layer_attr: Extra Layer Attribute.
:type layer_attr: ExtraLayerAttribute. :type layer_attr: ExtraLayerAttribute.
:return: LayerOutput object. :return: LayerOutput object.
......
...@@ -7,8 +7,9 @@ x = layer_math.exp(x) ...@@ -7,8 +7,9 @@ x = layer_math.exp(x)
x = layer_math.log(x) x = layer_math.log(x)
x = layer_math.abs(x) x = layer_math.abs(x)
x = layer_math.sigmoid(x) x = layer_math.sigmoid(x)
x = layer_math.tanh(x)
x = layer_math.square(x) x = layer_math.square(x)
x = layer_math.square(x) x = layer_math.relu(x)
y = 1 + x y = 1 + x
y = y + 1 y = y + 1
y = x + y y = x + y
......
...@@ -65,13 +65,28 @@ layers { ...@@ -65,13 +65,28 @@ layers {
} }
} }
} }
layers {
name: "__tanh_0__"
type: "mixed"
size: 100
active_type: "tanh"
inputs {
input_layer_name: "__sigmoid_0__"
proj_conf {
type: "identity"
name: "___tanh_0__.w0"
input_size: 100
output_size: 100
}
}
}
layers { layers {
name: "__square_0__" name: "__square_0__"
type: "mixed" type: "mixed"
size: 100 size: 100
active_type: "square" active_type: "square"
inputs { inputs {
input_layer_name: "__sigmoid_0__" input_layer_name: "__tanh_0__"
proj_conf { proj_conf {
type: "identity" type: "identity"
name: "___square_0__.w0" name: "___square_0__.w0"
...@@ -81,15 +96,15 @@ layers { ...@@ -81,15 +96,15 @@ layers {
} }
} }
layers { layers {
name: "__square_1__" name: "__relu_0__"
type: "mixed" type: "mixed"
size: 100 size: 100
active_type: "square" active_type: "relu"
inputs { inputs {
input_layer_name: "__square_0__" input_layer_name: "__square_0__"
proj_conf { proj_conf {
type: "identity" type: "identity"
name: "___square_1__.w0" name: "___relu_0__.w0"
input_size: 100 input_size: 100
output_size: 100 output_size: 100
} }
...@@ -101,7 +116,7 @@ layers { ...@@ -101,7 +116,7 @@ layers {
size: 100 size: 100
active_type: "" active_type: ""
inputs { inputs {
input_layer_name: "__square_1__" input_layer_name: "__relu_0__"
} }
slope: 1.0 slope: 1.0
intercept: 1 intercept: 1
...@@ -123,7 +138,7 @@ layers { ...@@ -123,7 +138,7 @@ layers {
size: 100 size: 100
active_type: "" active_type: ""
inputs { inputs {
input_layer_name: "__square_1__" input_layer_name: "__relu_0__"
proj_conf { proj_conf {
type: "identity" type: "identity"
name: "___mixed_0__.w0" name: "___mixed_0__.w0"
...@@ -147,7 +162,7 @@ layers { ...@@ -147,7 +162,7 @@ layers {
size: 100 size: 100
active_type: "" active_type: ""
inputs { inputs {
input_layer_name: "__square_1__" input_layer_name: "__relu_0__"
} }
slope: -1.0 slope: -1.0
intercept: 0.0 intercept: 0.0
...@@ -339,8 +354,9 @@ sub_models { ...@@ -339,8 +354,9 @@ sub_models {
layer_names: "__log_0__" layer_names: "__log_0__"
layer_names: "__abs_0__" layer_names: "__abs_0__"
layer_names: "__sigmoid_0__" layer_names: "__sigmoid_0__"
layer_names: "__tanh_0__"
layer_names: "__square_0__" layer_names: "__square_0__"
layer_names: "__square_1__" layer_names: "__relu_0__"
layer_names: "__slope_intercept_layer_0__" layer_names: "__slope_intercept_layer_0__"
layer_names: "__slope_intercept_layer_1__" layer_names: "__slope_intercept_layer_1__"
layer_names: "__mixed_0__" layer_names: "__mixed_0__"
......
...@@ -239,9 +239,9 @@ parameters { ...@@ -239,9 +239,9 @@ parameters {
name: "___crf_layer_0__.w0" name: "___crf_layer_0__.w0"
size: 24 size: 24
initial_mean: 0.0 initial_mean: 0.0
initial_std: 0.5 initial_std: 0.408248290464
dims: 4
dims: 6 dims: 6
dims: 4
initial_strategy: 0 initial_strategy: 0
initial_smart: true initial_smart: true
} }
......
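A note on the changed expected values above (an inference from the numbers shown, not stated in the diff itself): with initial_smart enabled the standard deviation appears to follow 1/sqrt(first dim). The old dims [4, 6] give 1/sqrt(4) = 0.5, while the new dims [6, 4] give 1/sqrt(6) ≈ 0.408248290464, which matches the swapped [size + 2, size] parameter shape now used by CRFLayer and CRFDecodingLayer.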
...@@ -20,16 +20,20 @@ import event ...@@ -20,16 +20,20 @@ import event
import data_type import data_type
import topology import topology
import data_feeder import data_feeder
import networks
from . import dataset from . import dataset
from . import reader from . import reader
import attr import attr
import pooling import pooling
import inference
import networks
import py_paddle.swig_paddle as api import py_paddle.swig_paddle as api
import minibatch
__all__ = [ __all__ = [
'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader', 'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader',
'topology' 'topology', 'networks', 'infer'
] ]
...@@ -39,3 +43,7 @@ def init(**kwargs): ...@@ -39,3 +43,7 @@ def init(**kwargs):
args.append('--%s=%s' % (key, str(kwargs[key]))) args.append('--%s=%s' % (key, str(kwargs[key])))
api.initPaddle(*args) api.initPaddle(*args)
infer = inference.infer
batch = minibatch.batch
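A small sketch of what the new top-level exports enable (the flag names passed to init are assumptions about typical usage, not taken from this diff):

.. code-block:: python

    import paddle.v2 as paddle

    paddle.init(use_gpu=False, trainer_count=1)   # forwarded to api.initPaddle as --key=value flags
    # paddle.batch wraps a sample reader into a minibatch reader
    # paddle.infer is the new inference entry point (see inference.py below)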
...@@ -12,26 +12,15 @@ ...@@ -12,26 +12,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle.trainer_config_helpers.activations import * import paddle.trainer_config_helpers.activations
import copy
__all__ = [ __all__ = []
"Base", "Tanh", "Sigmoid", "Softmax", "Identity", "Linear",
'SequenceSoftmax', "Exp", "Relu", "BRelu", "SoftRelu", "STanh", "Abs",
"Square", "Log"
]
Base = BaseActivation suffix = 'Activation'
Tanh = TanhActivation for act in paddle.trainer_config_helpers.activations.__all__:
Sigmoid = SigmoidActivation new_name = act[:-len(suffix)]
Softmax = SoftmaxActivation globals()[new_name] = copy.copy(
SequenceSoftmax = SequenceSoftmaxActivation getattr(paddle.trainer_config_helpers.activations, act))
Identity = IdentityActivation globals()[new_name].__name__ = new_name
Linear = Identity __all__.append(new_name)
Relu = ReluActivation
BRelu = BReluActivation
SoftRelu = SoftReluActivation
STanh = STanhActivation
Abs = AbsActivation
Square = SquareActivation
Exp = ExpActivation
Log = LogActivation
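A minimal sketch of what the renaming loop above produces; it assumes the v1 module exports names such as TanhActivation, which become Tanh here:

.. code-block:: python

    import paddle.v2.activation as activation

    act = activation.Tanh()        # a copied class, originally TanhActivation
    print type(act).__name__       # prints "Tanh" because __name__ is rewritten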
...@@ -12,12 +12,16 @@ ...@@ -12,12 +12,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle.trainer_config_helpers.attrs import * import paddle.trainer_config_helpers.attrs
__all__ = [ __all__ = [
"Param", "Param",
"Extra", "Extra",
] ]
Param = ParameterAttribute Param = paddle.trainer_config_helpers.attrs.ParameterAttribute
Extra = ExtraLayerAttribute Extra = paddle.trainer_config_helpers.attrs.ExtraLayerAttribute
for each in paddle.trainer_config_helpers.attrs.__all__:
globals()[each] = getattr(paddle.trainer_config_helpers.attrs, each)
__all__.append(each)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import re
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
import paddle.trainer_config_helpers as conf_helps
class LayerType(type):
def __new__(cls, name, bases, attrs):
method_name = attrs.get('METHOD_NAME', None)
if method_name is not None:
method = getattr(conf_helps, method_name)
if method.__doc__ is not None:
mapper = attrs.get("__map_docstr__", None)
if mapper is not None:
attrs['__doc__'] = LayerType.__map_docstr__(
mapper(method.__doc__),
method_name=method_name,
name=name)
else:
attrs['__doc__'] = LayerType.__map_docstr__(
method.__doc__, method_name=method_name, name=name)
return super(LayerType, cls).__new__(cls, name, bases, attrs)
@staticmethod
def __map_docstr__(doc, name, method_name):
assert isinstance(doc, basestring)
# replace LayerOutput to paddle.v2.config_base.Layer
doc = doc.replace("LayerOutput", "paddle.v2.config_base.Layer")
doc = doc.replace('ParameterAttribute',
'paddle.v2.attr.ParameterAttribute')
doc = re.sub(r'ExtraLayerAttribute[^\s]?',
'paddle.v2.attr.ExtraAttribute', doc)
# xxx_layer to xxx
doc = re.sub(r"(?P<name>[a-z]+)_layer", r"\g<name>", doc)
# XxxxActivation to paddle.v2.Activation.Xxxx
doc = re.sub(r"(?P<name>[A-Z][a-zA-Z]+)Activation",
r"paddle.v2.Activation.\g<name>", doc)
# TODO(yuyang18): Add more rules if needed.
return doc
class Layer(object):
__metaclass__ = LayerType
def __init__(self, name=None, parent_layers=None):
assert isinstance(parent_layers, dict)
self.name = name
self.__contex__ = {}
self.__parent_layers__ = parent_layers
def to_proto(self, context):
"""
function to set proto attribute
"""
kwargs = dict()
for layer_name in self.__parent_layers__:
if not isinstance(self.__parent_layers__[layer_name],
collections.Sequence):
v1_layer = self.__parent_layers__[layer_name].to_proto(
context=context)
else:
v1_layer = map(lambda x: x.to_proto(context=context),
self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer
if self.context_name() is None:
return self.to_proto_impl(**kwargs)
elif self.context_name() not in context:
context[self.context_name()] = self.to_proto_impl(**kwargs)
self.__contex__ = context
if self.use_context_name():
return context[self.context_name()]
else:
return context[self.name]
def to_proto_impl(self, **kwargs):
raise NotImplementedError()
def context_name(self):
"""
Context name means the context which stores `to_proto_impl` result.
If multiple layers share the same context_name, their `to_proto_impl`
will be invoked only once.
"""
return self.name
def use_context_name(self):
return False
def calculate_size(self):
"""
Lazily calculate the size of the layer. It should be called after to_proto_impl
of this layer has been called.
:return:
"""
return self.__contex__[self.context_name()].size
def __convert_to_v2__(method_name, parent_names, is_default_name=True):
if is_default_name:
wrapper = wrap_name_default(name_prefix=method_name)
else:
wrapper = None
class V2LayerImpl(Layer):
METHOD_NAME = method_name
def __init__(self, **kwargs):
parent_layers = dict()
other_kwargs = dict()
for pname in parent_names:
if kwargs.has_key(pname):
parent_layers[pname] = kwargs[pname]
for key in kwargs.keys():
if key not in parent_names:
other_kwargs[key] = kwargs[key]
name = kwargs.get('name', None)
super(V2LayerImpl, self).__init__(name, parent_layers)
self.__other_kwargs__ = other_kwargs
if wrapper is not None:
__init__ = wrapper(__init__)
def to_proto_impl(self, **kwargs):
args = dict()
for each in kwargs:
args[each] = kwargs[each]
for each in self.__other_kwargs__:
args[each] = self.__other_kwargs__[each]
return getattr(conf_helps, method_name)(**args)
return V2LayerImpl
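A hedged sketch of how __convert_to_v2__ is meant to be used; the helper name 'fc_layer' and the parent name 'input' are illustrative assumptions, not taken from this file:

.. code-block:: python

    # wrap a v1 config helper into a lazily evaluated v2 Layer subclass
    fc = __convert_to_v2__('fc_layer', parent_names=['input'])

    # hidden = fc(input=some_v2_layer, size=200)
    # hidden.to_proto(context={})   # calls conf_helps.fc_layer(...) once per context name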
...@@ -12,13 +12,20 @@ ...@@ -12,13 +12,20 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from py_paddle import swig_paddle
from py_paddle import DataProviderConverter from py_paddle import DataProviderConverter
import data_type
import paddle.trainer.PyDataProvider2 as pydp2
__all__ = ['DataFeeder'] __all__ = ['DataFeeder']
def default_feeding_map(data_types):
reader_dict = dict()
for i, tp in enumerate(data_types):
reader_dict[tp[0]] = i
return reader_dict
class DataFeeder(DataProviderConverter): class DataFeeder(DataProviderConverter):
""" """
DataFeeder converts the data returned by paddle.reader into a data structure DataFeeder converts the data returned by paddle.reader into a data structure
...@@ -29,7 +36,10 @@ class DataFeeder(DataProviderConverter): ...@@ -29,7 +36,10 @@ class DataFeeder(DataProviderConverter):
to feed it to C++ interface. to feed it to C++ interface.
The example usage: The example usage:
.. code-block:: python
data_types = [('image', paddle.data_type.dense_vector(784)), data_types = [('image', paddle.data_type.dense_vector(784)),
('label', paddle.data_type.integer_value(10))] ('label', paddle.data_type.integer_value(10))]
reader_dict = {'image':0, 'label':1} reader_dict = {'image':0, 'label':1}
...@@ -43,49 +53,51 @@ class DataFeeder(DataProviderConverter): ...@@ -43,49 +53,51 @@ class DataFeeder(DataProviderConverter):
# [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ] # second sample # [ [1.0,2.0,3.0,4.0], 5, [6,7,8] ] # second sample
# ] # ]
arg = feeder(minibatch_data) arg = feeder(minibatch_data)
.. note::
This module is for internal use only. Users should use the `reader`
interface.
:param data_types: A list to specify data name and type. Each item is
a tuple of (data_name, data_type).
:type data_types: list
:param feeding: A dictionary to specify the position of each data
in the input data.
:type feeding: dict
""" """
def __init__(self, data_types, reader_dict): def __init__(self, data_types, feeding=None):
"""
:param data_types: A list to specify data name and type. Each item is
a tuple of (data_name, data_type). For example:
[('image', paddle.data_type.dense_vector(784)),
('label', paddle.data_type.integer_value(10))]
:type data_types: A list of tuple
:param reader_dict: A dictionary to specify the position of each data
in the input data.
:type reader_dict: dict()
"""
self.input_names = [] self.input_names = []
input_types = [] input_types = []
self.reader_dict = reader_dict if feeding is None:
feeding = default_feeding_map(data_types)
self.feeding = feeding
for each in data_types: for each in data_types:
self.input_names.append(each[0]) self.input_names.append(each[0])
assert isinstance(each[1], data_type.InputType) if not isinstance(each[1], pydp2.InputType):
raise TypeError("second item in each data_type should be an "
"InputType")
input_types.append(each[1]) input_types.append(each[1])
DataProviderConverter.__init__(self, input_types) DataProviderConverter.__init__(self, input_types)
def __len__(self):
return len(self.input_names)
def convert(self, dat, argument=None): def convert(self, dat, argument=None):
""" """
:param dat: A list of mini-batch data. Each sample is a list or tuple :param dat: A list of mini-batch data. Each sample is a list or tuple
one feature or multiple features. one feature or multiple features.
for example:
[ :type dat: list
([0.2, 0.2], ), # first sample
([0.8, 0.3], ), # second sample
]
or,
[
[[0.2, 0.2], ], # first sample
[[0.8, 0.3], ], # second sample
]
:type dat: List
:param argument: An Arguments object contains this mini-batch data with :param argument: An Arguments object contains this mini-batch data with
one or multiple features. The Arguments definition is one or multiple features. The Arguments definition is
in the API. in the API.
:type argument: swig_paddle.Arguments :type argument: py_paddle.swig_paddle.Arguments
""" """
def reorder_data(data): def reorder_data(data):
...@@ -93,7 +105,7 @@ class DataFeeder(DataProviderConverter): ...@@ -93,7 +105,7 @@ class DataFeeder(DataProviderConverter):
for each in data: for each in data:
reorder = [] reorder = []
for name in self.input_names: for name in self.input_names:
reorder.append(each[self.reader_dict[name]]) reorder.append(each[self.feeding[name]])
retv.append(reorder) retv.append(reorder)
return retv return retv
......
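A short sketch of the default feeding map described above; each data name maps to its position within a sample:

.. code-block:: python

    import paddle.v2 as paddle
    from paddle.v2.data_feeder import DataFeeder

    data_types = [('image', paddle.data_type.dense_vector(784)),
                  ('label', paddle.data_type.integer_value(10))]
    feeder = DataFeeder(data_types)   # feeding defaults to {'image': 0, 'label': 1}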
...@@ -12,11 +12,15 @@ ...@@ -12,11 +12,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle.trainer.PyDataProvider2 import \ import paddle.trainer.PyDataProvider2 as pydp2
InputType, DataType, dense_vector, sparse_binary_vector,\
sparse_vector, integer_value, integer_value_sequence
__all__ = [ import_list = [
'InputType', 'DataType', 'dense_vector', 'sparse_binary_vector', nm for nm in dir(pydp2)
'sparse_vector', 'integer_value', 'integer_value_sequence' if '_' in nm and nm[0] != '_' and ('value' in nm or 'vector' in nm)
] ]
import_list.extend(['InputType'])
for nm in import_list:
globals()[nm] = getattr(pydp2, nm)
__all__ = import_list
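A rough illustration of which names the filter above admits (the exact list depends on what PyDataProvider2 defines):

.. code-block:: python

    import paddle.v2.data_type as data_type

    # names containing '_', not private, and containing 'value' or 'vector'
    # are re-exported, e.g. dense_vector, sparse_binary_vector, sparse_vector,
    # integer_value, integer_value_sequence
    t = data_type.dense_vector(784)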
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Dataset package.
"""
import mnist import mnist
import imikolov
import imdb
import cifar
import movielens
import conll05
import uci_housing
import sentiment
import wmt14
__all__ = ['mnist'] __all__ = [
'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment',
'uci_housing', 'wmt14'
]
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" """
CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
TODO(yuyang18): Complete the comments.
""" """
import cPickle import cPickle
import itertools import itertools
import numpy import numpy
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests import requests
import hashlib import hashlib
import os import os
import shutil import shutil
import sys
__all__ = ['DATA_HOME', 'download', 'md5file'] __all__ = ['DATA_HOME', 'download', 'md5file']
...@@ -27,9 +42,24 @@ def download(url, module_name, md5sum): ...@@ -27,9 +42,24 @@ def download(url, module_name, md5sum):
filename = os.path.join(dirname, url.split('/')[-1]) filename = os.path.join(dirname, url.split('/')[-1])
if not (os.path.exists(filename) and md5file(filename) == md5sum): if not (os.path.exists(filename) and md5file(filename) == md5sum):
print "Cache file %s not found, downloading %s" % (filename, url)
r = requests.get(url, stream=True) r = requests.get(url, stream=True)
with open(filename, 'w') as f: total_length = r.headers.get('content-length')
shutil.copyfileobj(r.raw, f)
if total_length is None:
with open(filename, 'w') as f:
shutil.copyfileobj(r.raw, f)
else:
with open(filename, 'w') as f:
dl = 0
total_length = int(total_length)
for data in r.iter_content(chunk_size=4096):
dl += len(data)
f.write(data)
done = int(50 * dl / total_length)
sys.stdout.write("\r[%s%s]" % ('=' * done,
' ' * (50 - done)))
sys.stdout.flush()
return filename return filename
......
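A hedged usage sketch of the download helper above; the URL and md5 below are placeholders, not real values:

.. code-block:: python

    from paddle.v2.dataset.common import download

    # path = download('http://example.com/data.tar.gz', 'my_module', '<md5 of the file>')
    # The file is cached under DATA_HOME/my_module and only re-downloaded
    # when it is missing or its md5 does not match.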
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tarfile
import gzip
import itertools
from common import download
"""
Conll 2005 dataset. The Paddle semantic role labeling Book and demo use this
dataset as an example. Because Conll 2005 is not freely available to the public,
the default download URL points to the Conll 2005 test set (which is public).
Users can change the URL and MD5 to point to their own Conll dataset.
TODO(yuyang18): Complete comments.
"""
__all__ = ['test', 'get_dict', 'get_embedding']
DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5 = '387719152ae52d60422c016e92a742fc'
WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt'
WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa'
VERBDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt'
VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c'
TRGDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt'
TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751'
EMB_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb'
EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7'
UNK_IDX = 0
def load_dict(filename):
d = dict()
with open(filename, 'r') as f:
for i, line in enumerate(f):
d[line.strip()] = i
return d
def corpus_reader(data_path, words_name, props_name):
"""
Read one corpus. It returns an iterator. Each element of
this iterator is a tuple including the sentence and labels. The sentence
consists of a list of word IDs. The labels include a list of label IDs.
:return: an iterator of data.
:rtype: iterator
"""
def reader():
tf = tarfile.open(data_path)
wf = tf.extractfile(words_name)
pf = tf.extractfile(props_name)
with gzip.GzipFile(fileobj=wf) as words_file, gzip.GzipFile(
fileobj=pf) as props_file:
sentences = []
labels = []
one_seg = []
for word, label in itertools.izip(words_file, props_file):
word = word.strip()
label = label.strip().split()
if len(label) == 0: # end of sentence
for i in xrange(len(one_seg[0])):
a_kind_lable = [x[i] for x in one_seg]
labels.append(a_kind_lable)
if len(labels) >= 1:
verb_list = []
for x in labels[0]:
if x != '-':
verb_list.append(x)
for i, lbl in enumerate(labels[1:]):
cur_tag = 'O'
is_in_bracket = False
lbl_seq = []
verb_word = ''
for l in lbl:
if l == '*' and is_in_bracket == False:
lbl_seq.append('O')
elif l == '*' and is_in_bracket == True:
lbl_seq.append('I-' + cur_tag)
elif l == '*)':
lbl_seq.append('I-' + cur_tag)
is_in_bracket = False
elif l.find('(') != -1 and l.find(')') != -1:
cur_tag = l[1:l.find('*')]
lbl_seq.append('B-' + cur_tag)
is_in_bracket = False
elif l.find('(') != -1 and l.find(')') == -1:
cur_tag = l[1:l.find('*')]
lbl_seq.append('B-' + cur_tag)
is_in_bracket = True
else:
raise RuntimeError('Unexpected label: %s' %
l)
yield sentences, verb_list[i], lbl_seq
sentences = []
labels = []
one_seg = []
else:
sentences.append(word)
one_seg.append(label)
pf.close()
wf.close()
tf.close()
return reader
def reader_creator(corpus_reader,
word_dict=None,
predicate_dict=None,
label_dict=None):
def reader():
for sentence, predicate, labels in corpus_reader():
sen_len = len(sentence)
verb_index = labels.index('B-V')
mark = [0] * len(labels)
if verb_index > 0:
mark[verb_index - 1] = 1
ctx_n1 = sentence[verb_index - 1]
else:
ctx_n1 = 'bos'
if verb_index > 1:
mark[verb_index - 2] = 1
ctx_n2 = sentence[verb_index - 2]
else:
ctx_n2 = 'bos'
mark[verb_index] = 1
ctx_0 = sentence[verb_index]
if verb_index < len(labels) - 1:
mark[verb_index + 1] = 1
ctx_p1 = sentence[verb_index + 1]
else:
ctx_p1 = 'eos'
if verb_index < len(labels) - 2:
mark[verb_index + 2] = 1
ctx_p2 = sentence[verb_index + 2]
else:
ctx_p2 = 'eos'
word_idx = [word_dict.get(w, UNK_IDX) for w in sentence]
ctx_n2_idx = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_idx = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_idx = [word_dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_idx = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_idx = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len
pred_idx = [predicate_dict.get(predicate)] * sen_len
label_idx = [label_dict.get(w) for w in labels]
yield word_idx, ctx_n2_idx, ctx_n1_idx, \
ctx_0_idx, ctx_p1_idx, ctx_p2_idx, pred_idx, mark, label_idx
return reader
def get_dict():
word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5))
verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5))
label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5))
return word_dict, verb_dict, label_dict
def get_embedding():
return download(EMB_URL, 'conll05st', EMB_MD5)
def test():
word_dict, verb_dict, label_dict = get_dict()
reader = corpus_reader(
download(DATA_URL, 'conll05st', DATA_MD5),
words_name='conll05st-release/test.wsj/words/test.wsj.words.gz',
props_name='conll05st-release/test.wsj/props/test.wsj.props.gz')
return reader_creator(reader, word_dict, verb_dict, label_dict)
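A short sketch of how the conll05 readers above are typically consumed (this downloads the public test split on first use):

.. code-block:: python

    import paddle.v2.dataset.conll05 as conll05

    word_dict, verb_dict, label_dict = conll05.get_dict()
    for sample in conll05.test()():
        # word_idx, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, pred_idx, mark, label_idx
        break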
# /usr/bin/env python
# -*- coding:utf-8 -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
...@@ -16,7 +13,10 @@ ...@@ -16,7 +13,10 @@
# limitations under the License. # limitations under the License.
""" """
IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
TODO(yuyang18): Complete comments.
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
import tarfile import tarfile
import Queue import Queue
...@@ -118,3 +118,8 @@ def test(word_idx): ...@@ -118,3 +118,8 @@ def test(word_idx):
return reader_creator( return reader_creator(
re.compile("aclImdb/test/pos/.*\.txt$"), re.compile("aclImdb/test/pos/.*\.txt$"),
re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000) re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000)
def word_dict():
return build_dict(
re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" """
imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
Complete comments.
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
import tarfile import tarfile
__all__ = ['train', 'test'] __all__ = ['train', 'test', 'build_dict']
URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'
MD5 = '30177ea32e27c525793142b6bf2c8e2d' MD5 = '30177ea32e27c525793142b6bf2c8e2d'
...@@ -24,7 +39,9 @@ def word_count(f, word_freq=None): ...@@ -24,7 +39,9 @@ def word_count(f, word_freq=None):
return word_freq return word_freq
def build_dict(train_filename, test_filename): def build_dict():
train_filename = './simple-examples/data/ptb.train.txt'
test_filename = './simple-examples/data/ptb.valid.txt'
with tarfile.open( with tarfile.open(
paddle.v2.dataset.common.download( paddle.v2.dataset.common.download(
paddle.v2.dataset.imikolov.URL, 'imikolov', paddle.v2.dataset.imikolov.URL, 'imikolov',
...@@ -32,27 +49,22 @@ def build_dict(train_filename, test_filename): ...@@ -32,27 +49,22 @@ def build_dict(train_filename, test_filename):
trainf = tf.extractfile(train_filename) trainf = tf.extractfile(train_filename)
testf = tf.extractfile(test_filename) testf = tf.extractfile(test_filename)
word_freq = word_count(testf, word_count(trainf)) word_freq = word_count(testf, word_count(trainf))
if '<unk>' in word_freq:
# remove <unk> for now, since we will set it as last index
del word_freq['<unk>']
TYPO_FREQ = 50 TYPO_FREQ = 50
word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items()) word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items())
dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0])) word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0]))
words, _ = list(zip(*dictionary)) words, _ = list(zip(*word_freq_sorted))
word_idx = dict(zip(words, xrange(len(words)))) word_idx = dict(zip(words, xrange(len(words))))
word_idx['<unk>'] = len(words) word_idx['<unk>'] = len(words)
return word_idx return word_idx
word_idx = {} def reader_creator(filename, word_idx, n):
def reader_creator(filename, n):
global word_idx
if len(word_idx) == 0:
word_idx = build_dict('./simple-examples/data/ptb.train.txt',
'./simple-examples/data/ptb.valid.txt')
def reader(): def reader():
with tarfile.open( with tarfile.open(
paddle.v2.dataset.common.download( paddle.v2.dataset.common.download(
...@@ -71,9 +83,9 @@ def reader_creator(filename, n): ...@@ -71,9 +83,9 @@ def reader_creator(filename, n):
return reader return reader
def train(n): def train(word_idx, n):
return reader_creator('./simple-examples/data/ptb.train.txt', n) return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n)
def test(n): def test(word_idx, n):
return reader_creator('./simple-examples/data/ptb.valid.txt', n) return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n)
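A sketch of the new calling convention above: build the dictionary once and pass it to the readers:

.. code-block:: python

    import paddle.v2.dataset.imikolov as imikolov

    word_dict = imikolov.build_dict()
    train_reader = imikolov.train(word_dict, 5)   # 5-gram samples of word indices
    # test_reader = imikolov.test(word_dict, 5)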
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" """
MNIST dataset. MNIST dataset.
This module will download the dataset from http://yann.lecun.com/exdb/mnist/ and
parse the train set and test set into paddle reader creators.
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
import subprocess import subprocess
...@@ -35,29 +51,39 @@ def reader_creator(image_filename, label_filename, buffer_size): ...@@ -35,29 +51,39 @@ def reader_creator(image_filename, label_filename, buffer_size):
l = subprocess.Popen([zcat_cmd, label_filename], stdout=subprocess.PIPE) l = subprocess.Popen([zcat_cmd, label_filename], stdout=subprocess.PIPE)
l.stdout.read(8) # skip some magic bytes l.stdout.read(8) # skip some magic bytes
while True: try: # reader could be break.
labels = numpy.fromfile( while True:
l.stdout, 'ubyte', count=buffer_size).astype("int") labels = numpy.fromfile(
l.stdout, 'ubyte', count=buffer_size).astype("int")
if labels.size != buffer_size:
break # numpy.fromfile returns empty slice after EOF.
images = numpy.fromfile( if labels.size != buffer_size:
m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape( break # numpy.fromfile returns empty slice after EOF.
(buffer_size, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0 images = numpy.fromfile(
m.stdout, 'ubyte', count=buffer_size * 28 * 28).reshape(
(buffer_size, 28 * 28)).astype('float32')
for i in xrange(buffer_size): images = images / 255.0 * 2.0 - 1.0
yield images[i, :], int(labels[i])
m.terminate() for i in xrange(buffer_size):
l.terminate() yield images[i, :], int(labels[i])
finally:
m.terminate()
l.terminate()
return reader return reader
def train(): def train():
"""
MNIST train set creator.
It returns a reader creator. Each sample in the reader is a tuple of image
pixels in [-1, 1] and a label in [0, 9].
:return: Train reader creator
:rtype: callable
"""
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist',
TRAIN_IMAGE_MD5), TRAIN_IMAGE_MD5),
...@@ -66,6 +92,15 @@ def train(): ...@@ -66,6 +92,15 @@ def train():
def test(): def test():
"""
MNIST test set creator.
It returns a reader creator. Each sample in the reader is a tuple of image
pixels in [-1, 1] and a label in [0, 9].
:return: Test reader creator.
:rtype: callable
"""
return reader_creator( return reader_creator(
paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist',
TEST_IMAGE_MD5), TEST_IMAGE_MD5),
......
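A small usage sketch for the readers documented above:

.. code-block:: python

    import paddle.v2.dataset.mnist as mnist

    for image, label in mnist.train()():
        # image: 784 float32 values scaled to [-1, 1]; label: int in [0, 9]
        break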
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Movielens 1-M dataset.
TODO(yuyang18): Complete comments.
"""
import zipfile import zipfile
from common import download from common import download
import re import re
import random import random
import functools import functools
__all__ = ['train_creator', 'test_creator'] __all__ = [
'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id',
'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info'
]
age_table = [1, 18, 25, 35, 45, 50, 56]
class MovieInfo(object): class MovieInfo(object):
...@@ -19,17 +43,32 @@ class MovieInfo(object): ...@@ -19,17 +43,32 @@ class MovieInfo(object):
[MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()] [MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()]
] ]
def __str__(self):
return "<MovieInfo id(%d), title(%s), categories(%s)>" % (
self.index, self.title, self.categories)
def __repr__(self):
return self.__str__()
class UserInfo(object): class UserInfo(object):
def __init__(self, index, gender, age, job_id): def __init__(self, index, gender, age, job_id):
self.index = int(index) self.index = int(index)
self.is_male = gender == 'M' self.is_male = gender == 'M'
self.age = [1, 18, 25, 35, 45, 50, 56].index(int(age)) self.age = age_table.index(int(age))
self.job_id = int(job_id) self.job_id = int(job_id)
def value(self): def value(self):
return [self.index, 0 if self.is_male else 1, self.age, self.job_id] return [self.index, 0 if self.is_male else 1, self.age, self.job_id]
def __str__(self):
return "<UserInfo id(%d), gender(%s), age(%d), job(%d)>" % (
self.index, "M"
if self.is_male else "F", age_table[self.age], self.job_id)
def __repr__(self):
return str(self)
MOVIE_INFO = None MOVIE_INFO = None
MOVIE_TITLE_DICT = None MOVIE_TITLE_DICT = None
...@@ -40,7 +79,8 @@ USER_INFO = None ...@@ -40,7 +79,8 @@ USER_INFO = None
def __initialize_meta_info__(): def __initialize_meta_info__():
fn = download( fn = download(
url='http://files.grouplens.org/datasets/movielens/ml-1m.zip', url='http://files.grouplens.org/datasets/movielens/ml-1m.zip',
md5='c4d9eecfca2ab87c1945afe126590906') module_name='movielens',
md5sum='c4d9eecfca2ab87c1945afe126590906')
global MOVIE_INFO global MOVIE_INFO
if MOVIE_INFO is None: if MOVIE_INFO is None:
pattern = re.compile(r'^(.*)\((\d+)\)$') pattern = re.compile(r'^(.*)\((\d+)\)$')
...@@ -103,14 +143,63 @@ def __reader_creator__(**kwargs): ...@@ -103,14 +143,63 @@ def __reader_creator__(**kwargs):
return lambda: __reader__(**kwargs) return lambda: __reader__(**kwargs)
train_creator = functools.partial(__reader_creator__, is_test=False) train = functools.partial(__reader_creator__, is_test=False)
test_creator = functools.partial(__reader_creator__, is_test=True) test = functools.partial(__reader_creator__, is_test=True)
def get_movie_title_dict():
__initialize_meta_info__()
return MOVIE_TITLE_DICT
def __max_index_info__(a, b):
if a.index > b.index:
return a
else:
return b
def max_movie_id():
__initialize_meta_info__()
return reduce(__max_index_info__, MOVIE_INFO.viewvalues()).index
def max_user_id():
__initialize_meta_info__()
return reduce(__max_index_info__, USER_INFO.viewvalues()).index
def __max_job_id_impl__(a, b):
if a.job_id > b.job_id:
return a
else:
return b
def max_job_id():
__initialize_meta_info__()
return reduce(__max_job_id_impl__, USER_INFO.viewvalues()).job_id
def movie_categories():
__initialize_meta_info__()
return CATEGORIES_DICT
def user_info():
__initialize_meta_info__()
return USER_INFO
def movie_info():
__initialize_meta_info__()
return MOVIE_INFO
def unittest(): def unittest():
for train_count, _ in enumerate(train_creator()()): for train_count, _ in enumerate(train()()):
pass pass
for test_count, _ in enumerate(test_creator()()): for test_count, _ in enumerate(test()()):
pass pass
print train_count, test_count print train_count, test_count
......
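A brief sketch of the renamed movielens API (train/test replace train_creator/test_creator) together with the new meta-info helpers:

.. code-block:: python

    import paddle.v2.dataset.movielens as movielens

    print movielens.max_user_id(), movielens.max_movie_id(), movielens.max_job_id()
    for sample in movielens.train()():
        break   # one rating record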
# /usr/bin/env python
# -*- coding:utf-8 -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script fetches and preprocesses the movie_reviews data set provided by NLTK.
TODO(yuyang18): Complete dataset.
"""
import collections
from itertools import chain
import nltk
from nltk.corpus import movie_reviews
import common
__all__ = ['train', 'test', 'get_word_dict']
NUM_TRAINING_INSTANCES = 1600
NUM_TOTAL_INSTANCES = 2000
def download_data_if_not_yet():
"""
Download the data set if it has not been downloaded yet.
"""
try:
# make sure that nltk can find the data
if common.DATA_HOME not in nltk.data.path:
nltk.data.path.append(common.DATA_HOME)
movie_reviews.categories()
except LookupError:
print "Downloading movie_reviews data set, please wait....."
nltk.download('movie_reviews', download_dir=common.DATA_HOME)
print "Download data set success....."
print "Path is " + nltk.data.find('corpora/movie_reviews').path
def get_word_dict():
"""
Sort the words by the frequency with which they occur in the samples.
:return:
words_freq_sorted
"""
words_freq_sorted = list()
word_freq_dict = collections.defaultdict(int)
download_data_if_not_yet()
for category in movie_reviews.categories():
for field in movie_reviews.fileids(category):
for words in movie_reviews.words(field):
word_freq_dict[words] += 1
words_sort_list = word_freq_dict.items()
words_sort_list.sort(cmp=lambda a, b: b[1] - a[1])
for index, word in enumerate(words_sort_list):
words_freq_sorted.append((word[0], index))
return words_freq_sorted
def sort_files():
"""
Sort the sample files so that positive and negative samples alternate.
:return:
files_list
"""
files_list = list()
neg_file_list = movie_reviews.fileids('neg')
pos_file_list = movie_reviews.fileids('pos')
files_list = list(chain.from_iterable(zip(neg_file_list, pos_file_list)))
return files_list
def load_sentiment_data():
"""
Load the data set
:return:
data_set
"""
data_set = list()
download_data_if_not_yet()
words_ids = dict(get_word_dict())
for sample_file in sort_files():
words_list = list()
category = 0 if 'neg' in sample_file else 1
for word in movie_reviews.words(sample_file):
words_list.append(words_ids[word.lower()])
data_set.append((words_list, category))
return data_set
def reader_creator(data):
"""
Reader creator that generates an iterator over the data set.
:param data:
train data set or test data set
"""
for each in data:
yield each[0], each[1]
def train():
"""
Default train set reader creator
"""
data_set = load_sentiment_data()
return reader_creator(data_set[0:NUM_TRAINING_INSTANCES])
def test():
"""
Default test set reader creator
"""
data_set = load_sentiment_data()
return reader_creator(data_set[NUM_TRAINING_INSTANCES:])
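A hedged usage sketch of the NLTK-backed sentiment readers above:

.. code-block:: python

    import paddle.v2.dataset.sentiment as sentiment

    word_dict = dict(sentiment.get_word_dict())   # word -> frequency rank
    for word_ids, label in sentiment.test():
        break   # word_ids are dictionary indices; label is 0 (neg) or 1 (pos)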
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.cifar import paddle.v2.dataset.cifar
import unittest import unittest
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.common import paddle.v2.dataset.common
import unittest import unittest
import tempfile import tempfile
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.imdb import paddle.v2.dataset.imdb
import unittest import unittest
import re import re
......
import paddle.v2.dataset.imikolov import paddle.v2.dataset.imikolov
import unittest import unittest
WORD_DICT = paddle.v2.dataset.imikolov.build_dict()
class TestMikolov(unittest.TestCase): class TestMikolov(unittest.TestCase):
def check_reader(self, reader, n): def check_reader(self, reader, n):
...@@ -9,11 +11,15 @@ class TestMikolov(unittest.TestCase): ...@@ -9,11 +11,15 @@ class TestMikolov(unittest.TestCase):
def test_train(self): def test_train(self):
n = 5 n = 5
self.check_reader(paddle.v2.dataset.imikolov.train(n), n) self.check_reader(paddle.v2.dataset.imikolov.train(WORD_DICT, n), n)
def test_test(self): def test_test(self):
n = 5 n = 5
self.check_reader(paddle.v2.dataset.imikolov.test(n), n) self.check_reader(paddle.v2.dataset.imikolov.test(WORD_DICT, n), n)
def test_total(self):
_, idx = zip(*WORD_DICT.items())
self.assertEqual(sorted(idx)[-1], len(WORD_DICT) - 1)
if __name__ == '__main__': if __name__ == '__main__':
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.mnist import paddle.v2.dataset.mnist
import unittest import unittest
......
# /usr/bin/env python
# -*- coding:utf-8 -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import nltk
import paddle.v2.dataset.sentiment as st
from nltk.corpus import movie_reviews
class TestSentimentMethods(unittest.TestCase):
def test_get_word_dict(self):
word_dict = st.get_word_dict()[0:10]
test_word_list = [(u',', 0), (u'the', 1), (u'.', 2), (u'a', 3),
(u'and', 4), (u'of', 5), (u'to', 6), (u"'", 7),
(u'is', 8), (u'in', 9)]
for idx, each in enumerate(word_dict):
self.assertEqual(each, test_word_list[idx])
self.assertTrue("/root/.cache/paddle/dataset" in nltk.data.path)
def test_sort_files(self):
last_label = ''
for sample_file in st.sort_files():
current_label = sample_file.split("/")[0]
self.assertNotEqual(current_label, last_label)
last_label = current_label
def test_data_set(self):
data_set = st.load_sentiment_data()
last_label = -1
for each in st.test():
self.assertNotEqual(each[1], last_label)
last_label = each[1]
self.assertEqual(len(data_set), st.NUM_TOTAL_INSTANCES)
self.assertEqual(len(list(st.train())), st.NUM_TRAINING_INSTANCES)
self.assertEqual(
len(list(st.test())),
(st.NUM_TOTAL_INSTANCES - st.NUM_TRAINING_INSTANCES))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
UCI Housing dataset.
TODO(yuyang18): Complete comments.
"""
import numpy as np
import os
from common import download
__all__ = ['train', 'test']
URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'
MD5 = 'd4accdce7a25600298819f8e28e8d593'
feature_names = [
'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
'PTRATIO', 'B', 'LSTAT'
]
UCI_TRAIN_DATA = None
UCI_TEST_DATA = None
def feature_range(maximums, minimums):
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
feature_num = len(maximums)
ax.bar(range(feature_num), maximums - minimums, color='r', align='center')
ax.set_title('feature scale')
plt.xticks(range(feature_num), feature_names)
plt.xlim([-1, feature_num])
fig.set_figheight(6)
fig.set_figwidth(10)
if not os.path.exists('./image'):
os.makedirs('./image')
fig.savefig('image/ranges.png', dpi=48)
plt.close(fig)
def load_data(filename, feature_num=14, ratio=0.8):
global UCI_TRAIN_DATA, UCI_TEST_DATA
if UCI_TRAIN_DATA is not None and UCI_TEST_DATA is not None:
return
data = np.fromfile(filename, sep=' ')
data = data.reshape(data.shape[0] / feature_num, feature_num)
maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum(
axis=0) / data.shape[0]
feature_range(maximums[:-1], minimums[:-1])
for i in xrange(feature_num - 1):
data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])
offset = int(data.shape[0] * ratio)
UCI_TRAIN_DATA = data[:offset]
UCI_TEST_DATA = data[offset:]
def train():
global UCI_TRAIN_DATA
load_data(download(URL, 'uci_housing', MD5))
def reader():
for d in UCI_TRAIN_DATA:
yield d[:-1], d[-1:]
return reader
def test():
global UCI_TEST_DATA
load_data(download(URL, 'uci_housing', MD5))
def reader():
for d in UCI_TEST_DATA:
yield d[:-1], d[-1:]
return reader
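A minimal sketch of the UCI housing readers defined above:

.. code-block:: python

    import paddle.v2.dataset.uci_housing as uci_housing

    for features, price in uci_housing.train()():
        break   # 13 normalized features and a one-element target array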
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
wmt14 dataset
"""
import tarfile
import paddle.v2.dataset.common
__all__ = ['train', 'test', 'build_dict']
URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
# this is a small set of data for test. The original data is too large and will be added later.
URL_TRAIN = 'http://paddlepaddle.bj.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz'
MD5_TRAIN = 'a755315dd01c2c35bde29a744ede23a6'
START = "<s>"
END = "<e>"
UNK = "<unk>"
UNK_IDX = 2
def __read_to_dict__(tar_file, dict_size):
def __to_dict__(fd, size):
out_dict = dict()
for line_count, line in enumerate(fd):
if line_count < size:
out_dict[line.strip()] = line_count
else:
break
return out_dict
with tarfile.open(tar_file, mode='r') as f:
names = [
each_item.name for each_item in f
if each_item.name.endswith("src.dict")
]
assert len(names) == 1
src_dict = __to_dict__(f.extractfile(names[0]), dict_size)
names = [
each_item.name for each_item in f
if each_item.name.endswith("trg.dict")
]
assert len(names) == 1
trg_dict = __to_dict__(f.extractfile(names[0]), dict_size)
return src_dict, trg_dict
def reader_creator(tar_file, file_name, dict_size):
def reader():
src_dict, trg_dict = __read_to_dict__(tar_file, dict_size)
with tarfile.open(tar_file, mode='r') as f:
names = [
each_item.name for each_item in f
if each_item.name.endswith(file_name)
]
for name in names:
for line in f.extractfile(name):
line_split = line.strip().split('\t')
if len(line_split) != 2:
continue
src_seq = line_split[0] # one source sequence
src_words = src_seq.split()
src_ids = [
src_dict.get(w, UNK_IDX)
for w in [START] + src_words + [END]
]
trg_seq = line_split[1] # one target sequence
trg_words = trg_seq.split()
trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words]
# remove sequence whose length > 80 in training mode
if len(src_ids) > 80 or len(trg_ids) > 80:
continue
trg_ids_next = trg_ids + [trg_dict[END]]
trg_ids = [trg_dict[START]] + trg_ids
yield src_ids, trg_ids, trg_ids_next
return reader
def train(dict_size):
return reader_creator(
paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
'train/train', dict_size)
def test(dict_size):
return reader_creator(
paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN),
'test/test', dict_size)
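A short sketch of how the wmt14 readers above are consumed; dict_size is the caller's choice:

.. code-block:: python

    import paddle.v2.dataset.wmt14 as wmt14

    dict_size = 30000
    for src_ids, trg_ids, trg_ids_next in wmt14.train(dict_size)():
        break   # src_ids is wrapped in <s>/<e>; trg_ids_next is trg_ids shifted by one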
...@@ -34,8 +34,13 @@ class WithMetric(object): ...@@ -34,8 +34,13 @@ class WithMetric(object):
class TestResult(WithMetric): class TestResult(WithMetric):
def __init__(self, evaluator): """
Result that trainer.test returns.
"""
def __init__(self, evaluator, cost):
super(TestResult, self).__init__(evaluator) super(TestResult, self).__init__(evaluator)
self.cost = cost
class BeginPass(object): class BeginPass(object):
......
import numpy
import py_paddle.swig_paddle as api
import collections
import topology
import minibatch
from data_feeder import DataFeeder
__all__ = ['infer']
class Inference(object):
def __init__(self, output_layer, parameters):
topo = topology.Topology(output_layer)
gm = api.GradientMachine.createFromConfigProto(
topo.proto(), api.CREATE_MODE_TESTING, [api.PARAMETER_VALUE])
for param in gm.getParameters():
val = param.getBuf(api.PARAMETER_VALUE)
name = param.getName()
assert isinstance(val, api.Vector)
val.copyFromNumpyArray(parameters.get(name).flatten())
self.__gradient_machine__ = gm
self.__data_types__ = topo.data_type()
def iter_infer(self, input, feeding=None):
feeder = DataFeeder(self.__data_types__, feeding)
batch_size = len(input)
def __reader_impl__():
for each_sample in input:
yield each_sample
reader = minibatch.batch(__reader_impl__, batch_size=batch_size)
self.__gradient_machine__.start()
for data_batch in reader():
yield self.__gradient_machine__.forwardTest(feeder(data_batch))
self.__gradient_machine__.finish()
def iter_infer_field(self, field, **kwargs):
for result in self.iter_infer(**kwargs):
yield [each_result[field] for each_result in result]
def infer(self, field='value', **kwargs):
retv = None
for result in self.iter_infer_field(field=field, **kwargs):
if retv is None:
retv = [[]] * len(result)
for i, item in enumerate(result):
retv[i].append(item)
retv = [numpy.concatenate(out) for out in retv]
if len(retv) == 1:
return retv[0]
else:
return retv
def infer(output_layer, parameters, input, feeding=None, field='value'):
"""
    Infer a neural network by the given output layer and parameters. The user
    should pass a batch of input data as `input`.
Example usages:
.. code-block:: python
        result = paddle.infer(prediction, parameters, input=SomeData)
print result
:param output_layer: output of the neural network that would be inferred
:type output_layer: paddle.v2.config_base.Layer
:param parameters: parameters of the neural network.
:type parameters: paddle.v2.parameters.Parameters
:param input: input data batch. Should be a python iterable object, and each
element is the data batch.
:type input: collections.Iterable
    :param feeding: Feeding dictionary that maps data layer names to the index
                    of the corresponding element in each input sample. A
                    default mapping is generated from the input when omitted.
    :param field: The prediction field. It should be in [`value`, `ids`].
                  `value` means return the prediction probabilities, `ids`
                  means return the prediction labels. Default is `value`.
:type field: str
:return: a numpy array
:rtype: numpy.ndarray
"""
inferer = Inference(output_layer=output_layer, parameters=parameters)
return inferer.infer(field=field, input=input, feeding=feeding)
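A hedged example of requesting the `ids` field instead of probabilities; `prediction`, `parameters` and `test_batch` are placeholders supplied by the caller:

import paddle.v2 as paddle

# Returns the predicted label ids for every sample in test_batch.
labels = paddle.infer(output_layer=prediction,
                      parameters=parameters,
                      input=test_batch,
                      field='ids')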
...@@ -12,94 +12,67 @@ ...@@ -12,94 +12,67 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Before this new package paddle.v2.layer, users would need to use functions `paddle.v2.layer` is a part of model config packages in paddle.v2. In API v2,
in paddle.trainer_config_helpers.layers to configure networks. we want to make Paddle a plain Python package. The model config package defines
the way to configure a neural network topology in Paddle Python code.
The Old Way:
========= The primary usage shows below.
This old way requires that the creation of a network be defined in a Python
function, say network_config, and that this Python function being passed to .. code-block:: python
paddle.trainer_config_helpers.parse_network_config for the creation of
protobuf message description of this network. import paddle.v2 as paddle
```python img = paddle.layer.data(name='img', type=paddle.data_type.dense_vector(784))
def network_config(): hidden = paddle.layer.fc(input=img, size=200)
img = paddle.trainer_config_helpers.data_layer(name="pixel", size=784) prediction = paddle.layer.fc(input=hidden, size=10,
inference = paddle.trainer_config_helpers.fc_layer( act=paddle.activation.Softmax())
input=img,
size=10, # use prediction instance where needed.
act=paddle.trainer_config_helpers.SoftmaxActivation()) parameters = paddle.parameters.create(cost)
cost = paddle.trainer_config_helpers.classification_cost(
input=inference,
label=paddle.trainer_config_helpers.data_layer(name="label", size=10))
proto_desc = parse_network_config(network_config)
```
When parse_network_config executes network_config, those layer definition
functions like data_layer and fc_layer would change some Python global variables,
so that after the execution, parse_network_config could collect information from
these global variables and generates the protobuf message.
The New Way:
=========
In this PR, we define a function in paddle.v2.layer which creates a Python
class for each layer creation function in paddle.trainer_config_helpers.layers.
Users can use create a network as follows:
```python
img = paddle.v2.layer.data(name="pixel", size=784)
inference = paddle.v2.layer.fc(input=img, size=10, act=paddle.v2.layer.Softmax())
cost = paddle.v2.layer.classification(
input=inference,
label=paddle.v2.layer.data(name="label", size=10))
parameters = paddle.v2.parameters.create(cost)
```
This new way doesn't require those invocations to layer definition functions
to be in a Python function but could be anywhere.
Also, the creation of a protobuf message is hidden in the invocation of
paddle.v2.parameters.create, no longer exposed to users.
""" """
import collections import collections
import inspect import inspect
from config_base import Layer, __convert_to_v2__
import paddle.trainer_config_helpers as conf_helps import paddle.trainer_config_helpers as conf_helps
from paddle.trainer_config_helpers.config_parser_utils import \ from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as __parse__ parse_network_config as __parse__
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
from paddle.trainer_config_helpers.default_decorators import wrap_act_default from paddle.trainer_config_helpers.default_decorators import wrap_act_default
from paddle.trainer_config_helpers.default_decorators import \ from paddle.trainer_config_helpers.default_decorators import \
wrap_bias_attr_default wrap_bias_attr_default
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
from paddle.trainer_config_helpers.layers import layer_support from paddle.trainer_config_helpers.layers import layer_support
from paddle.trainer.config_parser import \
RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \
RecurrentLayerGroupEnd, model_type
import data_type
import activation import activation
import re
import data_type
__all__ = ['parse_network', 'data'] __all__ = ['parse_network', 'data']
__projection_names__ = filter(lambda x: x.endswith('_projection'),
dir(conf_helps))
__all__ += __projection_names__
__operator_names__ = filter(lambda x: x.endswith('_operator'), dir(conf_helps))
__all__ += __operator_names__
def parse_network(*outputs): def parse_network(*outputs):
""" """
parse all output layers and then generate a model config proto. Parse all output layers and then generate a ModelConfig object.
:param outputs:
:return: .. note::
        This function is used internally in the paddle.v2 module. Users should
        never invoke this method directly.
:param outputs: Output layers.
:type outputs: Layer
:return: A ModelConfig object instance.
:rtype: ModelConfig
""" """
def __real_func__(): def __real_func__():
"""
__real_func__ is the function that config_parser.parse invoked. It is
the plain old paddle configuration function.
"""
context = dict() context = dict()
real_output = [each.to_proto(context=context) for each in outputs] real_output = [each.to_proto(context=context) for each in outputs]
conf_helps.outputs(real_output) conf_helps.outputs(real_output)
...@@ -107,17 +80,65 @@ def parse_network(*outputs): ...@@ -107,17 +80,65 @@ def parse_network(*outputs):
return __parse__(__real_func__) return __parse__(__real_func__)
class Layer(object): """
Some layers need special configuration and cannot be converted by
__convert_to_v2__, so we also implement some special LayerV2 classes by hand.
"""
class DataLayerV2(Layer):
METHOD_NAME = 'data_layer'
def __init__(self, name, type, **kwargs):
assert isinstance(type, data_type.InputType)
self.type = type
self.__method_name__ = 'data_layer'
self.__kwargs__ = kwargs
super(DataLayerV2, self).__init__(name=name, parent_layers=dict())
def to_proto_impl(self, **kwargs):
args = dict()
args['size'] = self.type.dim
for each in kwargs:
args[each] = kwargs[each]
for each in self.__kwargs__:
args[each] = self.__kwargs__[each]
return getattr(conf_helps, self.__method_name__)(name=self.name, **args)
def __map_docstr__(doc):
doc = re.sub(r'(data = [^\)]+)\).*',
"data = paddle.layer.data(name=\"input\", "
"type=paddle.data_type.dense_vector(1000))", doc)
doc = re.sub(r':param size:.*',
':param type: Data type of this data layer', doc)
doc = re.sub(r':type size:.*',
":type size: paddle.v2.data_type.InputType", doc)
return doc
class WithExtraParent(Layer):
def extra_parent(self):
return self.__extra_parent__
def __init__(self, name=None, parent_layers=None): def __init__(self, name=None, parent_layers=None):
assert isinstance(parent_layers, dict) self.__extra_parent__ = []
self.name = name super(WithExtraParent, self).__init__(
self.__parent_layers__ = parent_layers name=name, parent_layers=parent_layers)
def append_extra_parent(self, parent):
self.__extra_parent__.append(parent)
def to_proto(self, context): def to_proto(self, context):
""" """
function to set proto attribute function to set proto attribute
""" """
kwargs = dict() kwargs = dict()
for p in self.__extra_parent__:
p.to_proto(context=context)
for layer_name in self.__parent_layers__: for layer_name in self.__parent_layers__:
if not isinstance(self.__parent_layers__[layer_name], if not isinstance(self.__parent_layers__[layer_name],
collections.Sequence): collections.Sequence):
...@@ -128,77 +149,105 @@ class Layer(object): ...@@ -128,77 +149,105 @@ class Layer(object):
self.__parent_layers__[layer_name]) self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer kwargs[layer_name] = v1_layer
if self.name is None: if self.context_name() is None:
return self.to_proto_impl(**kwargs) return self.to_proto_impl(context=context, **kwargs)
elif self.name not in context: elif self.context_name() not in context:
context[self.name] = self.to_proto_impl(**kwargs) context[self.context_name()] = self.to_proto_impl(
context=context, **kwargs)
return context[self.name]
def to_proto_impl(self, **kwargs):
raise NotImplementedError()
def __convert_to_v2__(method_name, parent_names, is_default_name=True): if self.use_context_name():
if is_default_name: return context[self.context_name()]
wrapper = wrap_name_default(name_prefix=method_name) else:
else: return context[self.name]
wrapper = None
class V2LayerImpl(Layer):
def __init__(self, **kwargs):
parent_layers = dict()
other_kwargs = dict()
for pname in parent_names:
if kwargs.has_key(pname):
parent_layers[pname] = kwargs[pname]
for key in kwargs.keys(): class MemoryV2(WithExtraParent):
if key not in parent_names: def __init__(self, name, **kwargs):
other_kwargs[key] = kwargs[key] self.name = name
super(MemoryV2, self).__init__(name=name, parent_layers=dict())
self.__kwargs__ = kwargs
self.__boot_layer_name__ = None
if 'boot_layer' in kwargs:
begin_of_current_rnn = []
# TODO(yuyang18): Fix inspect, it could be wrong when user invoke a
# function inside step.
st = inspect.stack()
for i in xrange(len(st)):
locs = inspect.stack()[i][0].f_locals
keys = locs.keys()
for key in keys:
val = locs[key]
if isinstance(val, RecurrentLayerInput):
begin_of_current_rnn.append(val)
elif isinstance(val, collections.Sequence):
for v in val:
if isinstance(v, RecurrentLayerInput):
begin_of_current_rnn.append(v)
if begin_of_current_rnn:
break
assert begin_of_current_rnn is not None
for extra in begin_of_current_rnn:
self.append_extra_parent(extra)
assert isinstance(extra, WithExtraParent)
extra.append_extra_parent(kwargs['boot_layer'])
self.__boot_layer_name__ = kwargs['boot_layer'].name
def to_proto_impl(self, context, **kwargs):
args = dict()
for each in kwargs:
args[each] = kwargs[each]
for each in self.__kwargs__:
args[each] = self.__kwargs__[each]
name = kwargs.get('name', None) if self.__boot_layer_name__ is not None:
super(V2LayerImpl, self).__init__(name, parent_layers) args['boot_layer'] = context[self.__boot_layer_name__]
self.__other_kwargs__ = other_kwargs
if wrapper is not None: size = args.get('size', None)
__init__ = wrapper(__init__) if size is not None:
if callable(size):
real_size = size()
else:
real_size = size
args['size'] = real_size
return conf_helps.memory(name=self.name, **args)
def to_proto_impl(self, **kwargs): def context_name(self):
args = dict() return self.name + "#memory"
for each in kwargs:
args[each] = kwargs[each]
for each in self.__other_kwargs__:
args[each] = self.__other_kwargs__[each]
return getattr(conf_helps, method_name)(**args)
return V2LayerImpl def use_context_name(self):
"""
        A memory layer shares its name with the layer it refers to.
:return:
"""
return True
""" class LayerOutputV2(Layer):
Some layer may need some special config, and can not use __convert_to_v2__ to convert. """
So we also need to implement some special LayerV2. LayerOutputV2 is used to store the result of LayerOutput in v1 api.
""" It will not store it's parents because layer_output has been parsed already.
"""
def __init__(self, layer_output):
assert isinstance(layer_output, conf_helps.LayerOutput)
self.layer_output = layer_output
super(LayerOutputV2, self).__init__(
name=layer_output.name, parent_layers=dict())
class DataLayerV2(Layer): def to_proto_impl(self):
def __init__(self, name, type, **kwargs): return self.layer_output
assert isinstance(type, data_type.InputType)
self.type = type
self.__method_name__ = 'data_layer'
self.__kwargs__ = kwargs
super(DataLayerV2, self).__init__(name=name, parent_layers=dict()) class StaticInputV2(object):
def __init__(self, input, is_seq=False, size=None):
def to_proto_impl(self, **kwargs): assert isinstance(input, LayerV2)
args = dict() self.name = input.name
args['size'] = self.type.dim self.input = input
for each in kwargs: self.is_seq = is_seq
args[each] = kwargs[each] self.size = size
for each in self.__kwargs__: # TODO(add size check)
args[each] = self.__kwargs__[each] # assert input.size is not None or size is not None
return getattr(conf_helps, self.__method_name__)(name=self.name, **args)
class MixedLayerV2(Layer): class MixedLayerV2(Layer):
...@@ -232,7 +281,6 @@ class MixedLayerV2(Layer): ...@@ -232,7 +281,6 @@ class MixedLayerV2(Layer):
other_kwargs['act'] = act other_kwargs['act'] = act
other_kwargs['bias_attr'] = bias_attr other_kwargs['bias_attr'] = bias_attr
other_kwargs['layer_attr'] = layer_attr other_kwargs['layer_attr'] = layer_attr
parent_layers = {"input": self.__inputs__} parent_layers = {"input": self.__inputs__}
super(MixedLayerV2, self).__init__(name, parent_layers) super(MixedLayerV2, self).__init__(name, parent_layers)
self.__other_kwargs__ = other_kwargs self.__other_kwargs__ = other_kwargs
...@@ -242,7 +290,7 @@ class MixedLayerV2(Layer): ...@@ -242,7 +290,7 @@ class MixedLayerV2(Layer):
self.__inputs__.append(other) self.__inputs__.append(other)
return self return self
else: else:
raise MixedLayerTypeV2.AddToSealedMixedLayerExceptionV2() raise MixedLayerV2.AddToSealedMixedLayerExceptionV2()
def __enter__(self): def __enter__(self):
assert len(self.__inputs__) == 0 assert len(self.__inputs__) == 0
...@@ -257,6 +305,13 @@ class MixedLayerV2(Layer): ...@@ -257,6 +305,13 @@ class MixedLayerV2(Layer):
args[each] = kwargs[each] args[each] = kwargs[each]
for each in self.__other_kwargs__: for each in self.__other_kwargs__:
args[each] = self.__other_kwargs__[each] args[each] = self.__other_kwargs__[each]
size = args.get('size', None)
if size is not None:
if callable(size):
real_size = size()
else:
real_size = size
args['size'] = real_size
return getattr(conf_helps, self.__method_name__)(**args) return getattr(conf_helps, self.__method_name__)(**args)
...@@ -273,14 +328,52 @@ def mixed(size=0, ...@@ -273,14 +328,52 @@ def mixed(size=0,
return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) return MixedLayerV2(size, input, name, act, bias_attr, layer_attr)
class RecurrentLayerInput(WithExtraParent):
def __init__(self, recurrent_name, index, parent_layers):
assert len(parent_layers) == 1
self.__parents__ = parent_layers.values()[0]
super(RecurrentLayerInput, self).__init__(
name=self.__parents__[index].name, parent_layers=parent_layers)
self.__recurrent_name__ = recurrent_name
def context_name(self):
return self.__recurrent_name__ + ".begin"
def to_proto_impl(self, context, **kwargs):
model_type('recurrent_nn')
RecurrentLayerGroupWithoutOutLinksBegin(
name=self.__recurrent_name__,
in_links=map(lambda x: x.name, self.__parents__))
return self
class RecurrentLayerOutput(Layer):
def __init__(self, recurrent_name, index, parent_layers):
assert len(parent_layers) == 1
self.__parents__ = parent_layers.values()[0]
super(RecurrentLayerOutput, self).__init__(
name=self.__parents__[index].name, parent_layers=parent_layers)
self.__recurrent_name__ = recurrent_name
def context_name(self):
return self.__recurrent_name__ + ".end"
def to_proto_impl(self, **kwargs):
for l in self.__parents__:
RecurrentLayerGroupSetOutLink(l.name)
RecurrentLayerGroupEnd(name=self.__recurrent_name__)
LayerV2 = Layer LayerV2 = Layer
data = DataLayerV2 data = DataLayerV2
data.__name__ = 'data'
AggregateLevel = conf_helps.layers.AggregateLevel AggregateLevel = conf_helps.layers.AggregateLevel
ExpandLevel = conf_helps.layers.ExpandLevel ExpandLevel = conf_helps.layers.ExpandLevel
memory = MemoryV2
def __layer_name_mapping__(inname): def __layer_name_mapping__(inname):
if inname in ['data_layer', 'memory', 'mixed_layer']: if inname in ['data_layer', 'memory', 'mixed_layer', 'recurrent_group']:
# Do Not handle these layers # Do Not handle these layers
return return
elif inname == 'maxid_layer': elif inname == 'maxid_layer':
...@@ -302,8 +395,10 @@ def __layer_name_mapping__(inname): ...@@ -302,8 +395,10 @@ def __layer_name_mapping__(inname):
def __layer_name_mapping_parent_names__(inname): def __layer_name_mapping_parent_names__(inname):
all_args = getattr(conf_helps, inname).argspec.args all_args = getattr(conf_helps, inname).argspec.args
return filter( return filter(
lambda x: x in ['input1', 'input2','label', 'input', 'a', 'b', 'expand_as', lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b',
'weights', 'vectors', 'weight', 'score', 'left', 'right'], 'expand_as',
'weights', 'vectors', 'weight', 'score', 'left',
'right', 'output_mem'],
all_args) all_args)
...@@ -311,6 +406,7 @@ def __convert_layer__(_new_name_, _old_name_, _parent_names_): ...@@ -311,6 +406,7 @@ def __convert_layer__(_new_name_, _old_name_, _parent_names_):
global __all__ global __all__
__all__.append(_new_name_) __all__.append(_new_name_)
globals()[new_name] = __convert_to_v2__(_old_name_, _parent_names_) globals()[new_name] = __convert_to_v2__(_old_name_, _parent_names_)
globals()[new_name].__name__ = new_name
for each_layer_name in dir(conf_helps): for each_layer_name in dir(conf_helps):
...@@ -324,10 +420,71 @@ del parent_names ...@@ -324,10 +420,71 @@ del parent_names
del new_name del new_name
del each_layer_name del each_layer_name
@wrap_name_default()
def recurrent_group(step, input, name=None):
if not isinstance(input, collections.Sequence):
input = [input]
non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2),
input)
actual_input = [
RecurrentLayerInput(
recurrent_name=name,
index=i,
parent_layers={'recurrent_inputs': non_static_inputs})
for i in xrange(len(non_static_inputs))
]
def __real_step__(*args):
rnn_input = list(args)
static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
for static_input in static_inputs:
mem_name = "__%s_memory__" % static_input.input.name
mem = memory(
name=mem_name,
is_seq=static_input.is_seq,
size=static_input.input.calculate_size,
boot_layer=static_input.input)
with mixed(
name=mem_name,
size=static_input.input.calculate_size,
act=activation.Identity()) as mix:
mix += identity_projection(input=mem)
rnn_input.insert(input.index(static_input), mix)
return step(*rnn_input)
actual_output = __real_step__(*actual_input)
if not isinstance(actual_output, collections.Sequence):
actual_output = [actual_output]
retv = [
RecurrentLayerOutput(
recurrent_name=name,
index=i,
parent_layers={'recurrent_outputs': actual_output})
for i in xrange(len(actual_output))
]
if len(retv) == 1:
return retv[0]
else:
return retv
__projection_names__ = filter(lambda x: x.endswith('_projection'),
dir(conf_helps))
__all__ += __projection_names__
__operator_names__ = filter(lambda x: x.endswith('_operator'), dir(conf_helps))
__all__ += __operator_names__
# convert projection # convert projection
for prj in __projection_names__: for prj in __projection_names__:
globals()[prj] = __convert_to_v2__( globals()[prj] = __convert_to_v2__(
prj, parent_names=['input'], is_default_name=False) prj, parent_names=['input'], is_default_name=False)
globals()[prj].__name__ = prj
# convert operator # convert operator
operator_list = [ operator_list = [
...@@ -338,3 +495,4 @@ operator_list = [ ...@@ -338,3 +495,4 @@ operator_list = [
for op in operator_list: for op in operator_list:
globals()[op[0]] = __convert_to_v2__( globals()[op[0]] = __convert_to_v2__(
op[0], parent_names=op[1], is_default_name=False) op[0], parent_names=op[1], is_default_name=False)
globals()[op[0]].__name__ = op[0]
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['batch']
def batch(reader, batch_size):
"""
Create a batched reader.
:param reader: the data reader to read from.
:type reader: callable
:param batch_size: size of each mini-batch
:type batch_size: int
:return: the batched reader.
:rtype: callable
"""
def batch_reader():
r = reader()
b = []
for instance in r:
b.append(instance)
if len(b) == batch_size:
yield b
b = []
if b:
yield b
return batch_reader
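A self-contained sketch of the batch() decorator defined above; the counting reader is hypothetical:

def counting_reader():
    for i in xrange(10):
        yield i

batched = batch(counting_reader, batch_size=4)

for mini_batch in batched():
    print mini_batch  # [0, 1, 2, 3], then [4, 5, 6, 7], then the partial [8, 9]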
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.trainer_config_helpers.networks as conf_nw
import inspect
from config_base import __convert_to_v2__
__all__ = []
def __initialize__():
for each_subnetwork in conf_nw.__all__:
if each_subnetwork in ['inputs', 'outputs']:
continue
func = getattr(conf_nw, each_subnetwork)
if hasattr(func, 'argspec'):
argspec = func.argspec
else:
argspec = inspect.getargspec(func)
if each_subnetwork == 'simple_attention':
parents = ['encoded_sequence', 'encoded_proj', 'decoder_state']
else:
parents = filter(lambda x: x.startswith('input'), argspec.args)
assert len(parents) != 0, each_subnetwork
v2_subnet = __convert_to_v2__(
each_subnetwork,
parent_names=parents,
is_default_name='name' in argspec.args)
globals()[each_subnetwork] = v2_subnet
globals()[each_subnetwork].__name__ = each_subnetwork
global __all__
__all__.append(each_subnetwork)
__initialize__()
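After __initialize__() runs, every converted v1 sub-network becomes a callable in this module. A hedged sketch, mirroring the NetworkTests.test_vgg unit test added later in this commit:

import paddle.v2.layer as layer
import paddle.v2.data_type as data_type
import paddle.v2.networks as networks

img = layer.data(name='pixel', type=data_type.dense_vector(784))
vgg_out = networks.small_vgg(input_image=img, num_channels=1, num_classes=2)
print layer.parse_network(vgg_out)  # dumps the generated ModelConfig proto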
import py_paddle.swig_paddle as swig_api import py_paddle.swig_paddle as swig_api
import paddle.trainer_config_helpers.optimizers as v1_optimizers
import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils
import paddle.v2 import paddle.trainer_config_helpers.optimizers as v1_optimizers
"""
Optimizers (update equations) for the SGD method.
TODO(yuyang18): Complete comments.
"""
__all__ = [ __all__ = [
'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta',
...@@ -44,7 +49,7 @@ class Optimizer(object): ...@@ -44,7 +49,7 @@ class Optimizer(object):
class Momentum(Optimizer): class Momentum(Optimizer):
def __init__(self, momentum=None, sparse=False, **kwargs): def __init__(self, momentum=None, sparse=False, **kwargs):
learning_method = v1_optimizers.MomentumOptimizer( learning_method = v1_optimizers.MomentumOptimizer(
momentum=None, sparse=False) momentum=momentum, sparse=sparse)
super(Momentum, self).__init__( super(Momentum, self).__init__(
learning_method=learning_method, **kwargs) learning_method=learning_method, **kwargs)
......
import numpy as np import numpy as np
import py_paddle.swig_paddle as api import py_paddle.swig_paddle as api
from paddle.proto.ParameterConfig_pb2 import ParameterConfig from paddle.proto.ParameterConfig_pb2 import ParameterConfig
import struct
import tarfile
import cStringIO
from topology import Topology from topology import Topology
__all__ = ['Parameters', 'create'] __all__ = ['Parameters', 'create']
...@@ -10,6 +12,7 @@ __all__ = ['Parameters', 'create'] ...@@ -10,6 +12,7 @@ __all__ = ['Parameters', 'create']
def create(layers): def create(layers):
""" """
Create parameter pool by topology. Create parameter pool by topology.
:param layers: :param layers:
:return: :return:
""" """
...@@ -67,6 +70,7 @@ class Parameters(object): ...@@ -67,6 +70,7 @@ class Parameters(object):
def keys(self): def keys(self):
""" """
keys are the names of each parameter. keys are the names of each parameter.
:return: list of parameter name :return: list of parameter name
:rtype: list :rtype: list
""" """
...@@ -75,6 +79,7 @@ class Parameters(object): ...@@ -75,6 +79,7 @@ class Parameters(object):
def names(self): def names(self):
""" """
names of each parameter. names of each parameter.
:return: list of parameter name :return: list of parameter name
:rtype: list :rtype: list
""" """
...@@ -83,6 +88,7 @@ class Parameters(object): ...@@ -83,6 +88,7 @@ class Parameters(object):
def has_key(self, key): def has_key(self, key):
""" """
has_key return true if there are such parameter name == key has_key return true if there are such parameter name == key
:param key: Parameter name :param key: Parameter name
:type key: basestring :type key: basestring
:return: True if contains such key :return: True if contains such key
...@@ -118,6 +124,12 @@ class Parameters(object): ...@@ -118,6 +124,12 @@ class Parameters(object):
if len(self.__gradient_machines__) == 0: if len(self.__gradient_machines__) == 0:
# create new parameter in python numpy. # create new parameter in python numpy.
if len(self.__tmp_params__) != 0:
ret_list = [
mat for name, mat in self.__tmp_params__ if name == key
]
if len(ret_list) == 1:
return ret_list[0]
return np.ndarray(shape=shape, dtype=np.float32) return np.ndarray(shape=shape, dtype=np.float32)
else: else:
for each_gradient_machine in self.__gradient_machines__: for each_gradient_machine in self.__gradient_machines__:
...@@ -136,6 +148,7 @@ class Parameters(object): ...@@ -136,6 +148,7 @@ class Parameters(object):
def get_shape(self, key): def get_shape(self, key):
""" """
get shape of the parameter. get shape of the parameter.
:param key: parameter name :param key: parameter name
:type key: basestring :type key: basestring
:return: parameter's shape :return: parameter's shape
...@@ -190,6 +203,7 @@ class Parameters(object): ...@@ -190,6 +203,7 @@ class Parameters(object):
def set(self, parameter_name, value): def set(self, parameter_name, value):
""" """
Set parameter by parameter name & matrix. Set parameter by parameter name & matrix.
:param parameter_name: parameter name :param parameter_name: parameter name
:type parameter_name: basestring :type parameter_name: basestring
:param value: parameter matrix :param value: parameter matrix
...@@ -222,6 +236,67 @@ class Parameters(object): ...@@ -222,6 +236,67 @@ class Parameters(object):
self.__gradient_machines__.append(gradient_machine) self.__gradient_machines__.append(gradient_machine)
def serialize(self, name, f):
"""
        :param name: name of the parameter to serialize.
        :type name: basestring
        :param f: writable file-like object that receives the serialized data.
        :type f: file
        :return: None
"""
param = self.get(name)
size = reduce(lambda a, b: a * b, param.shape)
f.write(struct.pack("IIQ", 0, 4, size))
param = param.astype(np.float32)
f.write(param.tobytes())
def deserialize(self, name, f):
"""
        :param name: name of the parameter to restore.
        :type name: basestring
        :param f: readable file-like object produced by :code:`serialize`.
        :type f: file
        :return: None
"""
f.read(16) # header
arr = np.frombuffer(f.read(), dtype=np.float32)
self.set(name, arr.reshape(self.get_shape(name)))
def to_tar(self, f):
tar = tarfile.TarFile(fileobj=f, mode='w')
for nm in self.names():
buf = cStringIO.StringIO()
self.serialize(nm, buf)
tarinfo = tarfile.TarInfo(name=nm)
buf.seek(0)
tarinfo.size = len(buf.getvalue())
tar.addfile(tarinfo, buf)
conf = self.__param_conf__[nm]
confStr = conf.SerializeToString()
tarinfo = tarfile.TarInfo(name="%s.protobuf" % nm)
tarinfo.size = len(confStr)
buf = cStringIO.StringIO(confStr)
buf.seek(0)
tar.addfile(tarinfo, fileobj=buf)
@staticmethod
def from_tar(f):
params = Parameters()
tar = tarfile.TarFile(fileobj=f, mode='r')
for finfo in tar:
assert isinstance(finfo, tarfile.TarInfo)
if finfo.name.endswith('.protobuf'):
f = tar.extractfile(finfo)
conf = ParameterConfig()
conf.ParseFromString(f.read())
params.__append_config__(conf)
for param_name in params.names():
f = tar.extractfile(param_name)
params.deserialize(param_name, f)
return params
def __get_parameter_in_gradient_machine__(gradient_machine, name): def __get_parameter_in_gradient_machine__(gradient_machine, name):
""" """
......
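A hedged round-trip sketch of the new to_tar()/from_tar() serialization, mirroring the test_parameters.py test added below; `params` stands for any populated Parameters instance:

import cStringIO
import paddle.v2.parameters as parameters

buf = cStringIO.StringIO()
params.to_tar(buf)  # writes each parameter blob plus its ParameterConfig proto

buf.seek(0)
params_copy = parameters.Parameters.from_tar(buf)
assert params_copy.names() == params.names()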
...@@ -12,13 +12,15 @@ ...@@ -12,13 +12,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle.trainer_config_helpers.poolings import * import paddle.trainer_config_helpers.poolings
import copy
__all__ = ["Max", "CudnnMax", "Avg", "CudnnAvg", "Sum", "SquareRootN"] __all__ = []
suffix = 'Pooling'
Max = MaxPooling for name in paddle.trainer_config_helpers.poolings.__all__:
CudnnMax = CudnnMaxPooling new_name = name[:-len(suffix)]
Avg = AvgPooling globals()[new_name] = copy.copy(
CudnnAvg = CudnnAvgPooling getattr(paddle.trainer_config_helpers.poolings, name))
Sum = SumPooling globals()[new_name].__name__ = new_name
SquareRootN = SquareRootNPooling __all__.append(new_name)
...@@ -11,15 +11,64 @@ ...@@ -11,15 +11,64 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
At training and testing time, PaddlePaddle programs need to read data. To ease
the users' work of writing data reading code, we define the following:
# It would be too lengthy to require our users to prefix decorators with `decorator`. - A *reader* is a function that reads data (from file, network, random number
# For example, we want the following line generator, etc) and yields data items.
# - A *reader creator* is a function that returns a reader function.
# r = paddle.reader.decorator.bufferd(paddle.reader.creator.text("hello.txt")) - A *reader decorator* is a function, which accepts one or more readers, and
# returns a reader.
# to be a shorter version: - A *batch reader* is a function that reads data (from *reader*, file, network,
# random number generator, etc) and yields a batch of data items.
# r = paddle.reader.buffered(paddle.reader.creator.text("hello.txt"))
#####################
Data Reader Interface
#####################
Indeed, *data reader* doesn't have to be a function that reads and yields data
items. It can be any function with no parameters that creates an iterable
(anything can be used in :code:`for x in iterable`)\:
.. code-block:: python
iterable = data_reader()
Each element produced from the iterable should be a **single** entry of data,
**not** a mini batch. That entry of data could be a single item, or a tuple of
items.
Item should be of `supported type <http://www.paddlepaddle.org/doc/ui/data_provider
/pydataprovider2.html?highlight=dense_vector#input-types>`_ (e.g., numpy 1d
array of float32, int, list of int)
An example implementation for single item data reader creator:
.. code-block:: python
def reader_creator_random_image(width, height):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height)
return reader
An example implementation for multiple item data reader creator:
.. code-block:: python
def reader_creator_random_image_and_label(width, height, label):
def reader():
while True:
yield numpy.random.uniform(-1, 1, size=width*height), label
return reader
TODO(yuyang18): Should we add whole design doc here?
"""
import decorator
from decorator import * from decorator import *
import creator import creator
__all__ = decorator.__all__ + ['creator']
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
The creator package contains some simple reader creators that can be used in
user programs.
"""
__all__ = ['np_array', 'text_file'] __all__ = ['np_array', 'text_file']
...@@ -38,7 +42,7 @@ def np_array(x): ...@@ -38,7 +42,7 @@ def np_array(x):
def text_file(path): def text_file(path):
""" """
Creates a data reader that outputs text line by line from given text file. Creates a data reader that outputs text line by line from given text file.
Trailing new line ('\n') of each line will be removed. Trailing new line ('\\\\n') of each line will be removed.
:path: path of the text file. :path: path of the text file.
:returns: data reader of text file :returns: data reader of text file
......
...@@ -14,13 +14,13 @@ ...@@ -14,13 +14,13 @@
__all__ = [ __all__ = [
'map_readers', 'buffered', 'compose', 'chain', 'shuffle', 'map_readers', 'buffered', 'compose', 'chain', 'shuffle',
'ComposeNotAligned', 'batched' 'ComposeNotAligned', 'firstn'
] ]
from Queue import Queue
from threading import Thread
import itertools import itertools
import random import random
from Queue import Queue
from threading import Thread
def map_readers(func, *readers): def map_readers(func, *readers):
...@@ -28,9 +28,11 @@ def map_readers(func, *readers): ...@@ -28,9 +28,11 @@ def map_readers(func, *readers):
Creates a data reader that outputs return value of function using Creates a data reader that outputs return value of function using
output of each data readers as arguments. output of each data readers as arguments.
:param func: function to use. :param func: function to use. The type of func should be (Sample) => Sample
:param *readers: readers whose outputs will be used as arguments of func. :type: callable
:returns: the created data reader. :param readers: readers whose outputs will be used as arguments of func.
:return: the created data reader.
:rtype: callable
""" """
def reader(): def reader():
...@@ -45,16 +47,19 @@ def map_readers(func, *readers): ...@@ -45,16 +47,19 @@ def map_readers(func, *readers):
def shuffle(reader, buf_size): def shuffle(reader, buf_size):
""" """
Creates a data reader whose data output is suffled. Creates a data reader whose data output is shuffled.
Output from the iterator that created by original reader will be Output from the iterator that created by original reader will be
buffered into shuffle buffer, and then shuffled. The size of shuffle buffer buffered into shuffle buffer, and then shuffled. The size of shuffle buffer
is determined by argument buf_size. is determined by argument buf_size.
:param reader: the original reader whose output will be shuffled. :param reader: the original reader whose output will be shuffled.
:type reader: callable
:param buf_size: shuffle buffer size. :param buf_size: shuffle buffer size.
:type buf_size: int
:returns:the new reader whose output is shuffled. :return: the new reader whose output is shuffled.
:rtype: callable
""" """
def data_reader(): def data_reader():
...@@ -88,7 +93,8 @@ def chain(*readers): ...@@ -88,7 +93,8 @@ def chain(*readers):
[0, 0, 0, 1, 1, 1, 2, 2, 2] [0, 0, 0, 1, 1, 1, 2, 2, 2]
:param readers: input readers. :param readers: input readers.
:returns: the new data reader. :return: the new data reader.
:rtype: callable
""" """
def reader(): def reader():
...@@ -115,12 +121,13 @@ def compose(*readers, **kwargs): ...@@ -115,12 +121,13 @@ def compose(*readers, **kwargs):
The composed reader will output: The composed reader will output:
(1, 2, 3, 4, 5) (1, 2, 3, 4, 5)
:*readers: readers that will be composed together. :param readers: readers that will be composed together.
:check_alignment: if True, will check if input readers are aligned :param check_alignment: if True, will check if input readers are aligned
correctly. If False, will not check alignment and trailing outputs correctly. If False, will not check alignment and trailing outputs
will be discarded. Defaults to True. will be discarded. Defaults to True.
:type check_alignment: bool
:returns: the new data reader. :return: the new data reader.
:raises ComposeNotAligned: outputs of readers are not aligned. :raises ComposeNotAligned: outputs of readers are not aligned.
Will not raise when check_alignment is set to False. Will not raise when check_alignment is set to False.
...@@ -161,7 +168,9 @@ def buffered(reader, size): ...@@ -161,7 +168,9 @@ def buffered(reader, size):
as the buffer is not empty. as the buffer is not empty.
:param reader: the data reader to read from. :param reader: the data reader to read from.
:type reader: callable
:param size: max buffer size. :param size: max buffer size.
:type size: int
:returns: the buffered data reader. :returns: the buffered data reader.
""" """
...@@ -193,23 +202,25 @@ def buffered(reader, size): ...@@ -193,23 +202,25 @@ def buffered(reader, size):
return data_reader return data_reader
def batched(reader, batch_size): def firstn(reader, n):
""" """
Create a batched reader. Limit the max number of samples that reader could return.
:param reader: the data reader to read from. :param reader: the data reader to read from.
:param batch_size: batch_size :type reader: callable
:return: the batched reader. :param n: the max number of samples that return.
:type n: int
:return: the decorated reader.
:rtype: callable
""" """
def batched_reader(): # TODO(yuyang18): Check if just drop the reader, could clean the opened
r = reader() # resource or not?
batch = []
for instance in r: def firstn_reader():
batch.append(instance) for i, item in enumerate(reader()):
if len(batch) == batch_size: if i == n:
yield batch break
batch = [] yield item
if batch:
yield batch return firstn_reader
return batched_reader
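A short sketch composing the decorators above; the number reader and the paddle.v2.reader import path are illustrative assumptions:

import paddle.v2.reader as reader

def number_reader():
    for i in xrange(100):
        yield i

# Shuffle with a 32-item buffer, then keep only the first ten samples.
limited = reader.firstn(reader.shuffle(number_reader, buf_size=32), n=10)
assert len(list(limited())) == 10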
add_test(NAME test_v2_api
COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE})
add_test(NAME test_v2_layer add_test(NAME test_v2_layer
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
add_test(NAME test_v2_api add_test(NAME test_v2_rnn_layer
COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE}) COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py)
add_test(NAME topology_test add_test(NAME test_topology
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_topology.py ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_topology.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
...@@ -22,7 +22,7 @@ cd $SCRIPTPATH ...@@ -22,7 +22,7 @@ cd $SCRIPTPATH
$1 -m pip install ../../../../paddle/dist/*.whl $1 -m pip install ../../../../paddle/dist/*.whl
test_list="test_data_feeder.py" test_list="test_data_feeder.py test_parameters.py"
export PYTHONPATH=$PWD/../../../../python/ export PYTHONPATH=$PWD/../../../../python/
......
...@@ -110,14 +110,14 @@ class DataFeederTest(unittest.TestCase): ...@@ -110,14 +110,14 @@ class DataFeederTest(unittest.TestCase):
self.assertAlmostEqual(value.all(), w[i].all()) self.assertAlmostEqual(value.all(), w[i].all())
def test_integer(self): def test_integer(self):
dim = 100 value_range = 100
batch_size = 32 batch_size = 32
index = [] index = []
for i in xrange(batch_size): for i in xrange(batch_size):
each_sample = [] each_sample = []
each_sample.append(np.random.randint(dim)) each_sample.append(np.random.randint(value_range))
index.append(each_sample) index.append(each_sample)
feeder = DataFeeder([('input', data_type.integer_value(dim))], feeder = DataFeeder([('input', data_type.integer_value(value_range))],
{'input': 0}) {'input': 0})
arg = feeder(index) arg = feeder(index)
output = arg.getSlotIds(0).copyToNumpyArray() output = arg.getSlotIds(0).copyToNumpyArray()
...@@ -125,7 +125,7 @@ class DataFeederTest(unittest.TestCase): ...@@ -125,7 +125,7 @@ class DataFeederTest(unittest.TestCase):
self.assertEqual(output.all(), index.flatten().all()) self.assertEqual(output.all(), index.flatten().all())
def test_integer_sequence(self): def test_integer_sequence(self):
dim = 10000 value_range = 10000
batch_size = 32 batch_size = 32
start = [0] start = [0]
data = [] data = []
...@@ -133,11 +133,12 @@ class DataFeederTest(unittest.TestCase): ...@@ -133,11 +133,12 @@ class DataFeederTest(unittest.TestCase):
each_sample = [] each_sample = []
each_sample.append( each_sample.append(
self.sparse_binary_reader( self.sparse_binary_reader(
dim, 30, non_empty=True)) value_range, 30, non_empty=True))
data.append(each_sample) data.append(each_sample)
start.append(len(each_sample[0]) + start[-1]) start.append(len(each_sample[0]) + start[-1])
feeder = DataFeeder([('input', data_type.integer_value_sequence(dim))], feeder = DataFeeder(
{'input': 0}) [('input', data_type.integer_value_sequence(value_range))],
{'input': 0})
arg = feeder(data) arg = feeder(data)
output_data = arg.getSlotIds(0).copyToNumpyArray() output_data = arg.getSlotIds(0).copyToNumpyArray()
output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray() output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray()
......
...@@ -18,6 +18,7 @@ import paddle.v2.attr as attr ...@@ -18,6 +18,7 @@ import paddle.v2.attr as attr
import paddle.v2.data_type as data_type import paddle.v2.data_type as data_type
import paddle.v2.layer as layer import paddle.v2.layer as layer
import paddle.v2.pooling as pooling import paddle.v2.pooling as pooling
import paddle.v2.networks as networks
pixel = layer.data(name='pixel', type=data_type.dense_vector(128)) pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
label = layer.data(name='label', type=data_type.integer_value(10)) label = layer.data(name='label', type=data_type.integer_value(10))
...@@ -251,5 +252,13 @@ class ProjOpTest(unittest.TestCase): ...@@ -251,5 +252,13 @@ class ProjOpTest(unittest.TestCase):
print layer.parse_network(conv1) print layer.parse_network(conv1)
class NetworkTests(unittest.TestCase):
def test_vgg(self):
img = layer.data(name='pixel', type=data_type.dense_vector(784))
vgg_out = networks.small_vgg(
input_image=img, num_channels=1, num_classes=2)
print layer.parse_network(vgg_out)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
import unittest
import sys
try:
import py_paddle
del py_paddle
except ImportError:
print >> sys.stderr, "It seems swig of Paddle is not installed, this " \
"unittest will not be run."
sys.exit(0)
import paddle.v2.parameters as parameters
from paddle.proto.ParameterConfig_pb2 import ParameterConfig
import random
import cStringIO
import numpy
def __rand_param_config__(name):
conf = ParameterConfig()
conf.name = name
size = 1
for i in xrange(2):
dim = random.randint(1, 1000)
conf.dims.append(dim)
size *= dim
conf.size = size
assert conf.IsInitialized()
return conf
class TestParameters(unittest.TestCase):
def test_serialization(self):
params = parameters.Parameters()
params.__append_config__(__rand_param_config__("param_0"))
params.__append_config__(__rand_param_config__("param_1"))
for name in params.names():
param = params.get(name)
param[:] = numpy.random.uniform(
-1.0, 1.0, size=params.get_shape(name))
params.set(name, param)
tmp_file = cStringIO.StringIO()
params.to_tar(tmp_file)
tmp_file.seek(0)
params_dup = parameters.Parameters.from_tar(tmp_file)
self.assertEqual(params_dup.names(), params.names())
for name in params.names():
self.assertEqual(params.get_shape(name), params_dup.get_shape(name))
p0 = params.get(name)
p1 = params_dup.get(name)
self.assertTrue(numpy.isclose(p0, p1).all())
if __name__ == '__main__':
unittest.main()
# Copyright PaddlePaddle contributors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import difflib
import unittest
import paddle.trainer_config_helpers as conf_helps
import paddle.v2.activation as activation
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as parse_network
class RNNTest(unittest.TestCase):
def test_simple_rnn(self):
dict_dim = 10
word_dim = 8
hidden_dim = 8
def parse_old_rnn():
def step(y):
mem = conf_helps.memory(name="rnn_state", size=hidden_dim)
out = conf_helps.fc_layer(
input=[y, mem],
size=hidden_dim,
act=activation.Tanh(),
bias_attr=True,
name="rnn_state")
return out
def test():
data = conf_helps.data_layer(name="word", size=dict_dim)
embd = conf_helps.embedding_layer(input=data, size=word_dim)
conf_helps.recurrent_group(name="rnn", step=step, input=embd)
return str(parse_network(test))
def parse_new_rnn():
def new_step(y):
mem = layer.memory(name="rnn_state", size=hidden_dim)
out = layer.fc(input=[y, mem],
size=hidden_dim,
act=activation.Tanh(),
bias_attr=True,
name="rnn_state")
return out
data = layer.data(
name="word", type=data_type.integer_value(dict_dim))
embd = layer.embedding(input=data, size=word_dim)
rnn_layer = layer.recurrent_group(
name="rnn", step=new_step, input=embd)
return str(layer.parse_network(rnn_layer))
diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
parse_new_rnn().splitlines(1))
print ''.join(diff)
def test_sequence_rnn_multi_input(self):
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
def parse_old_rnn():
def test():
data = conf_helps.data_layer(name="word", size=dict_dim)
label = conf_helps.data_layer(name="label", size=label_dim)
emb = conf_helps.embedding_layer(input=data, size=word_dim)
boot_layer = conf_helps.data_layer(name="boot", size=10)
boot_layer = conf_helps.fc_layer(
name='boot_fc', input=boot_layer, size=10)
def step(y, wid):
z = conf_helps.embedding_layer(input=wid, size=word_dim)
mem = conf_helps.memory(
name="rnn_state",
size=hidden_dim,
boot_layer=boot_layer)
out = conf_helps.fc_layer(
input=[y, z, mem],
size=hidden_dim,
act=conf_helps.TanhActivation(),
bias_attr=True,
name="rnn_state")
return out
out = conf_helps.recurrent_group(
name="rnn", step=step, input=[emb, data])
rep = conf_helps.last_seq(input=out)
prob = conf_helps.fc_layer(
size=label_dim,
input=rep,
act=conf_helps.SoftmaxActivation(),
bias_attr=True)
conf_helps.outputs(
conf_helps.classification_cost(
input=prob, label=label))
return str(parse_network(test))
def parse_new_rnn():
data = layer.data(
name="word", type=data_type.dense_vector(dict_dim))
label = layer.data(
name="label", type=data_type.dense_vector(label_dim))
emb = layer.embedding(input=data, size=word_dim)
boot_layer = layer.data(
name="boot", type=data_type.dense_vector(10))
boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10)
def step(y, wid):
z = layer.embedding(input=wid, size=word_dim)
mem = layer.memory(
name="rnn_state", size=hidden_dim, boot_layer=boot_layer)
out = layer.fc(input=[y, z, mem],
size=hidden_dim,
act=activation.Tanh(),
bias_attr=True,
name="rnn_state")
return out
out = layer.recurrent_group(
name="rnn", step=step, input=[emb, data])
rep = layer.last_seq(input=out)
prob = layer.fc(size=label_dim,
input=rep,
act=activation.Softmax(),
bias_attr=True)
cost = layer.classification_cost(input=prob, label=label)
return str(layer.parse_network(cost))
diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
parse_new_rnn().splitlines(1))
print ''.join(diff)
if __name__ == '__main__':
unittest.main()
...@@ -16,6 +16,7 @@ import paddle.v2.layer as layer ...@@ -16,6 +16,7 @@ import paddle.v2.layer as layer
import paddle.v2.topology as topology import paddle.v2.topology as topology
import paddle.v2.data_type as data_type import paddle.v2.data_type as data_type
import paddle.trainer_config_helpers as conf_helps import paddle.trainer_config_helpers as conf_helps
import paddle.trainer.PyDataProvider2 as pydp2
class TestTopology(unittest.TestCase): class TestTopology(unittest.TestCase):
...@@ -35,13 +36,13 @@ class TestTopology(unittest.TestCase): ...@@ -35,13 +36,13 @@ class TestTopology(unittest.TestCase):
pixel_data_type = filter(lambda type: type[0] == "pixel", data_types) pixel_data_type = filter(lambda type: type[0] == "pixel", data_types)
self.assertEqual(len(pixel_data_type), 1) self.assertEqual(len(pixel_data_type), 1)
pixel_data_type = pixel_data_type[0] pixel_data_type = pixel_data_type[0]
self.assertEqual(pixel_data_type[1].type, data_type.DataType.Dense) self.assertEqual(pixel_data_type[1].type, pydp2.DataType.Dense)
self.assertEqual(pixel_data_type[1].dim, 784) self.assertEqual(pixel_data_type[1].dim, 784)
label_data_type = filter(lambda type: type[0] == "label", data_types) label_data_type = filter(lambda type: type[0] == "label", data_types)
self.assertEqual(len(label_data_type), 1) self.assertEqual(len(label_data_type), 1)
label_data_type = label_data_type[0] label_data_type = label_data_type[0]
self.assertEqual(label_data_type[1].type, data_type.DataType.Index) self.assertEqual(label_data_type[1].type, pydp2.DataType.Index)
self.assertEqual(label_data_type[1].dim, 10) self.assertEqual(label_data_type[1].dim, 10)
def test_get_layer(self): def test_get_layer(self):
......
...@@ -17,16 +17,34 @@ import collections ...@@ -17,16 +17,34 @@ import collections
from paddle.proto.ModelConfig_pb2 import ModelConfig from paddle.proto.ModelConfig_pb2 import ModelConfig
import layer as v2_layer import layer as v2_layer
from layer import WithExtraParent
__all__ = ['Topology'] __all__ = ['Topology']
def __flatten__(lis):
"""
Given a list, possibly nested to any level, return it flattened.
"""
new_lis = []
for item in lis:
if isinstance(item, collections.Sequence):
new_lis.extend(__flatten__(item))
else:
new_lis.append(item)
return new_lis
def __bfs_travel__(callback, *layers): def __bfs_travel__(callback, *layers):
layers = __flatten__(layers)
for each_layer in layers: for each_layer in layers:
__break__ = callback(each_layer) __break__ = callback(each_layer)
if __break__: if __break__:
return return
__bfs_travel__(callback, *each_layer.__parent_layers__.values()) __layers__ = each_layer.__parent_layers__.values()
if isinstance(each_layer, WithExtraParent):
__layers__ = __layers__ + each_layer.extra_parent()
__bfs_travel__(callback, *__layers__)
class Topology(object): class Topology(object):
......
...@@ -8,7 +8,11 @@ from . import event as v2_event ...@@ -8,7 +8,11 @@ from . import event as v2_event
from . import optimizer as v2_optimizer from . import optimizer as v2_optimizer
from . import parameters as v2_parameters from . import parameters as v2_parameters
__all__ = ['ITrainer', 'SGD'] __all__ = ['SGD']
"""
Trainer package
TODO(yuyang18): Complete comments.
"""
def default_event_handler(event): def default_event_handler(event):
...@@ -22,33 +26,20 @@ def default_event_handler(event): ...@@ -22,33 +26,20 @@ def default_event_handler(event):
pass pass
class ITrainer(object): class SGD(object):
""" """
The interface of Trainer. The only exposed method is `train`. Simple SGD Trainer.
TODO(yuyang18): Complete comments
:param update_equation: The optimizer object.
:type update_equation: paddle.v2.optimizer.Optimizer
:param cost: Target cost that neural network should be optimized.
:type cost: paddle.v2.config_base.Layer
:param parameters: The parameters dictionary.
:type parameters: paddle.v2.parameters.Parameters
""" """
def train(self, reader, topology, parameters, event_handler=None):
"""
train method.
:param reader:
:param topology:
:param parameters:
:param event_handler:
:return:
"""
raise NotImplementedError()
class SGD(ITrainer):
def __init__(self, cost, parameters, update_equation): def __init__(self, cost, parameters, update_equation):
"""
Simple SGD Trainer.
:param update_equation: The optimizer object.
:type update_equation: v2_optimizer.Optimizer
"""
if not isinstance(parameters, v2_parameters.Parameters): if not isinstance(parameters, v2_parameters.Parameters):
raise TypeError('parameters should be parameters') raise TypeError('parameters should be parameters')
...@@ -66,29 +57,26 @@ class SGD(ITrainer): ...@@ -66,29 +57,26 @@ class SGD(ITrainer):
self.__topology_in_proto__, api.CREATE_MODE_NORMAL, self.__topology_in_proto__, api.CREATE_MODE_NORMAL,
self.__optimizer__.enable_types()) self.__optimizer__.enable_types())
assert isinstance(gm, api.GradientMachine) assert isinstance(gm, api.GradientMachine)
parameters.append_gradient_machine(gm)
self.__gradient_machine__ = gm self.__gradient_machine__ = gm
self.__gradient_machine__.randParameters() self.__gradient_machine__.randParameters()
parameters.append_gradient_machine(gm)
def train(self, reader, num_passes=1, event_handler=None, reader_dict=None): def train(self, reader, num_passes=1, event_handler=None, feeding=None):
""" """
Training method. Will train num_passes of input data. Training method. Will train num_passes of input data.
:param reader: :param reader:
:param topology: Network Topology, use one or more Layers to represent it.
:param parameters: The parameter pools.
:param num_passes: The total train passes. :param num_passes: The total train passes.
:param event_handler: Event handler. A method will be invoked when event :param event_handler: Event handler. A method will be invoked when event
occurred. occurred.
:type event_handler: (BaseEvent) => None :type event_handler: (BaseEvent) => None
:param feeding: Feeding is a map of neural network input name and array
index that reader returns.
:type feeding: dict
:return: :return:
""" """
if event_handler is None: if event_handler is None:
event_handler = default_event_handler event_handler = default_event_handler
if reader_dict is None:
reader_dict = self.default_reader_dict()
__check_train_args__(**locals()) __check_train_args__(**locals())
updater = self.__optimizer__.create_local_updater() updater = self.__optimizer__.create_local_updater()
...@@ -100,17 +88,12 @@ class SGD(ITrainer): ...@@ -100,17 +88,12 @@ class SGD(ITrainer):
pass_evaluator = self.__gradient_machine__.makeEvaluator() pass_evaluator = self.__gradient_machine__.makeEvaluator()
assert isinstance(pass_evaluator, api.Evaluator) assert isinstance(pass_evaluator, api.Evaluator)
out_args = api.Arguments.createArguments(0) out_args = api.Arguments.createArguments(0)
feeder = DataFeeder(self.__data_types__, feeding)
feeder = DataFeeder(self.__data_types__, reader_dict)
for pass_id in xrange(num_passes): for pass_id in xrange(num_passes):
event_handler(v2_event.BeginPass(pass_id)) event_handler(v2_event.BeginPass(pass_id))
pass_evaluator.start() pass_evaluator.start()
updater.startPass() updater.startPass()
for batch_id, data_batch in enumerate(reader()): for batch_id, data_batch in enumerate(reader()):
pass_type = updater.startBatch(len(data_batch))
self.__gradient_machine__.forwardBackward(
feeder(data_batch), out_args, pass_type)
batch_evaluator.start() batch_evaluator.start()
event_handler( event_handler(
v2_event.BeginIteration( v2_event.BeginIteration(
...@@ -120,12 +103,11 @@ class SGD(ITrainer): ...@@ -120,12 +103,11 @@ class SGD(ITrainer):
feeder(data_batch), out_args, pass_type) feeder(data_batch), out_args, pass_type)
self.__gradient_machine__.eval(pass_evaluator) self.__gradient_machine__.eval(pass_evaluator)
self.__gradient_machine__.eval(batch_evaluator) self.__gradient_machine__.eval(batch_evaluator)
for each_param in self.__gradient_machine__.getParameters(): for each_param in self.__gradient_machine__.getNonStaticParameters(
):
updater.update(each_param) updater.update(each_param)
# Get cost. We use numpy to calculate total cost for this batch. cost_sum = out_args.sum()
cost_vec = out_args.getSlotValue(0) cost = cost_sum / len(data_batch)
cost_vec = cost_vec.copyToNumpyMat()
cost = cost_vec.sum() / len(data_batch)
updater.finishBatch(cost) updater.finishBatch(cost)
batch_evaluator.finish() batch_evaluator.finish()
event_handler( event_handler(
...@@ -140,27 +122,23 @@ class SGD(ITrainer): ...@@ -140,27 +122,23 @@ class SGD(ITrainer):
event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator)) event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
self.__gradient_machine__.finish() self.__gradient_machine__.finish()
def default_reader_dict(self): def test(self, reader, feeding=None):
reader_dict = dict() feeder = DataFeeder(self.__data_types__, feeding)
for i, tp in enumerate(self.__data_types__):
reader_dict[tp[0]] = i
return reader_dict
def test(self, reader, reader_dict=None):
if reader_dict is None:
reader_dict = self.default_reader_dict()
feeder = DataFeeder(self.__data_types__, reader_dict)
evaluator = self.__gradient_machine__.makeEvaluator() evaluator = self.__gradient_machine__.makeEvaluator()
out_args = api.Arguments.createArguments(0) out_args = api.Arguments.createArguments(0)
evaluator.start() evaluator.start()
total_cost = 0
num_samples = 0.0
for data_batch in reader(): for data_batch in reader():
num_samples += len(data_batch)
self.__gradient_machine__.forward( self.__gradient_machine__.forward(
feeder(data_batch), out_args, api.PASS_TEST) feeder(data_batch), out_args, api.PASS_TEST)
total_cost += out_args.sum()
self.__gradient_machine__.eval(evaluator) self.__gradient_machine__.eval(evaluator)
evaluator.finish() evaluator.finish()
return v2_event.TestResult(evaluator=evaluator) return v2_event.TestResult(
evaluator=evaluator, cost=total_cost / num_samples)
def __check_train_args__(reader, event_handler, **kwargs): def __check_train_args__(reader, event_handler, **kwargs):
......
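A hedged end-to-end sketch of the updated trainer API; `cost`, `parameters`, `train_reader`, `test_reader` and the feeding map are placeholders, not part of this diff:

import paddle.v2 as paddle

optimizer = paddle.optimizer.Momentum(momentum=0.9, learning_rate=1e-3)
trainer = paddle.trainer.SGD(cost=cost,
                             parameters=parameters,
                             update_equation=optimizer)

feeding = {'pixel': 0, 'label': 1}  # layer name -> column index in each sample
trainer.train(reader=train_reader, num_passes=2, feeding=feeding)

result = trainer.test(reader=test_reader, feeding=feeding)
print result.cost  # trainer.test() now also reports the average cost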