Commit 0d5b4acb authored by Yu Yang

Merge branch 'develop' of github.com:baidu/Paddle into feature/serialize_deserialize_in_parameters

...@@ -4,22 +4,14 @@ cache: ...@@ -4,22 +4,14 @@ cache:
- $HOME/third_party - $HOME/third_party
- $HOME/.ccache - $HOME/.ccache
- $HOME/.cache/pip - $HOME/.cache/pip
- $HOME/Library/Caches/Homebrew
sudo: required sudo: required
dist: trusty dist: trusty
os: os:
- linux - linux
- osx
env: env:
- JOB=DOCS - JOB=DOCS
- JOB=BUILD_AND_TEST - JOB=BUILD_AND_TEST
- JOB=PRE_COMMIT - JOB=PRE_COMMIT
matrix:
exclude:
- os: osx
env: JOB=DOCS # Only generate documentation in linux.
- os: osx
env: JOB=PRE_COMMIT # Only check pre-commit hook in linux
addons: addons:
apt: apt:
...@@ -53,7 +45,6 @@ before_install: ...@@ -53,7 +45,6 @@ before_install:
fi fi
fi fi
fi fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
# Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
# protobuf version. # protobuf version.
......
...@@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination ) ...@@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
${source} ${source}
${destination} ${destination}
COMMENT "Generating sphinx documentation: ${builder}" COMMENT "Generating sphinx documentation: ${builder}"
COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html COMMAND cd ${destination} && ln -s ./index_*.html index.html
) )
set_property( set_property(
......
...@@ -110,14 +110,13 @@ endmacro() ...@@ -110,14 +110,13 @@ endmacro()
# Get the coverage data. # Get the coverage data.
file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda") file(GLOB_RECURSE GCDA_FILES "${COV_PATH}" "*.gcda")
message("GCDA files:") message("Process GCDA files:")
message("===============================")
# Get a list of all the object directories needed by gcov # Get a list of all the object directories needed by gcov
# (The directories the .gcda files and .o files are found in) # (The directories the .gcda files and .o files are found in)
# and run gcov on those. # and run gcov on those.
foreach(GCDA ${GCDA_FILES}) foreach(GCDA ${GCDA_FILES})
message("Process: ${GCDA}")
message("------------------------------------------------------------------------------")
get_filename_component(GCDA_DIR ${GCDA} PATH) get_filename_component(GCDA_DIR ${GCDA} PATH)
# #
...@@ -135,7 +134,7 @@ foreach(GCDA ${GCDA_FILES}) ...@@ -135,7 +134,7 @@ foreach(GCDA ${GCDA_FILES})
# If -p is not specified then the file is named only "the_file.c.gcov" # If -p is not specified then the file is named only "the_file.c.gcov"
# #
execute_process( execute_process(
COMMAND ${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} COMMAND "${GCOV_EXECUTABLE} -p -o ${GCDA_DIR} ${GCDA} >/dev/null"
WORKING_DIRECTORY ${GCDA_DIR} WORKING_DIRECTORY ${GCDA_DIR}
) )
endforeach() endforeach()
...@@ -383,7 +382,6 @@ foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING}) ...@@ -383,7 +382,6 @@ foreach(NOT_COVERED_SRC ${COVERAGE_SRCS_REMAINING})
set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]") set(GCOV_FILE_COVERAGE "${GCOV_FILE_COVERAGE}]")
# Generate the final JSON for this file. # Generate the final JSON for this file.
message("Generate JSON for non-gcov file: ${NOT_COVERED_SRC}...")
string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON) string(CONFIGURE ${SRC_FILE_TEMPLATE} FILE_JSON)
set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ") set(JSON_GCOV_FILES "${JSON_GCOV_FILES}${FILE_JSON}, ")
endforeach() endforeach()
......
...@@ -14,46 +14,50 @@ ...@@ -14,46 +14,50 @@
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf) FIND_PACKAGE(Protobuf)
SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf)
SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE)
INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) IF(NOT PROTOBUF_FOUND)
SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/protobuf)
SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/protobuf)
SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" CACHE PATH "protobuf include directory." FORCE)
IF(WIN32)
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
ELSE(WIN32)
SET(PROTOBUF_LITE_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
SET(PROTOBUF_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
SET(PROTOBUF_PROTOC_LIBRARY
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
ENDIF(WIN32)
IF(WIN32) ExternalProject_Add(
SET(PROTOBUF_LITE_LIBRARY protobuf
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.lib" CACHE FILEPATH "protobuf lite library." FORCE) ${EXTERNAL_PROJECT_LOG_ARGS}
SET(PROTOBUF_LIBRARY PREFIX ${PROTOBUF_SOURCES_DIR}
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.lib" CACHE FILEPATH "protobuf library." FORCE) UPDATE_COMMAND ""
SET(PROTOBUF_PROTOC_LIBRARY DEPENDS zlib
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE) GIT_REPOSITORY "https://github.com/google/protobuf.git"
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE) GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546"
ELSE(WIN32) CONFIGURE_COMMAND
SET(PROTOBUF_LITE_LIBRARY ${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE) -Dprotobuf_BUILD_TESTS=OFF
SET(PROTOBUF_LIBRARY -DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
"${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE) -DCMAKE_POSITION_INDEPENDENT_CODE=ON
SET(PROTOBUF_PROTOC_LIBRARY -DCMAKE_BUILD_TYPE=Release
"${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE) -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE) -DCMAKE_INSTALL_LIBDIR=lib
ENDIF(WIN32) )
ExternalProject_Add( LIST(APPEND external_project_dependencies protobuf)
protobuf ENDIF(NOT PROTOBUF_FOUND)
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${PROTOBUF_SOURCES_DIR}
UPDATE_COMMAND ""
DEPENDS zlib
GIT_REPOSITORY "https://github.com/google/protobuf.git"
GIT_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546"
CONFIGURE_COMMAND
${CMAKE_COMMAND} ${PROTOBUF_SOURCES_DIR}/src/protobuf/cmake
-Dprotobuf_BUILD_TESTS=OFF
-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=lib
)
LIST(APPEND external_project_dependencies protobuf) INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
...@@ -221,7 +221,3 @@ ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) ...@@ -221,7 +221,3 @@ ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
MESSAGE("[Paddle] Python Executable: ${PYTHON_EXECUTABLE}")
MESSAGE("[Paddle] Python Include: ${PYTHON_INCLUDE_DIRS}")
MESSAGE("[Paddle] Python Libraries: ${PYTHON_LIBRARIES}")
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['resnet_cifar10']
def conv_bn_layer(input,
ch_out,
filter_size,
stride,
padding,
active_type=paddle.activation.Relu(),
ch_in=None):
tmp = paddle.layer.img_conv(
input=input,
filter_size=filter_size,
num_channels=ch_in,
num_filters=ch_out,
stride=stride,
padding=padding,
act=paddle.activation.Linear(),
bias_attr=False)
return paddle.layer.batch_norm(input=tmp, act=active_type)
def shortcut(ipt, n_in, n_out, stride):
if n_in != n_out:
return conv_bn_layer(ipt, n_out, 1, stride, 0,
paddle.activation.Linear())
else:
return ipt
def basicblock(ipt, ch_out, stride):
ch_in = ch_out * 2
tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear())
short = shortcut(ipt, ch_in, ch_out, stride)
return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())
def layer_warp(block_func, ipt, features, count, stride):
tmp = block_func(ipt, features, stride)
for i in range(1, count):
tmp = block_func(tmp, features, 1)
return tmp
def resnet_cifar10(ipt, depth=32):
# depth should be one of 20, 32, 44, 56, 110, 1202
assert (depth - 2) % 6 == 0
n = (depth - 2) / 6
nStages = {16, 64, 128}
conv1 = conv_bn_layer(
ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
res1 = layer_warp(basicblock, conv1, 16, n, 1)
res2 = layer_warp(basicblock, res1, 32, n, 2)
res3 = layer_warp(basicblock, res2, 64, n, 2)
pool = paddle.layer.img_pool(
input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
return pool
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
from api_v2_resnet import resnet_cifar10
def main():
datadim = 3 * 32 * 32
classdim = 10
# PaddlePaddle init
paddle.init(use_gpu=True, trainer_count=1)
image = paddle.layer.data(
name="image", type=paddle.data_type.dense_vector(datadim))
# Add neural network config
# option 1. resnet
net = resnet_cifar10(image, depth=32)
# option 2. vgg
# net = vgg_bn_drop(image)
out = paddle.layer.fc(input=net,
size=classdim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data(
name="label", type=paddle.data_type.integer_value(classdim))
cost = paddle.layer.classification_cost(input=out, label=lbl)
# Create parameters
parameters = paddle.parameters.create(cost)
# Create optimizer
momentum_optimizer = paddle.optimizer.Momentum(
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
learning_rate=0.1 / 128.0,
learning_rate_decay_a=0.1,
learning_rate_decay_b=50000 * 100,
learning_rate_schedule='discexp',
batch_size=128)
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.reader.batched(
paddle.dataset.cifar.test10(), batch_size=128),
reader_dict={'image': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# Create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=momentum_optimizer)
trainer.train(
reader=paddle.reader.batched(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(), buf_size=50000),
batch_size=128),
num_passes=5,
event_handler=event_handler,
reader_dict={'image': 0,
'label': 1})
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
__all__ = ['vgg_bn_drop']
def vgg_bn_drop(input):
def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
return paddle.networks.img_conv_group(
input=ipt,
num_channels=num_channels,
pool_size=2,
pool_stride=2,
conv_num_filter=[num_filter] * groups,
conv_filter_size=3,
conv_act=paddle.activation.Relu(),
conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts,
pool_type=paddle.pooling.Max())
conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
bn = paddle.layer.batch_norm(
input=fc1,
act=paddle.activation.Relu(),
layer_attr=paddle.attr.Extra(drop_rate=0.5))
fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
return fc2
import paddle.v2 as paddle
import paddle.v2.dataset.uci_housing as uci_housing
def main():
# init
paddle.init(use_gpu=False, trainer_count=1)
# network config
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
y_predict = paddle.layer.fc(input=x,
param_attr=paddle.attr.Param(name='w'),
size=1,
act=paddle.activation.Linear(),
bias_attr=paddle.attr.Param(name='b'))
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.regression_cost(input=y_predict, label=y)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
optimizer = paddle.optimizer.Momentum(momentum=0)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
# event_handler to print training and testing info
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.reader.batched(
uci_housing.test(), batch_size=2),
reader_dict={'x': 0,
'y': 1})
if event.pass_id % 10 == 0:
print "Test %d, %s" % (event.pass_id, result.metrics)
# training
trainer.train(
reader=paddle.reader.batched(
paddle.reader.shuffle(
uci_housing.train(), buf_size=500),
batch_size=2),
reader_dict={'x': 0,
'y': 1},
event_handler=event_handler,
num_passes=30)
if __name__ == '__main__':
main()
...@@ -2,6 +2,59 @@ import paddle.v2 as paddle ...@@ -2,6 +2,59 @@ import paddle.v2 as paddle
import cPickle import cPickle
def softmax_regression(img):
predict = paddle.layer.fc(input=img,
size=10,
act=paddle.activation.Softmax())
return predict
def multilayer_perceptron(img):
# The first fully-connected layer
hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
# The second fully-connected layer and the according activation function
hidden2 = paddle.layer.fc(input=hidden1,
size=64,
act=paddle.activation.Relu())
# The third fully-connected layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=hidden2,
size=10,
act=paddle.activation.Softmax())
return predict
def convolutional_neural_network(img):
# first conv layer
conv_pool_1 = paddle.networks.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
num_channel=1,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# second conv layer
conv_pool_2 = paddle.networks.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
num_channel=20,
pool_size=2,
pool_stride=2,
act=paddle.activation.Tanh())
# The first fully-connected layer
fc1 = paddle.layer.fc(input=conv_pool_2,
size=128,
act=paddle.activation.Tanh())
# The softmax layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = paddle.layer.fc(input=fc1,
size=10,
act=paddle.activation.Softmax())
return predict
def main(): def main():
paddle.init(use_gpu=False, trainer_count=1) paddle.init(use_gpu=False, trainer_count=1)
...@@ -10,12 +63,14 @@ def main(): ...@@ -10,12 +63,14 @@ def main():
name='pixel', type=paddle.data_type.dense_vector(784)) name='pixel', type=paddle.data_type.dense_vector(784))
label = paddle.layer.data( label = paddle.layer.data(
name='label', type=paddle.data_type.integer_value(10)) name='label', type=paddle.data_type.integer_value(10))
hidden1 = paddle.layer.fc(input=images, size=200)
hidden2 = paddle.layer.fc(input=hidden1, size=200) # Here we can build the prediction network in different ways. Please
inference = paddle.layer.fc(input=hidden2, # choose one by uncomment corresponding line.
size=10, predict = softmax_regression(images)
act=paddle.activation.Softmax()) #predict = multilayer_perceptron(images)
cost = paddle.layer.classification_cost(input=inference, label=label) #predict = convolutional_neural_network(images)
cost = paddle.layer.classification_cost(input=predict, label=label)
try: try:
with open('params.pkl', 'r') as f: with open('params.pkl', 'r') as f:
...@@ -23,11 +78,16 @@ def main(): ...@@ -23,11 +78,16 @@ def main():
except IOError: except IOError:
parameters = paddle.parameters.create(cost) parameters = paddle.parameters.create(cost)
adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01) optimizer = paddle.optimizer.Momentum(
learning_rate=0.1 / 128.0,
momentum=0.9,
regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
trainer = paddle.trainer.SGD(cost=cost, trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters, parameters=parameters,
update_equation=adam_optimizer) update_equation=optimizer)
lists = []
def event_handler(event): def event_handler(event):
if isinstance(event, paddle.event.EndIteration): if isinstance(event, paddle.event.EndIteration):
...@@ -43,20 +103,31 @@ def main(): ...@@ -43,20 +103,31 @@ def main():
cPickle.dump( cPickle.dump(
parameters, f, protocol=cPickle.HIGHEST_PROTOCOL) parameters, f, protocol=cPickle.HIGHEST_PROTOCOL)
else: elif isinstance(event, paddle.event.EndPass):
pass result = trainer.test(reader=paddle.reader.batched(
paddle.dataset.mnist.test(), batch_size=128))
print "Test with Pass %d, Cost %f, %s\n" % (
event.pass_id, result.cost, result.metrics)
lists.append((event.pass_id, result.cost,
result.metrics['classification_error_evaluator']))
trainer.train( trainer.train(
reader=paddle.reader.batched( reader=paddle.reader.batched(
paddle.reader.shuffle( paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=8192), paddle.dataset.mnist.train(), buf_size=8192),
batch_size=32), batch_size=128),
event_handler=event_handler) event_handler=event_handler,
num_passes=100)
# find the best pass
best = sorted(lists, key=lambda list: float(list[1]))[0]
print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
# output is a softmax layer. It returns probabilities. # output is a softmax layer. It returns probabilities.
# Shape should be (100, 10) # Shape should be (100, 10)
probs = paddle.infer( probs = paddle.infer(
output=inference, output=predict,
parameters=parameters, parameters=parameters,
reader=paddle.reader.batched( reader=paddle.reader.batched(
paddle.reader.firstn( paddle.reader.firstn(
......
import sys
import math
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
def db_lstm():
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
#8 features
def d_type(size):
return paddle.data_type.integer_value_sequence(size)
word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))
ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))
target = paddle.layer.data(name='target', type=d_type(label_dict_len))
default_std = 1 / math.sqrt(hidden_dim) / 3.0
emb_para = paddle.attr.Param(name='emb', initial_std=0., learning_rate=0.)
std_0 = paddle.attr.Param(initial_std=0.)
std_default = paddle.attr.Param(initial_std=default_std)
predicate_embedding = paddle.layer.embedding(
size=word_dim,
input=predicate,
param_attr=paddle.attr.Param(
name='vemb', initial_std=default_std))
mark_embedding = paddle.layer.embedding(
size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
paddle.layer.embedding(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0 = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
mix_hidden_lr = 1e-3
lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = paddle.attr.Param(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = paddle.layer.lstmemory(
input=hidden_0,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = paddle.layer.mixed(
size=hidden_dim,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = paddle.layer.lstmemory(
input=mix_hidden,
act=paddle.activation.Relu(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Sigmoid(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = paddle.layer.mixed(
size=label_dict_len,
bias_attr=std_default,
input=[
paddle.layer.full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
paddle.layer.full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
crf_cost = paddle.layer.crf(size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(
name='crfw',
initial_std=default_std,
learning_rate=mix_hidden_lr))
crf_dec = paddle.layer.crf_decoding(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=paddle.attr.Param(name='crfw'))
return crf_cost, crf_dec
def load_parameter(file_name, h, w):
with open(file_name, 'rb') as f:
f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32).reshape(h, w)
def main():
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
crf_cost, crf_dec = db_lstm()
# create parameters
parameters = paddle.parameters.create([crf_cost, crf_dec])
# create optimizer
optimizer = paddle.optimizer.Momentum(
momentum=0,
learning_rate=2e-2,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(
average_window=0.5, max_average_window=10000), )
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
trainer = paddle.trainer.SGD(cost=crf_cost,
parameters=parameters,
update_equation=optimizer)
parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))
trn_reader = paddle.reader.batched(
paddle.reader.shuffle(
conll05.test(), buf_size=8192), batch_size=10)
reader_dict = {
'word_data': 0,
'ctx_n2_data': 1,
'ctx_n1_data': 2,
'ctx_0_data': 3,
'ctx_p1_data': 4,
'ctx_p2_data': 5,
'verb_data': 6,
'mark_data': 7,
'target': 8
}
trainer.train(
reader=trn_reader,
event_handler=event_handler,
num_passes=10000,
reader_dict=reader_dict)
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle.trainer_config_helpers.attrs as attrs
from paddle.trainer_config_helpers.poolings import MaxPooling
import paddle.v2 as paddle
def convolution_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=128,
is_predict=False):
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
conv_3 = paddle.networks.sequence_conv_pool(
input=emb, context_len=3, hidden_size=hid_dim)
conv_4 = paddle.networks.sequence_conv_pool(
input=emb, context_len=4, hidden_size=hid_dim)
output = paddle.layer.fc(input=[conv_3, conv_4],
size=class_dim,
act=paddle.activation.Softmax())
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
def stacked_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=512,
stacked_num=3,
is_predict=False):
"""
A wrapper for the sentiment classification task.
This network uses a bi-directional recurrent network
consisting of three LSTM layers. The configuration follows
the paper at the URL below, but uses fewer layers.
http://www.aclweb.org/anthology/P15-1109
input_dim: here is word dictionary dimension.
class_dim: number of categories.
emb_dim: dimension of word embedding.
hid_dim: dimension of hidden layer.
stacked_num: number of stacked LSTM hidden layers.
is_predict: whether the network is used for prediction.
Some layers are not needed in the network when predicting.
"""
assert stacked_num % 2 == 1
layer_attr = attrs.ExtraLayerAttribute(drop_rate=0.5)
fc_para_attr = attrs.ParameterAttribute(learning_rate=1e-3)
lstm_para_attr = attrs.ParameterAttribute(initial_std=0., learning_rate=1.)
para_attr = [fc_para_attr, lstm_para_attr]
bias_attr = attrs.ParameterAttribute(initial_std=0., l2_rate=0.)
relu = paddle.activation.Relu()
linear = paddle.activation.Linear()
data = paddle.layer.data("word",
paddle.data_type.integer_value_sequence(input_dim))
emb = paddle.layer.embedding(input=data, size=emb_dim)
fc1 = paddle.layer.fc(input=emb,
size=hid_dim,
act=linear,
bias_attr=bias_attr)
lstm1 = paddle.layer.lstmemory(
input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
fc = paddle.layer.fc(input=inputs,
size=hid_dim,
act=linear,
param_attr=para_attr,
bias_attr=bias_attr)
lstm = paddle.layer.lstmemory(
input=fc,
reverse=(i % 2) == 0,
act=relu,
bias_attr=bias_attr,
layer_attr=layer_attr)
inputs = [fc, lstm]
fc_last = paddle.layer.pooling(input=inputs[0], pooling_type=MaxPooling())
lstm_last = paddle.layer.pooling(input=inputs[1], pooling_type=MaxPooling())
output = paddle.layer.fc(input=[fc_last, lstm_last],
size=class_dim,
act=paddle.activation.Softmax(),
bias_attr=bias_attr,
param_attr=para_attr)
lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
cost = paddle.layer.classification_cost(input=output, label=lbl)
return cost
if __name__ == '__main__':
# init
paddle.init(use_gpu=True, trainer_count=4)
# network config
print 'load dictionary...'
word_dict = paddle.dataset.imdb.word_dict()
dict_dim = len(word_dict)
class_dim = 2
# Please choose the way to build the network
# by uncommenting the corresponding line.
cost = convolution_net(dict_dim, class_dim=class_dim)
# cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)
# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
adam_optimizer = paddle.optimizer.Adam(
learning_rate=2e-3,
regularization=paddle.optimizer.L2Regularization(rate=8e-4),
model_average=paddle.optimizer.ModelAverage(average_window=0.5))
# End batch and end pass event handler
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
reader=paddle.reader.batched(
lambda: paddle.dataset.imdb.test(word_dict),
batch_size=128),
reader_dict={'word': 0,
'label': 1})
print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
# create trainer
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=adam_optimizer)
trainer.train(
reader=paddle.reader.batched(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=100),
event_handler=event_handler,
reader_dict={'word': 0,
'label': 1},
num_passes=10)
import os
import paddle.v2 as paddle
from seqToseq_net_v2 import seqToseq_net_v2
# Data Definition.
# TODO: This code should be merged into the dataset package.
data_dir = "./data/pre-wmt14"
src_lang_dict = os.path.join(data_dir, 'src.dict')
trg_lang_dict = os.path.join(data_dir, 'trg.dict')
source_dict_dim = len(open(src_lang_dict, "r").readlines())
target_dict_dim = len(open(trg_lang_dict, "r").readlines())
def read_to_dict(dict_path):
with open(dict_path, "r") as fin:
out_dict = {
line.strip(): line_count
for line_count, line in enumerate(fin)
}
return out_dict
src_dict = read_to_dict(src_lang_dict)
trg_dict = read_to_dict(trg_lang_dict)
train_list = os.path.join(data_dir, 'train.list')
test_list = os.path.join(data_dir, 'test.list')
UNK_IDX = 2
START = "<s>"
END = "<e>"
def _get_ids(s, dictionary):
words = s.strip().split()
return [dictionary[START]] + \
[dictionary.get(w, UNK_IDX) for w in words] + \
[dictionary[END]]
def train_reader(file_name):
def reader():
with open(file_name, 'r') as f:
for line_count, line in enumerate(f):
line_split = line.strip().split('\t')
if len(line_split) != 2:
continue
src_seq = line_split[0] # one source sequence
src_ids = _get_ids(src_seq, src_dict)
trg_seq = line_split[1] # one target sequence
trg_words = trg_seq.split()
trg_ids = [trg_dict.get(w, UNK_IDX) for w in trg_words]
# remove sequence whose length > 80 in training mode
if len(src_ids) > 80 or len(trg_ids) > 80:
continue
trg_ids_next = trg_ids + [trg_dict[END]]
trg_ids = [trg_dict[START]] + trg_ids
yield src_ids, trg_ids, trg_ids_next
return reader
def main():
paddle.init(use_gpu=False, trainer_count=1)
# define network topology
cost = seqToseq_net_v2(source_dict_dim, target_dict_dim)
parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters,
update_equation=optimizer)
reader_dict = {
'source_language_word': 0,
'target_language_word': 1,
'target_language_next_word': 2
}
trn_reader = paddle.reader.batched(
paddle.reader.shuffle(
train_reader("data/pre-wmt14/train/train"), buf_size=8192),
batch_size=5)
trainer.train(
reader=trn_reader,
event_handler=event_handler,
num_passes=10000,
reader_dict=reader_dict)
if __name__ == '__main__':
main()
import paddle.v2.activation as activation
import paddle.v2.attr as attr
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.networks as networks
def seqToseq_net_v2(source_dict_dim, target_dict_dim):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
#### Encoder
src_word_id = layer.data(
name='source_language_word',
type=data_type.integer_value_sequence(source_dict_dim))
src_embedding = layer.embedding(
input=src_word_id,
size=word_vector_dim,
param_attr=attr.ParamAttr(name='_source_language_embedding'))
src_forward = networks.simple_gru(input=src_embedding, size=encoder_size)
src_backward = networks.simple_gru(
input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = layer.concat(input=[src_forward, src_backward])
#### Decoder
with layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += layer.full_matrix_projection(input=encoded_vector)
backward_first = layer.first_seq(input=src_backward)
with layer.mixed(size=decoder_size, act=activation.Tanh()) as decoder_boot:
decoder_boot += layer.full_matrix_projection(input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = networks.simple_attention(
encoded_sequence=enc_vec,
encoded_proj=enc_proj,
decoder_state=decoder_mem)
with layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += layer.full_matrix_projection(input=context)
decoder_inputs += layer.full_matrix_projection(input=current_word)
gru_step = layer.gru_step(
name='gru_decoder',
input=decoder_inputs,
output_mem=decoder_mem,
size=decoder_size)
with layer.mixed(
size=target_dict_dim, bias_attr=True,
act=activation.Softmax()) as out:
out += layer.full_matrix_projection(input=gru_step)
return out
decoder_group_name = "decoder_group"
group_input1 = layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
trg_embedding = layer.embedding(
input=layer.data(
name='target_language_word',
type=data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For a decoder equipped with an attention mechanism, in training,
# the target embedding (the ground truth) is the data input,
# while the encoded source sequence is accessed as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = layer.data(
name='target_language_next_word',
type=data_type.integer_value_sequence(target_dict_dim))
cost = layer.classification_cost(input=decoder, label=lbl)
return cost
API中文手册 API
============ ===
\ No newline at end of file
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
API API
=== ===
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
Model Config API Model Config API
---------------- ----------------
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
trainer_config_helpers/optimizers.rst v2/model_configs.rst
trainer_config_helpers/data_sources.rst \ No newline at end of file
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_en.rst
API中文手册
============
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
API
===
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_en.rst
======
Layers
======
.. automodule:: paddle.v2.layer
:members:
...@@ -4,9 +4,10 @@ At training and testing time, PaddlePaddle programs need to read data. To ease t ...@@ -4,9 +4,10 @@ At training and testing time, PaddlePaddle programs need to read data. To ease t
- A *reader* is a function that reads data (from file, network, random number generator, etc) and yields data items. - A *reader* is a function that reads data (from file, network, random number generator, etc) and yields data items.
- A *reader creator* is a function that returns a reader function. - A *reader creator* is a function that returns a reader function.
- A *reader* decorator is a function, which accepts one or more readers, and returns a reader. - A *reader decorator* is a function, which accepts one or more readers, and returns a reader.
- A *batch reader* is a function that reads data (from *reader*, file, network, random number generator, etc) and yields a batch of data items.
and provide frequently used reader creators and reader decorators. and provide function which converts reader to batch reader, frequently used reader creators and reader decorators.
## Data Reader Interface ## Data Reader Interface
...@@ -37,9 +38,54 @@ def reader_creator_random_imageand_label(widht, height, label): ...@@ -37,9 +38,54 @@ def reader_creator_random_imageand_label(widht, height, label):
return reader return reader
``` ```
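For reference, a self-contained reader creator along the same lines as `reader_creator_random_imageand_label` above — a minimal sketch assuming `numpy`, with illustrative names — could look like:

```python
import numpy

def reader_creator_random_image(width, height):
    """A sketch of a reader creator: calling it returns a reader, and the
    reader yields one random image per iteration."""
    def reader():
        while True:
            yield numpy.random.uniform(-1, 1, size=width * height)
    return reader

# usage: create the reader once, then iterate over its items
read_images = reader_creator_random_image(28, 28)
```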
## Batch Reader Interface
A *batch reader* can be any function with no parameters that creates an iterable (anything that can be used in `for x in iterable`). The output of the iterable should be a batch (a list) of data items. Each item inside the list must be a tuple.
Here are valid outputs:
```python
# a mini batch of three data items; each data item consists of three columns of data.
[(1, 1, 1),
(2, 2, 2),
(3, 3, 3)]
# a mini batch of three data items, each data item is a list (single column).
[([1,1,1],),
([2,2,2],),
([3,3,3],)]
```
Please note that each item inside the list must be a tuple; below is an invalid output:
```python
# wrong, [1,1,1] needs to be inside a tuple: ([1,1,1],).
# Otherwise it's ambiguous whether [1,1,1] means a single column of data [1, 1, 1],
# or three columns of data, each of which is 1.
[[1,1,1],
[2,2,2],
[3,3,3]]
```
It's easy to convert a reader into a batch reader:
```python
mnist_train = paddle.dataset.mnist.train()
mnist_train_batch_reader = paddle.batch(mnist_train, 128)
```
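As a quick sanity check — a sketch assuming `paddle.dataset.mnist.train()` yields `(image, label)` tuples — each batch produced by the converted reader is simply a list of such tuples:

```python
# iterate the batch reader defined above; every `batch` is a list of
# (image, label) tuples with up to 128 entries
for batch in mnist_train_batch_reader():
    print len(batch)
    break
```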
It is also easy to create a custom batch reader:
```python
import numpy

def custom_batch_reader():
while True:
batch = []
for i in xrange(128):
batch.append((numpy.random.uniform(-1, 1, 28*28),)) # note that it's a tuple being appended.
yield batch
mnist_random_image_batch_reader = custom_batch_reader
```
## Usage ## Usage
data reader, mapping from item(s) read to data layer, batch size and number of total pass will be passed into `paddle.train`: batch reader, mapping from item(s) read to data layer, batch size and number of total pass will be passed into `paddle.train`:
```python ```python
# two data layer is created: # two data layer is created:
...@@ -47,8 +93,8 @@ image_layer = paddle.layer.data("image", ...) ...@@ -47,8 +93,8 @@ image_layer = paddle.layer.data("image", ...)
label_layer = paddle.layer.data("label", ...) label_layer = paddle.layer.data("label", ...)
# ... # ...
batch_reader = paddle.batch(paddle.dataset.mnist.train(), 128)
paddle.train(paddle.dataset.mnist, {"image":0, "label":1}, 128, 10, ...) paddle.train(batch_reader, {"image":0, "label":1}, 128, 10, ...)
``` ```
## Data Reader Decorator ## Data Reader Decorator
...@@ -64,7 +110,7 @@ Since reading data may take time and training can not proceed without data. It i ...@@ -64,7 +110,7 @@ Since reading data may take time and training can not proceed without data. It i
Use `paddle.reader.buffered` to prefetch data: Use `paddle.reader.buffered` to prefetch data:
```python ```python
buffered_reader = paddle.reader.buffered(paddle.dataset.mnist, 100) buffered_reader = paddle.reader.buffered(paddle.dataset.mnist.train(), 100)
``` ```
`buffered_reader` will try to buffer (prefetch) `100` data entries. `buffered_reader` will try to buffer (prefetch) `100` data entries.
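Under the hood, such a buffered decorator can be sketched with a background thread that reads ahead into a bounded queue. The code below is only an illustration of the idea, not the actual `paddle.reader.buffered` implementation:

```python
import threading
from Queue import Queue  # Python 2 naming, matching the surrounding examples

def buffered(reader, size):
    """A sketch of a buffered reader decorator: a worker thread reads ahead
    into a bounded queue while the consumer drains it."""
    end_token = object()

    def buffered_reader():
        q = Queue(maxsize=size)

        def fill():
            for item in reader():
                q.put(item)
            q.put(end_token)

        t = threading.Thread(target=fill)
        t.daemon = True
        t.start()

        item = q.get()
        while item is not end_token:
            yield item
            item = q.get()

    return buffered_reader
```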
...@@ -91,10 +137,10 @@ def reader_creator_bool(t): ...@@ -91,10 +137,10 @@ def reader_creator_bool(t):
true_reader = reader_creator_bool(True) true_reader = reader_creator_bool(True)
false_reader = reader_creator_bool(False) false_reader = reader_creator_bool(False)
reader = paddle.reader.compose(paddle.dataset.mnist, data_reader_creator_random_image(20, 20), true_reader, false_reader) reader = paddle.reader.compose(paddle.dataset.mnist.train(), data_reader_creator_random_image(20, 20), true_reader, false_reader)
# Skipped 1 because paddle.dataset.mnist produces two items per data entry. # Skipped 1 because paddle.dataset.mnist.train() produces two items per data entry.
# And we don't care second item at this time. # And we don't care second item at this time.
paddle.train(reader, {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...) paddle.train(paddle.batch(reader, 128), {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...)
``` ```
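Conceptually, a compose-style decorator zips the given readers and flattens each combined entry into one tuple. The following is a sketch of that idea (assuming plain values and tuples may both appear), not the real `paddle.reader.compose`:

```python
import itertools

def compose(*readers):
    """A sketch of a compose reader decorator: zip several readers and
    flatten each combined entry into a single tuple."""
    def reader():
        for items in itertools.izip(*[r() for r in readers]):
            out = ()
            for item in items:
                # tuples (e.g. (image, label)) are flattened, plain values are wrapped
                out += item if isinstance(item, tuple) else (item,)
            yield out
    return reader
```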
### Shuffle ### Shuffle
...@@ -103,16 +149,20 @@ Given shuffle buffer size `n`, `paddle.reader.shuffle` will return a data reader ...@@ -103,16 +149,20 @@ Given shuffle buffer size `n`, `paddle.reader.shuffle` will return a data reader
Example: Example:
```python ```python
reader = paddle.reader.shuffle(paddle.dataset.mnist, 512) reader = paddle.reader.shuffle(paddle.dataset.mnist.train(), 512)
``` ```
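A minimal sketch of such a shuffle decorator — buffer up to `n` entries, shuffle them, then emit them — could look like this (an illustration, not the actual implementation):

```python
import random

def shuffle(reader, buf_size):
    """A sketch of a shuffle reader decorator: fill a buffer of buf_size
    entries, shuffle it, then yield the entries one by one."""
    def shuffled_reader():
        buf = []
        for item in reader():
            buf.append(item)
            if len(buf) >= buf_size:
                random.shuffle(buf)
                for b in buf:
                    yield b
                buf = []
        # flush whatever is left at the end of the pass
        random.shuffle(buf)
        for b in buf:
            yield b
    return shuffled_reader
```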
## Q & A ## Q & A
### Why return only a single entry, but not a mini batch? ### Why does a reader return only a single entry, but not a mini batch?
Always returning a single entry makes reusing existing data readers much easier (e.g., if an existing reader returns not a single entry but 3 entries, the training code will be more complex because it needs to handle cases like batch size 2).
We provide the function `paddle.batch` to turn a (single-entry) reader into a batch reader.
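A rough sketch of what such a batching helper could look like (an illustration of the idea, not the actual `paddle.batch` code):

```python
def batch(reader, batch_size):
    """A sketch of a batch helper: group consecutive reader entries into
    lists of batch_size items each."""
    def batch_reader():
        b = []
        for item in reader():
            b.append(item)
            if len(b) == batch_size:
                yield b
                b = []
        if b:  # the final batch may be smaller than batch_size
            yield b
    return batch_reader
```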
If a mini batch is returned, the data reader needs to take care of batch size. But batch size is a concept for training; it makes more sense for the user to specify batch size as a parameter for `train`. ### Why do we need a batch reader? Isn't `train` taking a reader and `batch_size` as arguments sufficient?
Practically, always returning a single entry makes reusing existing data readers much easier (e.g., if an existing reader returns not a single entry but 3 entries, the training code will be more complex because it needs to handle cases like batch size 2). In most cases, `train` taking a reader and `batch_size` as arguments would be sufficient. However, sometimes the user wants to customize the order of data entries inside a mini batch, or even change the batch size dynamically.
### Why use a dictionary but not a list to provide mapping? ### Why use a dictionary but not a list to provide mapping?
...@@ -137,7 +187,7 @@ def image_reader_creator(image_path, label_path, n): ...@@ -137,7 +187,7 @@ def image_reader_creator(image_path, label_path, n):
# images_reader_creator creates a reader # images_reader_creator creates a reader
reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024) reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024)
paddle.train(reader, {"image":0, "label":1}, ...) paddle.train(paddle.batch(reader, 128), {"image":0, "label":1}, ...)
``` ```
### How is `paddle.train` implemented ### How is `paddle.train` implemented
...@@ -145,17 +195,8 @@ paddle.train(reader, {"image":0, "label":1}, ...) ...@@ -145,17 +195,8 @@ paddle.train(reader, {"image":0, "label":1}, ...)
An example implementation of paddle.train could be: An example implementation of paddle.train could be:
```python ```python
def make_minibatch(reader, minibatch_size): def train(batch_reader, mapping, batch_size, total_pass):
def ret():
r = reader()
buf = [r.next() for x in xrange(minibatch_size)]
while len(buf) > 0:
yield buf
buf = [r.next() for x in xrange(minibatch_size)]
return ret
def train(reader, mapping, batch_size, total_pass):
for pass_idx in range(total_pass): for pass_idx in range(total_pass):
for mini_batch in make_minibatch(reader): # this loop will never end in online learning. for mini_batch in batch_reader(): # this loop will never end in online learning.
do_forward_backward(mini_batch, mapping) do_forward_backward(mini_batch, mapping)
``` ```
...@@ -132,7 +132,8 @@ def startPaddle(idMap={}, train_args_dict=None): ...@@ -132,7 +132,8 @@ def startPaddle(idMap={}, train_args_dict=None):
logDir = JOB_PATH_OUTPUT + "/node_" + str(trainerId) logDir = JOB_PATH_OUTPUT + "/node_" + str(trainerId)
if not os.path.exists(JOB_PATH_OUTPUT): if not os.path.exists(JOB_PATH_OUTPUT):
os.makedirs(JOB_PATH_OUTPUT) os.makedirs(JOB_PATH_OUTPUT)
os.mkdir(logDir) if not os.path.exists(logDir):
os.mkdir(logDir)
copyCommand = 'cp -rf ' + JOB_PATH + \ copyCommand = 'cp -rf ' + JOB_PATH + \
"/" + str(trainerId) + "/data/*" + " ./data/" "/" + str(trainerId) + "/data/*" + " ./data/"
os.system(copyCommand) os.system(copyCommand)
......
...@@ -15,13 +15,19 @@ import sys ...@@ -15,13 +15,19 @@ import sys
import os, subprocess import os, subprocess
import shlex import shlex
from recommonmark import parser, transform from recommonmark import parser, transform
try:
import py_paddle
import paddle
import paddle.v2
except ImportError:
print("Must install paddle python package before generating documentation")
sys.exit(1)
MarkdownParser = parser.CommonMarkParser MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, '@PROJ_ROOT@/python')
templates_path = ["@PROJ_ROOT@/doc_theme/templates"] templates_path = ["@PROJ_ROOT@/doc_theme/templates"]
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
......
...@@ -15,14 +15,20 @@ import sys ...@@ -15,14 +15,20 @@ import sys
import os, subprocess import os, subprocess
import shlex import shlex
from recommonmark import parser, transform from recommonmark import parser, transform
try:
import py_paddle
import paddle
import paddle.v2
except ImportError:
print("Must install paddle python package before generating documentation")
sys.exit(1)
MarkdownParser = parser.CommonMarkParser MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, '@PROJ_ROOT@/python')
templates_path = ["@PROJ_ROOT@/doc_theme/templates"] templates_path = ["@PROJ_ROOT@/doc_theme/templates"]
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
......
...@@ -156,14 +156,14 @@ define_py_data_sources2(train_list='data/train.list', ...@@ -156,14 +156,14 @@ define_py_data_sources2(train_list='data/train.list',
obj="process", obj="process",
args={"dictionary": word_dict}) args={"dictionary": word_dict})
``` ```
You can refer to the following link for more detailed examples and data formats: <a href = "../../api/data_provider/pydataprovider2_en.html">PyDataProvider2</a>. You can refer to the following link for more detailed examples and data formats: <a href = "../../api/v1/data_provider/pydataprovider2_en.html">PyDataProvider2</a>.
## Network Architecture ## Network Architecture
We will describe four kinds of network architectures in this section. We will describe four kinds of network architectures in this section.
<center> ![](./src/PipelineNetwork_en.jpg) </center> <center> ![](./src/PipelineNetwork_en.jpg) </center>
First, you will build a logistic regression model. Later, you will also get chance to build other more powerful network architectures. First, you will build a logistic regression model. Later, you will also get chance to build other more powerful network architectures.
For more detailed documentation, you could refer to: <a href = "../../api/trainer_config_helpers/layers.html">layer documentation</a>. All configuration files are in `demo/quick_start` directory. For more detailed documentation, you could refer to: <a href = "../../api/v1/trainer_config_helpers/layers.html">layer documentation</a>. All configuration files are in `demo/quick_start` directory.
### Logistic Regression ### Logistic Regression
The architecture is illustrated in the following picture: The architecture is illustrated in the following picture:
...@@ -366,7 +366,7 @@ You can use single layer LSTM model with Dropout for our text classification pro ...@@ -366,7 +366,7 @@ You can use single layer LSTM model with Dropout for our text classification pro
<br> <br>
## Optimization Algorithm ## Optimization Algorithm
<a href = "../../api/trainer_config_helpers/optimizers.html">Optimization algorithms</a> include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network. <a href = "../../api/v1/trainer_config_helpers/optimizers.html">Optimization algorithms</a> include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network.
```python ```python
settings(batch_size=128, settings(batch_size=128,
...@@ -407,7 +407,7 @@ paddle train \ ...@@ -407,7 +407,7 @@ paddle train \
--init_model_path=./output/pass-0000x --init_model_path=./output/pass-0000x
``` ```
We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to <a href = "../../api/predict/swig_py_paddle_en.html">Python Prediction API</a> tutorial,or other <a href = "../../tutorials/index_en.html">demo</a> for the prediction process using Python. You can also use the following script for inference or evaluation. We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to <a href = "../../api/v1/predict/swig_py_paddle_en.html">Python Prediction API</a> tutorial,or other <a href = "../../tutorials/index_en.html">demo</a> for the prediction process using Python. You can also use the following script for inference or evaluation.
inference script (predict.sh): inference script (predict.sh):
......
...@@ -144,9 +144,7 @@ void Arguments::setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError) { ...@@ -144,9 +144,7 @@ void Arguments::setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError) {
a.cpuSequenceDims = m->cast<paddle::IVector>(vec->getSharedPtr()); a.cpuSequenceDims = m->cast<paddle::IVector>(vec->getSharedPtr());
} }
float Arguments::sumCosts() const { float Arguments::sum() const { return paddle::Argument::sum(m->outputs); }
return paddle::Argument::sumCosts(m->outputs);
}
int64_t Arguments::getBatchSize(size_t idx) const throw(RangeError) { int64_t Arguments::getBatchSize(size_t idx) const throw(RangeError) {
auto& a = m->getArg(idx); auto& a = m->getArg(idx);
......
...@@ -142,6 +142,20 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) { ...@@ -142,6 +142,20 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) {
} }
} }
size_t GradientMachine::getNonStaticParameterSize() const {
return m->machine->getNonStaticParameters().size();
}
Parameter* GradientMachine::getNonStaticParameter(size_t i) throw(RangeError) {
auto params = m->machine->getNonStaticParameters();
if (i < params.size()) {
return Parameter::createFromSharedPtr(
&m->machine->getNonStaticParameters()[i]);
} else {
throw RangeError();
}
}
void GradientMachine::randParameters() { m->machine->randParameters(); } void GradientMachine::randParameters() { m->machine->randParameters(); }
Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const
......
...@@ -453,7 +453,7 @@ public: ...@@ -453,7 +453,7 @@ public:
IVector* vec) throw(RangeError); IVector* vec) throw(RangeError);
void setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError); void setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError);
float sumCosts() const; float sum() const;
private: private:
static Arguments* createByPaddleArgumentVector(void* ptr); static Arguments* createByPaddleArgumentVector(void* ptr);
...@@ -771,6 +771,9 @@ public: ...@@ -771,6 +771,9 @@ public:
size_t getParameterSize() const; size_t getParameterSize() const;
Parameter* getParameter(size_t i) throw(RangeError); Parameter* getParameter(size_t i) throw(RangeError);
size_t getNonStaticParameterSize() const;
Parameter* getNonStaticParameter(size_t i) throw(RangeError);
void randParameters(); void randParameters();
Arguments* getLayerOutput(const std::string& layerName) const Arguments* getLayerOutput(const std::string& layerName) const
......
...@@ -22,7 +22,7 @@ class TestArguments(unittest.TestCase): ...@@ -22,7 +22,7 @@ class TestArguments(unittest.TestCase):
args = swig_paddle.Arguments.createArguments(1) args = swig_paddle.Arguments.createArguments(1)
args.setSlotValue(0, m) args.setSlotValue(0, m)
self.assertAlmostEqual(27.0, args.sumCosts()) self.assertAlmostEqual(27.0, args.sum())
mat = args.getSlotValue(0) mat = args.getSlotValue(0)
assert isinstance(mat, swig_paddle.Matrix) assert isinstance(mat, swig_paddle.Matrix)
......
...@@ -24,7 +24,7 @@ real getCostSum(LayerPtr& testLayer, MatrixPtr weights) { ...@@ -24,7 +24,7 @@ real getCostSum(LayerPtr& testLayer, MatrixPtr weights) {
if (weights) { if (weights) {
outArgs[0].value->dotMul(*outArgs[0].value, *weights); outArgs[0].value->dotMul(*outArgs[0].value, *weights);
} }
return Argument::sumCosts(outArgs); return Argument::sum(outArgs);
} }
real getDiffAndPrint(real newCost1, real getDiffAndPrint(real newCost1,
...@@ -241,7 +241,7 @@ void testBatchState(LayerPtr testLayer, ...@@ -241,7 +241,7 @@ void testBatchState(LayerPtr testLayer,
std::vector<Argument> args; std::vector<Argument> args;
args.push_back(out); args.push_back(out);
EXPECT_EQ(0, Argument::sumCosts(args)) << "testBatchState failed"; EXPECT_EQ(0, Argument::sum(args)) << "testBatchState failed";
for (size_t seqId = 0; seqId < numSequences; ++seqId) { for (size_t seqId = 0; seqId < numSequences; ++seqId) {
start[seqId] += seqLens[seqId]; start[seqId] += seqLens[seqId];
} }
...@@ -672,7 +672,7 @@ void testLayerGradKernel(TestConfig testConf, ...@@ -672,7 +672,7 @@ void testLayerGradKernel(TestConfig testConf,
outArgs[0].value->dotMul(*testLayer->getOutput().value, *weights); outArgs[0].value->dotMul(*testLayer->getOutput().value, *weights);
} }
real cost = Argument::sumCosts(outArgs); real cost = Argument::sum(outArgs);
LOG(INFO) << " cost " << cost; LOG(INFO) << " cost " << cost;
EXPECT_FALSE(std::isnan(cost)); EXPECT_FALSE(std::isnan(cost));
......
...@@ -163,7 +163,7 @@ struct Argument { ...@@ -163,7 +163,7 @@ struct Argument {
: sequenceStartPositions->getData(false); : sequenceStartPositions->getData(false);
} }
static inline real sumCosts(const std::vector<Argument>& arguments) { static inline real sum(const std::vector<Argument>& arguments) {
real cost = 0; real cost = 0;
for (auto& arg : arguments) { for (auto& arg : arguments) {
if (arg.value) { if (arg.value) {
......
...@@ -10,9 +10,11 @@ add_test(NAME socket_test ...@@ -10,9 +10,11 @@ add_test(NAME socket_test
add_unittest_without_exec(test_ProtoServer add_unittest_without_exec(test_ProtoServer
test_ProtoServer.cpp) test_ProtoServer.cpp)
add_test(NAME test_ProtoServer IF(NOT ON_TRAVIS)
COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port add_test(NAME test_ProtoServer
${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer) COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port
${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer)
ENDIF(NOT ON_TRAVIS)
# TODO(yuyang18): Run test_ProtoServer when with rdma # TODO(yuyang18): Run test_ProtoServer when with rdma
# add_test(NAME test_ProtoServerRDMA # add_test(NAME test_ProtoServerRDMA
......
...@@ -195,6 +195,12 @@ def __monkeypatch_gradient_machine__(): ...@@ -195,6 +195,12 @@ def __monkeypatch_gradient_machine__():
swig_paddle.GradientMachine.getParameters = getParameters swig_paddle.GradientMachine.getParameters = getParameters
def getNonStaticParameters(self):
return (self.getNonStaticParameter(i)
for i in xrange(self.getNonStaticParameterSize()))
swig_paddle.GradientMachine.getNonStaticParameters = getNonStaticParameters
def getLayerOutputs(self, layerNames): def getLayerOutputs(self, layerNames):
""" """
getLayerOutputs. get outputs of layers and return a numpy matrix dict. getLayerOutputs. get outputs of layers and return a numpy matrix dict.
......
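The monkey-patched generator above can be consumed directly from Python. A small hedged sketch, assuming `gm` is an already constructed GradientMachine and that the swig `Parameter` object exposes `getName()`:

```python
# Hedged sketch: walk only the non-static (trainable) parameters.
for param in gm.getNonStaticParameters():
    print param.getName()
```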
#!/bin/bash
brew update
brew tap homebrew/science
brew install openblas swig md5sha1sum
...@@ -2,18 +2,11 @@ ...@@ -2,18 +2,11 @@
source ./common.sh source ./common.sh
NPROC=1 NPROC=1
if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages
export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages export PYTHONHOME=/opt/python/2.7.12
export PYTHONHOME=/opt/python/2.7.12 export PATH=/opt/python/2.7.12/bin:${PATH}
export PATH=/opt/python/2.7.12/bin:${PATH} cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} NRPOC=`nproc`
NRPOC=`nproc` make -j $NPROC
make -j $NPROC make coveralls
make coveralls sudo make install
sudo make install
elif [[ "$TRAVIS_OS_NAME" == "osx" ]]; then
export PYTHONPATH=/usr/local/lib/python2.7/site-packages
cmake .. -DON_TRAVIS=ON -DON_COVERALLS=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS}
NPROC=`sysctl -n hw.ncpu`
make -j $NPROC
fi
...@@ -2,8 +2,12 @@ ...@@ -2,8 +2,12 @@
# Add set -e, cd to directory. # Add set -e, cd to directory.
source ./common.sh source ./common.sh
# Compile Documentation only. # Compile Documentation only.
cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF ${EXTRA_CMAKE_OPTS}
mkdir output
make DESTDIR=./output install -j `nproc`
pip install ./output/usr/local/opt/paddle/share/wheels/*
rm -rf *
cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS} cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS}
make paddle_docs paddle_docs_cn make paddle_docs paddle_docs_cn
...@@ -25,26 +29,41 @@ TARGET_BRANCH="gh-pages" ...@@ -25,26 +29,41 @@ TARGET_BRANCH="gh-pages"
# Only deploy master branch to build latest documentation. # Only deploy master branch to build latest documentation.
SOURCE_BRANCH="master" SOURCE_BRANCH="master"
# If is not a Github pull request, and in master branch.
if [ "$TRAVIS_PULL_REQUEST" != "false" -o "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then
exit 0
fi
# Clone the repo to output directory # Clone the repo to output directory
git clone $REPO output git clone $REPO output
cd output cd output
# checkout github page branch function deploy_docs() {
git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH SOURCE_BRANCH=$1
DIR=$2
# If is not a Github pull request
if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then
exit 0
fi
# If it is not watched branch.
if [ "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then
return
fi
# remove old docs. mv new docs. # checkout github page branch
rm -rf doc doc_cn git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH
mv ../doc/cn/html doc_cn
mv ../doc/en/html doc mkdir -p ${DIR}
# remove old docs. mv new docs.
set +e
rm -rf ${DIR}/doc ${DIR}/doc_cn
set -e
mv ../doc/cn/html ${DIR}/doc_cn
mv ../doc/en/html ${DIR}/doc
git add .
}
deploy_docs "master" "."
deploy_docs "develop" "./develop/"
# Check is there anything changed. # Check is there anything changed.
set +e set +e
git diff --exit-code >/dev/null git diff --cached --exit-code >/dev/null
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
echo "No changes to the output on this push; exiting." echo "No changes to the output on this push; exiting."
exit 0 exit 0
...@@ -57,7 +76,6 @@ if [ -n $SSL_KEY ]; then # Only push updated docs for github.com/PaddlePaddle/P ...@@ -57,7 +76,6 @@ if [ -n $SSL_KEY ]; then # Only push updated docs for github.com/PaddlePaddle/P
git config user.name "Travis CI" git config user.name "Travis CI"
git config user.email "paddle-dev@baidu.com" git config user.email "paddle-dev@baidu.com"
git commit -m "Deploy to GitHub Pages: ${SHA}" git commit -m "Deploy to GitHub Pages: ${SHA}"
# Set ssh private key # Set ssh private key
openssl aes-256-cbc -K $SSL_KEY -iv $SSL_IV -in ../../paddle/scripts/travis/deploy_key.enc -out deploy_key -d openssl aes-256-cbc -K $SSL_KEY -iv $SSL_IV -in ../../paddle/scripts/travis/deploy_key.enc -out deploy_key -d
chmod 600 deploy_key chmod 600 deploy_key
......
...@@ -72,6 +72,7 @@ setup(name="py_paddle", ...@@ -72,6 +72,7 @@ setup(name="py_paddle",
packages=['py_paddle'], packages=['py_paddle'],
include_dirs = include_dirs, include_dirs = include_dirs,
install_requires = [ install_requires = [
'nltk>=3.2.2',
'numpy>=1.8.0', # The numpy is required. 'numpy>=1.8.0', # The numpy is required.
'protobuf>=3.0.0' # The paddle protobuf version 'protobuf>=3.0.0' # The paddle protobuf version
], ],
......
...@@ -208,7 +208,7 @@ real Tester::forwardOneBatch(const DataBatch& dataBatch, ...@@ -208,7 +208,7 @@ real Tester::forwardOneBatch(const DataBatch& dataBatch,
return 0.0; // In this case, there is no meaning to calculate cost return 0.0; // In this case, there is no meaning to calculate cost
} }
return Argument::sumCosts(outArgs); return Argument::sum(outArgs);
} }
void Tester::testOnePassBatch(int passId) { void Tester::testOnePassBatch(int passId) {
......
...@@ -310,7 +310,7 @@ real Trainer::checkGradient() { ...@@ -310,7 +310,7 @@ real Trainer::checkGradient() {
std::vector<Argument> outArgs; std::vector<Argument> outArgs;
trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC);
real cost = Argument::sumCosts(outArgs); real cost = Argument::sum(outArgs);
LOG(INFO) << "original cost=" << cost; LOG(INFO) << "original cost=" << cost;
trainerInternal_.getGradientMachine()->backward(); trainerInternal_.getGradientMachine()->backward();
...@@ -340,7 +340,7 @@ real Trainer::checkGradient() { ...@@ -340,7 +340,7 @@ real Trainer::checkGradient() {
parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara); parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara);
parameter->setValueUpdated(); parameter->setValueUpdated();
trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC);
real newCost1 = Argument::sumCosts(outArgs); real newCost1 = Argument::sum(outArgs);
for (size_t i = 0; i < dim; ++i) { for (size_t i = 0; i < dim; ++i) {
newp[i] = oldp[i] - step * d[i]; newp[i] = oldp[i] - step * d[i];
...@@ -349,7 +349,7 @@ real Trainer::checkGradient() { ...@@ -349,7 +349,7 @@ real Trainer::checkGradient() {
parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara); parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara);
parameter->setValueUpdated(); parameter->setValueUpdated();
trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC);
real newCost2 = Argument::sumCosts(outArgs); real newCost2 = Argument::sum(outArgs);
real trueDelta = 0.5 * (newCost1 - newCost2); real trueDelta = 0.5 * (newCost1 - newCost2);
real diff = (1e-20 + trueDelta) / (1e-20 + delta) - 1; real diff = (1e-20 + trueDelta) / (1e-20 + delta) - 1;
...@@ -575,7 +575,7 @@ real Trainer::calcGradient(const DataBatch& dataBatch, ...@@ -575,7 +575,7 @@ real Trainer::calcGradient(const DataBatch& dataBatch,
trainerInternal_.getGradientMachine()->forwardBackward( trainerInternal_.getGradientMachine()->forwardBackward(
inArgs, &outArgs, PASS_TRAIN); inArgs, &outArgs, PASS_TRAIN);
real cost = Argument::sumCosts(outArgs); real cost = Argument::sum(outArgs);
offset = 0; offset = 0;
for (auto& para : parameters) { for (auto& para : parameters) {
......
...@@ -134,7 +134,7 @@ void TrainerInternal::trainOneBatch(int64_t batchId, ...@@ -134,7 +134,7 @@ void TrainerInternal::trainOneBatch(int64_t batchId,
real cost = 0; real cost = 0;
{ {
REGISTER_TIMER("sumCost"); REGISTER_TIMER("sumCost");
cost = Argument::sumCosts(*outArgs); cost = Argument::sum(*outArgs);
} }
if (batchId % intconfig_->log_period == 0) { if (batchId % intconfig_->log_period == 0) {
......
...@@ -65,14 +65,18 @@ def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE): ...@@ -65,14 +65,18 @@ def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
return InputType(dim, seq_type, DataType.SparseValue) return InputType(dim, seq_type, DataType.SparseValue)
def index_slot(dim, seq_type=SequenceType.NO_SEQUENCE): def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE):
return InputType(dim, seq_type, DataType.Index) """Data type of integer.
:param value_range: range of this integer.
"""
return InputType(value_range, seq_type, DataType.Index)
dense_vector = dense_slot dense_vector = dense_slot
sparse_binary_vector = sparse_non_value_slot sparse_binary_vector = sparse_non_value_slot
sparse_vector = sparse_value_slot sparse_vector = sparse_value_slot
integer_value = index_slot integer_value = index_slot
integer_value.__doc__ = index_slot.__doc__
def dense_vector_sequence(dim): def dense_vector_sequence(dim):
...@@ -99,8 +103,11 @@ def sparse_vector_sub_sequence(dim): ...@@ -99,8 +103,11 @@ def sparse_vector_sub_sequence(dim):
return sparse_vector(dim, seq_type=SequenceType.SUB_SEQUENCE) return sparse_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
def integer_value_sequence(dim): def integer_value_sequence(value_range):
return integer_value(dim, seq_type=SequenceType.SEQUENCE) """Data type of a sequence of integer.
:param value_range: range of each element.
"""
return integer_value(value_range, seq_type=SequenceType.SEQUENCE)
def integer_value_sub_sequence(dim): def integer_value_sub_sequence(dim):
...@@ -108,6 +115,7 @@ def integer_value_sub_sequence(dim): ...@@ -108,6 +115,7 @@ def integer_value_sub_sequence(dim):
integer_sequence = integer_value_sequence integer_sequence = integer_value_sequence
integer_sequence.__doc__ = integer_value_sequence.__doc__
class SingleSlotWrapper(object): class SingleSlotWrapper(object):
......
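The `value_range` argument introduced above is simply the number of distinct integer values an input may take. A minimal hedged illustration, assuming `paddle.v2` is importable:

```python
# Hedged sketch: a 10-class label slot and a word-id sequence over a
# 10000-word vocabulary.
import paddle.v2 as paddle

label = paddle.data_type.integer_value(10)               # values in [0, 10)
words = paddle.data_type.integer_value_sequence(10000)   # each id in [0, 10000)
```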
...@@ -20,18 +20,19 @@ import event ...@@ -20,18 +20,19 @@ import event
import data_type import data_type
import topology import topology
import data_feeder import data_feeder
import networks
from . import dataset from . import dataset
from . import reader from . import reader
import attr import attr
import pooling import pooling
import inferencer import inference
import networks import networks
import py_paddle.swig_paddle as api import py_paddle.swig_paddle as api
__all__ = [ __all__ = [
'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader', 'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader',
'topology', 'networks', 'inferencer', 'infer' 'topology', 'networks', 'infer'
] ]
...@@ -43,4 +44,4 @@ def init(**kwargs): ...@@ -43,4 +44,4 @@ def init(**kwargs):
api.initPaddle(*args) api.initPaddle(*args)
infer = inferencer.infer infer = inference.infer
...@@ -22,6 +22,7 @@ class Layer(object): ...@@ -22,6 +22,7 @@ class Layer(object):
def __init__(self, name=None, parent_layers=None): def __init__(self, name=None, parent_layers=None):
assert isinstance(parent_layers, dict) assert isinstance(parent_layers, dict)
self.name = name self.name = name
self.__contex__ = {}
self.__parent_layers__ = parent_layers self.__parent_layers__ = parent_layers
def to_proto(self, context): def to_proto(self, context):
...@@ -39,16 +40,38 @@ class Layer(object): ...@@ -39,16 +40,38 @@ class Layer(object):
self.__parent_layers__[layer_name]) self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer kwargs[layer_name] = v1_layer
if self.name is None: if self.context_name() is None:
return self.to_proto_impl(**kwargs) return self.to_proto_impl(**kwargs)
elif self.name not in context: elif self.context_name() not in context:
context[self.name] = self.to_proto_impl(**kwargs) context[self.context_name()] = self.to_proto_impl(**kwargs)
self.__contex__ = context
return context[self.name] if self.use_context_name():
return context[self.context_name()]
else:
return context[self.name]
def to_proto_impl(self, **kwargs): def to_proto_impl(self, **kwargs):
raise NotImplementedError() raise NotImplementedError()
def context_name(self):
"""
Context name identifies the entry in the context dict that stores the `to_proto_impl` result.
If multiple layers share the same context_name, their `to_proto_impl`
will be invoked only once.
"""
return self.name
def use_context_name(self):
return False
def calculate_size(self):
"""
Lazily calculate the size of this layer; it should be called after to_proto_impl of
this layer has been called.
:return: the size of this layer.
"""
return self.__contex__[self.context_name()].size
def __convert_to_v2__(method_name, parent_names, is_default_name=True): def __convert_to_v2__(method_name, parent_names, is_default_name=True):
if is_default_name: if is_default_name:
......
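The `context_name()` contract documented in the hunk above can be illustrated without Paddle at all. The toy below is not Paddle code, only a sketch of the caching idea: layers that report the same context name have `to_proto_impl` invoked once, and later calls reuse the cached result.

```python
# Self-contained toy: to_proto() caches to_proto_impl() results per context_name().
class ToyLayer(object):
    def __init__(self, name):
        self.name = name

    def context_name(self):
        return self.name

    def to_proto_impl(self):
        print 'building %s' % self.name
        return {'name': self.name}

    def to_proto(self, context):
        if self.context_name() not in context:
            context[self.context_name()] = self.to_proto_impl()
        return context[self.context_name()]

context = {}
a, b = ToyLayer('fc1'), ToyLayer('fc1')   # two objects, same context name
a.to_proto(context)                        # prints "building fc1"
b.to_proto(context)                        # cached, prints nothing
```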
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import mnist import mnist
import imikolov
import imdb
import cifar
import movielens
import conll05
import uci_housing
import sentiment
import wmt14
__all__ = ['mnist'] __all__ = [
'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment',
'uci_housing', 'wmt14'
]
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" """
CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html
""" """
import cPickle import cPickle
import itertools import itertools
import numpy import numpy
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests import requests
import hashlib import hashlib
import os import os
import shutil import shutil
import sys
__all__ = ['DATA_HOME', 'download', 'md5file'] __all__ = ['DATA_HOME', 'download', 'md5file']
...@@ -27,9 +42,24 @@ def download(url, module_name, md5sum): ...@@ -27,9 +42,24 @@ def download(url, module_name, md5sum):
filename = os.path.join(dirname, url.split('/')[-1]) filename = os.path.join(dirname, url.split('/')[-1])
if not (os.path.exists(filename) and md5file(filename) == md5sum): if not (os.path.exists(filename) and md5file(filename) == md5sum):
print "Cache file %s not found, downloading %s" % (filename, url)
r = requests.get(url, stream=True) r = requests.get(url, stream=True)
with open(filename, 'w') as f: total_length = r.headers.get('content-length')
shutil.copyfileobj(r.raw, f)
if total_length is None:
with open(filename, 'w') as f:
shutil.copyfileobj(r.raw, f)
else:
with open(filename, 'w') as f:
dl = 0
total_length = int(total_length)
for data in r.iter_content(chunk_size=4096):
dl += len(data)
f.write(data)
done = int(50 * dl / total_length)
sys.stdout.write("\r[%s%s]" % ('=' * done,
' ' * (50 - done)))
sys.stdout.flush()
return filename return filename
......
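The reworked helper above now streams the file in chunks and prints a progress bar. A hedged usage sketch; the URL and MD5 below are placeholders, not real dataset values:

```python
# Hedged sketch: fetch a file into the Paddle dataset cache, verified by MD5.
from paddle.v2.dataset.common import download

path = download('http://example.com/some_dataset.tar.gz',   # placeholder URL
                'some_dataset',                              # cache sub-directory
                '0123456789abcdef0123456789abcdef')          # placeholder MD5
print path  # local file under DATA_HOME/some_dataset/
```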
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tarfile
import gzip
import itertools
from common import download
__all__ = ['test', 'get_dict', 'get_embedding']
"""
Conll 2005 dataset. Paddle semantic role labeling Book and demo use this
dataset as an example. Because Conll 2005 is not free in public, the default
downloaded URL is test set of Conll 2005 (which is public). Users can change
URL and MD5 to their Conll dataset.
"""
DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz'
DATA_MD5 = '387719152ae52d60422c016e92a742fc'
WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt'
WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa'
VERBDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt'
VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c'
TRGDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt'
TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751'
EMB_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb'
EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7'
UNK_IDX = 0
def load_dict(filename):
d = dict()
with open(filename, 'r') as f:
for i, line in enumerate(f):
d[line.strip()] = i
return d
def corpus_reader(data_path, words_name, props_name):
"""
Read one corpus. It returns an iterator. Each element of
this iterator is a tuple containing the sentence and the labels. The sentence
consists of a list of word IDs, and the labels are a list of label IDs.
:return: an iterator of data.
:rtype: iterator
"""
def reader():
tf = tarfile.open(data_path)
wf = tf.extractfile(words_name)
pf = tf.extractfile(props_name)
with gzip.GzipFile(fileobj=wf) as words_file, gzip.GzipFile(
fileobj=pf) as props_file:
sentences = []
labels = []
one_seg = []
for word, label in itertools.izip(words_file, props_file):
word = word.strip()
label = label.strip().split()
if len(label) == 0: # end of sentence
for i in xrange(len(one_seg[0])):
a_kind_label = [x[i] for x in one_seg]
labels.append(a_kind_label)
if len(labels) >= 1:
verb_list = []
for x in labels[0]:
if x != '-':
verb_list.append(x)
for i, lbl in enumerate(labels[1:]):
cur_tag = 'O'
is_in_bracket = False
lbl_seq = []
verb_word = ''
for l in lbl:
if l == '*' and is_in_bracket == False:
lbl_seq.append('O')
elif l == '*' and is_in_bracket == True:
lbl_seq.append('I-' + cur_tag)
elif l == '*)':
lbl_seq.append('I-' + cur_tag)
is_in_bracket = False
elif l.find('(') != -1 and l.find(')') != -1:
cur_tag = l[1:l.find('*')]
lbl_seq.append('B-' + cur_tag)
is_in_bracket = False
elif l.find('(') != -1 and l.find(')') == -1:
cur_tag = l[1:l.find('*')]
lbl_seq.append('B-' + cur_tag)
is_in_bracket = True
else:
raise RuntimeError('Unexpected label: %s' %
l)
yield sentences, verb_list[i], lbl_seq
sentences = []
labels = []
one_seg = []
else:
sentences.append(word)
one_seg.append(label)
pf.close()
wf.close()
tf.close()
return reader
def reader_creator(corpus_reader,
word_dict=None,
predicate_dict=None,
label_dict=None):
def reader():
for sentence, predicate, labels in corpus_reader():
sen_len = len(sentence)
verb_index = labels.index('B-V')
mark = [0] * len(labels)
if verb_index > 0:
mark[verb_index - 1] = 1
ctx_n1 = sentence[verb_index - 1]
else:
ctx_n1 = 'bos'
if verb_index > 1:
mark[verb_index - 2] = 1
ctx_n2 = sentence[verb_index - 2]
else:
ctx_n2 = 'bos'
mark[verb_index] = 1
ctx_0 = sentence[verb_index]
if verb_index < len(labels) - 1:
mark[verb_index + 1] = 1
ctx_p1 = sentence[verb_index + 1]
else:
ctx_p1 = 'eos'
if verb_index < len(labels) - 2:
mark[verb_index + 2] = 1
ctx_p2 = sentence[verb_index + 2]
else:
ctx_p2 = 'eos'
word_idx = [word_dict.get(w, UNK_IDX) for w in sentence]
ctx_n2_idx = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_idx = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_idx = [word_dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_idx = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_idx = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len
pred_idx = [predicate_dict.get(predicate)] * sen_len
label_idx = [label_dict.get(w) for w in labels]
yield word_idx, ctx_n2_idx, ctx_n1_idx, \
ctx_0_idx, ctx_p1_idx, ctx_p2_idx, pred_idx, mark, label_idx
return reader
def get_dict():
word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5))
verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5))
label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5))
return word_dict, verb_dict, label_dict
def get_embedding():
return download(EMB_URL, 'conll05st', EMB_MD5)
def test():
word_dict, verb_dict, label_dict = get_dict()
reader = corpus_reader(
download(DATA_URL, 'conll05st', DATA_MD5),
words_name='conll05st-release/test.wsj/words/test.wsj.words.gz',
props_name='conll05st-release/test.wsj/props/test.wsj.props.gz')
return reader_creator(reader, word_dict, verb_dict, label_dict)
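Putting the pieces of this new conll05 module together, a hedged usage sketch (it downloads the dictionaries and the public CoNLL-2005 test split from the URLs above):

```python
# Hedged sketch: build the dictionaries and inspect one SRL test sample.
import paddle.v2.dataset.conll05 as conll05

word_dict, verb_dict, label_dict = conll05.get_dict()
emb_path = conll05.get_embedding()
for sample in conll05.test()():
    # word_idx, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, labels
    print len(sample)
    break
```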
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
...@@ -17,6 +14,7 @@ ...@@ -17,6 +14,7 @@
""" """
IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
import tarfile import tarfile
import Queue import Queue
...@@ -118,3 +116,8 @@ def test(word_idx): ...@@ -118,3 +116,8 @@ def test(word_idx):
return reader_creator( return reader_creator(
re.compile("aclImdb/test/pos/.*\.txt$"), re.compile("aclImdb/test/pos/.*\.txt$"),
re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000) re.compile("aclImdb/test/neg/.*\.txt$"), word_idx, 1000)
def word_dict():
return build_dict(
re.compile("aclImdb/((train)|(test))/((pos)|(neg))/.*\.txt$"), 150)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" """
imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
import tarfile import tarfile
__all__ = ['train', 'test'] __all__ = ['train', 'test', 'build_dict']
URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'
MD5 = '30177ea32e27c525793142b6bf2c8e2d' MD5 = '30177ea32e27c525793142b6bf2c8e2d'
...@@ -24,7 +37,9 @@ def word_count(f, word_freq=None): ...@@ -24,7 +37,9 @@ def word_count(f, word_freq=None):
return word_freq return word_freq
def build_dict(train_filename, test_filename): def build_dict():
train_filename = './simple-examples/data/ptb.train.txt'
test_filename = './simple-examples/data/ptb.valid.txt'
with tarfile.open( with tarfile.open(
paddle.v2.dataset.common.download( paddle.v2.dataset.common.download(
paddle.v2.dataset.imikolov.URL, 'imikolov', paddle.v2.dataset.imikolov.URL, 'imikolov',
...@@ -32,27 +47,22 @@ def build_dict(train_filename, test_filename): ...@@ -32,27 +47,22 @@ def build_dict(train_filename, test_filename):
trainf = tf.extractfile(train_filename) trainf = tf.extractfile(train_filename)
testf = tf.extractfile(test_filename) testf = tf.extractfile(test_filename)
word_freq = word_count(testf, word_count(trainf)) word_freq = word_count(testf, word_count(trainf))
if '<unk>' in word_freq:
# remove <unk> for now, since we will set it as last index
del word_freq['<unk>']
TYPO_FREQ = 50 TYPO_FREQ = 50
word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items()) word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items())
dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0])) word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0]))
words, _ = list(zip(*dictionary)) words, _ = list(zip(*word_freq_sorted))
word_idx = dict(zip(words, xrange(len(words)))) word_idx = dict(zip(words, xrange(len(words))))
word_idx['<unk>'] = len(words) word_idx['<unk>'] = len(words)
return word_idx return word_idx
word_idx = {} def reader_creator(filename, word_idx, n):
def reader_creator(filename, n):
global word_idx
if len(word_idx) == 0:
word_idx = build_dict('./simple-examples/data/ptb.train.txt',
'./simple-examples/data/ptb.valid.txt')
def reader(): def reader():
with tarfile.open( with tarfile.open(
paddle.v2.dataset.common.download( paddle.v2.dataset.common.download(
...@@ -71,9 +81,9 @@ def reader_creator(filename, n): ...@@ -71,9 +81,9 @@ def reader_creator(filename, n):
return reader return reader
def train(n): def train(word_idx, n):
return reader_creator('./simple-examples/data/ptb.train.txt', n) return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n)
def test(n): def test(word_idx, n):
return reader_creator('./simple-examples/data/ptb.valid.txt', n) return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n)
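With the refactor above, the dictionary is built explicitly once and passed into the reader creators. A hedged usage sketch:

```python
# Hedged sketch of the new imikolov API: build the dict once, pass it in.
import paddle.v2.dataset.imikolov as imikolov

word_dict = imikolov.build_dict()
for sample in imikolov.train(word_dict, 5)():
    print sample  # assumed to be a window of 5 word ids
    break
```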
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" """
MNIST dataset. MNIST dataset.
""" """
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import zipfile import zipfile
from common import download from common import download
import re import re
......
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The script fetches and preprocesses the movie_reviews data set
provided by NLTK
"""
import common
import collections
import nltk
import numpy as np
from itertools import chain
from nltk.corpus import movie_reviews
__all__ = ['train', 'test', 'get_word_dict']
NUM_TRAINING_INSTANCES = 1600
NUM_TOTAL_INSTANCES = 2000
def download_data_if_not_yet():
"""
Download the data set if it has not been downloaded yet.
"""
try:
# make sure that nltk can find the data
if common.DATA_HOME not in nltk.data.path:
nltk.data.path.append(common.DATA_HOME)
movie_reviews.categories()
except LookupError:
print "Downloading movie_reviews data set, please wait....."
nltk.download('movie_reviews', download_dir=common.DATA_HOME)
print "Download data set success....."
print "Path is " + nltk.data.find('corpora/movie_reviews').path
def get_word_dict():
"""
Sort the words by the frequency with which they occur in the samples
:return:
words_freq_sorted
"""
words_freq_sorted = list()
word_freq_dict = collections.defaultdict(int)
download_data_if_not_yet()
for category in movie_reviews.categories():
for field in movie_reviews.fileids(category):
for words in movie_reviews.words(field):
word_freq_dict[words] += 1
words_sort_list = word_freq_dict.items()
words_sort_list.sort(cmp=lambda a, b: b[1] - a[1])
for index, word in enumerate(words_sort_list):
words_freq_sorted.append((word[0], index))
return words_freq_sorted
def sort_files():
"""
Sort the sample files so that positive and negative samples are read alternately
:return:
files_list
"""
files_list = list()
neg_file_list = movie_reviews.fileids('neg')
pos_file_list = movie_reviews.fileids('pos')
files_list = list(chain.from_iterable(zip(neg_file_list, pos_file_list)))
return files_list
def load_sentiment_data():
"""
Load the data set
:return:
data_set
"""
data_set = list()
download_data_if_not_yet()
words_ids = dict(get_word_dict())
for sample_file in sort_files():
words_list = list()
category = 0 if 'neg' in sample_file else 1
for word in movie_reviews.words(sample_file):
words_list.append(words_ids[word.lower()])
data_set.append((words_list, category))
return data_set
def reader_creator(data):
"""
Reader creator, generate an iterator for data set
:param data:
train data set or test data set
"""
for each in data:
yield each[0], each[1]
def train():
"""
Default train set reader creator
"""
data_set = load_sentiment_data()
return reader_creator(data_set[0:NUM_TRAINING_INSTANCES])
def test():
"""
Default test set reader creator
"""
data_set = load_sentiment_data()
return reader_creator(data_set[NUM_TRAINING_INSTANCES:])
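A hedged usage sketch for this sentiment module; the first call triggers the NLTK movie_reviews download described above:

```python
# Hedged sketch: iterate one (word_ids, label) pair from the default readers.
import paddle.v2.dataset.sentiment as sentiment

for word_ids, label in sentiment.train():
    print len(word_ids), label
    break
```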
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.cifar import paddle.v2.dataset.cifar
import unittest import unittest
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.common import paddle.v2.dataset.common
import unittest import unittest
import tempfile import tempfile
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.imdb import paddle.v2.dataset.imdb
import unittest import unittest
import re import re
......
import paddle.v2.dataset.imikolov import paddle.v2.dataset.imikolov
import unittest import unittest
WORD_DICT = paddle.v2.dataset.imikolov.build_dict()
class TestMikolov(unittest.TestCase): class TestMikolov(unittest.TestCase):
def check_reader(self, reader, n): def check_reader(self, reader, n):
...@@ -9,11 +11,15 @@ class TestMikolov(unittest.TestCase): ...@@ -9,11 +11,15 @@ class TestMikolov(unittest.TestCase):
def test_train(self): def test_train(self):
n = 5 n = 5
self.check_reader(paddle.v2.dataset.imikolov.train(n), n) self.check_reader(paddle.v2.dataset.imikolov.train(WORD_DICT, n), n)
def test_test(self): def test_test(self):
n = 5 n = 5
self.check_reader(paddle.v2.dataset.imikolov.test(n), n) self.check_reader(paddle.v2.dataset.imikolov.test(WORD_DICT, n), n)
def test_total(self):
_, idx = zip(*WORD_DICT.items())
self.assertEqual(sorted(idx)[-1], len(WORD_DICT) - 1)
if __name__ == '__main__': if __name__ == '__main__':
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.mnist import paddle.v2.dataset.mnist
import unittest import unittest
......
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import nltk
import paddle.v2.dataset.sentiment as st
from nltk.corpus import movie_reviews
class TestSentimentMethods(unittest.TestCase):
def test_get_word_dict(self):
word_dict = st.get_word_dict()[0:10]
test_word_list = [(u',', 0), (u'the', 1), (u'.', 2), (u'a', 3),
(u'and', 4), (u'of', 5), (u'to', 6), (u"'", 7),
(u'is', 8), (u'in', 9)]
for idx, each in enumerate(word_dict):
self.assertEqual(each, test_word_list[idx])
self.assertTrue("/root/.cache/paddle/dataset" in nltk.data.path)
def test_sort_files(self):
last_label = ''
for sample_file in st.sort_files():
current_label = sample_file.split("/")[0]
self.assertNotEqual(current_label, last_label)
last_label = current_label
def test_data_set(self):
data_set = st.load_sentiment_data()
last_label = -1
for each in st.test():
self.assertNotEqual(each[1], last_label)
last_label = each[1]
self.assertEqual(len(data_set), st.NUM_TOTAL_INSTANCES)
self.assertEqual(len(list(st.train())), st.NUM_TRAINING_INSTANCES)
self.assertEqual(
len(list(st.test())),
(st.NUM_TOTAL_INSTANCES - st.NUM_TRAINING_INSTANCES))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import os
from common import download
__all__ = ['train', 'test']
URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data'
MD5 = 'd4accdce7a25600298819f8e28e8d593'
feature_names = [
'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
'PTRATIO', 'B', 'LSTAT'
]
UCI_TRAIN_DATA = None
UCI_TEST_DATA = None
def feature_range(maximums, minimums):
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
feature_num = len(maximums)
ax.bar(range(feature_num), maximums - minimums, color='r', align='center')
ax.set_title('feature scale')
plt.xticks(range(feature_num), feature_names)
plt.xlim([-1, feature_num])
fig.set_figheight(6)
fig.set_figwidth(10)
if not os.path.exists('./image'):
os.makedirs('./image')
fig.savefig('image/ranges.png', dpi=48)
plt.close(fig)
def load_data(filename, feature_num=14, ratio=0.8):
global UCI_TRAIN_DATA, UCI_TEST_DATA
if UCI_TRAIN_DATA is not None and UCI_TEST_DATA is not None:
return
data = np.fromfile(filename, sep=' ')
data = data.reshape(data.shape[0] / feature_num, feature_num)
maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum(
axis=0) / data.shape[0]
feature_range(maximums[:-1], minimums[:-1])
for i in xrange(feature_num - 1):
data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i])
offset = int(data.shape[0] * ratio)
UCI_TRAIN_DATA = data[:offset]
UCI_TEST_DATA = data[offset:]
def train():
global UCI_TRAIN_DATA
load_data(download(URL, 'uci_housing', MD5))
def reader():
for d in UCI_TRAIN_DATA:
yield d[:-1], d[-1:]
return reader
def test():
global UCI_TEST_DATA
load_data(download(URL, 'uci_housing', MD5))
def reader():
for d in UCI_TEST_DATA:
yield d[:-1], d[-1:]
return reader
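A hedged usage sketch for the UCI housing readers above; the data is downloaded and normalized on the first call:

```python
# Hedged sketch: iterate one normalized (features, price) pair.
import paddle.v2.dataset.uci_housing as uci_housing

for features, price in uci_housing.train()():
    print len(features), price   # 13 features, a 1-element price array
    break
```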
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
wmt14 dataset
"""
import paddle.v2.dataset.common
import tarfile
import os.path
import itertools
__all__ = ['train', 'test', 'build_dict']
URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
URL_TRAIN = 'http://localhost:8000/train.tgz'
MD5_TRAIN = '72de99da2830ea5a3a2c4eb36092bbc7'
def word_count(f, word_freq=None):
add = paddle.v2.dataset.common.dict_add
if word_freq is None:
word_freq = {}
for l in f:
for w in l.strip().split():
add(word_freq, w)
add(word_freq, '<s>')
add(word_freq, '<e>')
return word_freq
def get_word_dix(word_freq):
TYPO_FREQ = 50
word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items())
word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0]))
words, _ = list(zip(*word_freq_sorted))
word_idx = dict(zip(words, xrange(len(words))))
word_idx['<unk>'] = len(words)
return word_idx
def get_word_freq(train, dev):
word_freq = word_count(train, word_count(dev))
if '<unk>' in word_freq:
# remove <unk> for now, since we will set it as last index
del word_freq['<unk>']
return word_freq
def build_dict():
base_dir = './wmt14-data'
train_en_filename = base_dir + '/train/train.en'
train_fr_filename = base_dir + '/train/train.fr'
dev_en_filename = base_dir + '/dev/ntst1213.en'
dev_fr_filename = base_dir + '/dev/ntst1213.fr'
if not os.path.exists(train_en_filename) or not os.path.exists(
train_fr_filename):
with tarfile.open(
paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14',
MD5_TRAIN)) as tf:
tf.extractall(base_dir)
if not os.path.exists(dev_en_filename) or not os.path.exists(
dev_fr_filename):
with tarfile.open(
paddle.v2.dataset.common.download(URL_DEV_TEST, 'wmt14',
MD5_DEV_TEST)) as tf:
tf.extractall(base_dir)
f_en = open(train_en_filename)
f_fr = open(train_fr_filename)
f_en_dev = open(dev_en_filename)
f_fr_dev = open(dev_fr_filename)
word_freq_en = get_word_freq(f_en, f_en_dev)
word_freq_fr = get_word_freq(f_fr, f_fr_dev)
f_en.close()
f_fr.close()
f_en_dev.close()
f_fr_dev.close()
return get_word_dix(word_freq_en), get_word_dix(word_freq_fr)
def reader_creator(directory, path_en, path_fr, URL, MD5, dict_en, dict_fr):
def reader():
if not os.path.exists(path_en) or not os.path.exists(path_fr):
with tarfile.open(
paddle.v2.dataset.common.download(URL, 'wmt14', MD5)) as tf:
tf.extractall(directory)
f_en = open(path_en)
f_fr = open(path_fr)
UNK_en = dict_en['<unk>']
UNK_fr = dict_fr['<unk>']
for en, fr in itertools.izip(f_en, f_fr):
src_ids = [dict_en.get(w, UNK_en) for w in en.strip().split()]
tar_ids = [
dict_fr.get(w, UNK_fr)
for w in ['<s>'] + fr.strip().split() + ['<e>']
]
# remove sequence whose length > 80 in training mode
if len(src_ids) == 0 or len(tar_ids) <= 1 or len(
src_ids) > 80 or len(tar_ids) > 80:
continue
yield src_ids, tar_ids[:-1], tar_ids[1:]
f_en.close()
f_fr.close()
return reader
def train(dict_en, dict_fr):
directory = './wmt14-data'
return reader_creator(directory, directory + '/train/train.en',
directory + '/train/train.fr', URL_TRAIN, MD5_TRAIN,
dict_en, dict_fr)
def test(dict_en, dict_fr):
directory = './wmt14-data'
return reader_creator(directory, directory + '/dev/ntst1213.en',
directory + '/dev/ntst1213.fr', URL_DEV_TEST,
MD5_DEV_TEST, dict_en, dict_fr)
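A hedged usage sketch for the wmt14 module above. Note that URL_TRAIN points at a localhost placeholder in this revision, so the download only succeeds where that server (or a substituted URL) is reachable:

```python
# Hedged sketch: build both vocabularies, then create the training reader.
import paddle.v2.dataset.wmt14 as wmt14

dict_en, dict_fr = wmt14.build_dict()
for src_ids, trg_ids, trg_ids_next in wmt14.train(dict_en, dict_fr)():
    print len(src_ids), len(trg_ids), len(trg_ids_next)
    break
```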
...@@ -34,8 +34,9 @@ class WithMetric(object): ...@@ -34,8 +34,9 @@ class WithMetric(object):
class TestResult(WithMetric): class TestResult(WithMetric):
def __init__(self, evaluator): def __init__(self, evaluator, cost):
super(TestResult, self).__init__(evaluator) super(TestResult, self).__init__(evaluator)
self.cost = cost
class BeginPass(object): class BeginPass(object):
......
...@@ -5,7 +5,7 @@ from data_feeder import DataFeeder ...@@ -5,7 +5,7 @@ from data_feeder import DataFeeder
import itertools import itertools
import numpy import numpy
__all__ = ['Inference', 'infer'] __all__ = ['infer']
class Inference(object): class Inference(object):
......
...@@ -12,72 +12,42 @@ ...@@ -12,72 +12,42 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Before this new package paddle.v2.layer, users would need to use functions `paddle.v2.layer` is a part of model config packages in paddle.v2. In API v2,
in paddle.trainer_config_helpers.layers to configure networks. we want to make Paddle a plain Python package. The model config package defines
how to configure a neural network topology in Paddle Python code.
The Old Way:
========= The primary usage is shown below.
This old way requires that the creation of a network be defined in a Python
function, say network_config, and that this Python function being passed to .. code-block:: python
paddle.trainer_config_helpers.parse_network_config for the creation of
protobuf message description of this network. import paddle.v2 as paddle
```python img = paddle.layer.data(name='img', type=paddle.data_type.dense_vector(784))
def network_config(): hidden = paddle.layer.fc(input=img, size=200)
img = paddle.trainer_config_helpers.data_layer(name="pixel", size=784) prediction = paddle.layer.fc(input=hidden, size=10,
inference = paddle.trainer_config_helpers.fc_layer( act=paddle.activation.Softmax())
input=img,
size=10, # use prediction instance where needed.
act=paddle.trainer_config_helpers.SoftmaxActivation()) parameters = paddle.v2.parameters.create(cost)
cost = paddle.trainer_config_helpers.classification_cost(
input=inference,
label=paddle.trainer_config_helpers.data_layer(name="label", size=10))
proto_desc = parse_network_config(network_config)
```
When parse_network_config executes network_config, those layer definition
functions like data_layer and fc_layer would change some Python global variables,
so that after the execution, parse_network_config could collect information from
these global variables and generates the protobuf message.
The New Way:
=========
In this PR, we define a function in paddle.v2.layer which creates a Python
class for each layer creation function in paddle.trainer_config_helpers.layers.
Users can use create a network as follows:
```python
img = paddle.v2.layer.data(name="pixel", size=784)
inference = paddle.v2.layer.fc(input=img, size=10, act=paddle.v2.layer.Softmax())
cost = paddle.v2.layer.classification(
input=inference,
label=paddle.v2.layer.data(name="label", size=10))
parameters = paddle.v2.parameters.create(cost)
```
This new way doesn't require those invocations to layer definition functions
to be in a Python function but could be anywhere.
Also, the creation of a protobuf message is hidden in the invocation of
paddle.v2.parameters.create, no longer exposed to users.
""" """
import collections
import inspect
from config_base import Layer, __convert_to_v2__ from config_base import Layer, __convert_to_v2__
import paddle.trainer_config_helpers as conf_helps import paddle.trainer_config_helpers as conf_helps
from paddle.trainer_config_helpers.config_parser_utils import \ from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as __parse__ parse_network_config as __parse__
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
from paddle.trainer_config_helpers.default_decorators import wrap_act_default from paddle.trainer_config_helpers.default_decorators import wrap_act_default
from paddle.trainer_config_helpers.default_decorators import \ from paddle.trainer_config_helpers.default_decorators import \
wrap_bias_attr_default wrap_bias_attr_default
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
from paddle.trainer_config_helpers.layers import layer_support from paddle.trainer_config_helpers.layers import layer_support
from paddle.trainer.config_parser import \
RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \
RecurrentLayerGroupEnd, model_type
import data_type
import activation import activation
import data_type
__all__ = ['parse_network', 'data'] __all__ = ['parse_network', 'data']
...@@ -130,6 +100,137 @@ class DataLayerV2(Layer): ...@@ -130,6 +100,137 @@ class DataLayerV2(Layer):
return getattr(conf_helps, self.__method_name__)(name=self.name, **args) return getattr(conf_helps, self.__method_name__)(name=self.name, **args)
class WithExtraParent(Layer):
def extra_parent(self):
return self.__extra_parent__
def __init__(self, name=None, parent_layers=None):
self.__extra_parent__ = []
super(WithExtraParent, self).__init__(
name=name, parent_layers=parent_layers)
def append_extra_parent(self, parent):
self.__extra_parent__.append(parent)
def to_proto(self, context):
"""
function to set proto attribute
"""
kwargs = dict()
for p in self.__extra_parent__:
p.to_proto(context=context)
for layer_name in self.__parent_layers__:
if not isinstance(self.__parent_layers__[layer_name],
collections.Sequence):
v1_layer = self.__parent_layers__[layer_name].to_proto(
context=context)
else:
v1_layer = map(lambda x: x.to_proto(context=context),
self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer
if self.context_name() is None:
return self.to_proto_impl(context=context, **kwargs)
elif self.context_name() not in context:
context[self.context_name()] = self.to_proto_impl(
context=context, **kwargs)
if self.use_context_name():
return context[self.context_name()]
else:
return context[self.name]
class MemoryV2(WithExtraParent):
def __init__(self, name, **kwargs):
self.name = name
super(MemoryV2, self).__init__(name=name, parent_layers=dict())
self.__kwargs__ = kwargs
self.__boot_layer_name__ = None
if 'boot_layer' in kwargs:
begin_of_current_rnn = []
# TODO(yuyang18): Fix inspect, it could be wrong when user invoke a
# function inside step.
st = inspect.stack()
for i in xrange(len(st)):
locs = inspect.stack()[i][0].f_locals
keys = locs.keys()
for key in keys:
val = locs[key]
if isinstance(val, RecurrentLayerInput):
begin_of_current_rnn.append(val)
elif isinstance(val, collections.Sequence):
for v in val:
if isinstance(v, RecurrentLayerInput):
begin_of_current_rnn.append(v)
if begin_of_current_rnn:
break
assert begin_of_current_rnn is not None
for extra in begin_of_current_rnn:
self.append_extra_parent(extra)
assert isinstance(extra, WithExtraParent)
extra.append_extra_parent(kwargs['boot_layer'])
self.__boot_layer_name__ = kwargs['boot_layer'].name
def to_proto_impl(self, context, **kwargs):
args = dict()
for each in kwargs:
args[each] = kwargs[each]
for each in self.__kwargs__:
args[each] = self.__kwargs__[each]
if self.__boot_layer_name__ is not None:
args['boot_layer'] = context[self.__boot_layer_name__]
size = args.get('size', None)
if size is not None:
if callable(size):
real_size = size()
else:
real_size = size
args['size'] = real_size
return conf_helps.memory(name=self.name, **args)
def context_name(self):
return self.name + "#memory"
def use_context_name(self):
"""
memory layer will have the same name as some other layer
:return:
"""
return True
class LayerOutputV2(Layer):
"""
LayerOutputV2 is used to store the result of LayerOutput in v1 api.
It will not store its parents because layer_output has been parsed already.
"""
def __init__(self, layer_output):
assert isinstance(layer_output, conf_helps.LayerOutput)
self.layer_output = layer_output
super(LayerOutputV2, self).__init__(
name=layer_output.name, parent_layers=dict())
def to_proto_impl(self):
return self.layer_output
class StaticInputV2(object):
def __init__(self, input, is_seq=False, size=None):
assert isinstance(input, LayerV2)
self.name = input.name
self.input = input
self.is_seq = is_seq
self.size = size
# TODO(add size check)
# assert input.size is not None or size is not None
class MixedLayerV2(Layer): class MixedLayerV2(Layer):
""" """
This class is used to support `with` grammar. If not, the following code This class is used to support `with` grammar. If not, the following code
...@@ -161,7 +262,6 @@ class MixedLayerV2(Layer): ...@@ -161,7 +262,6 @@ class MixedLayerV2(Layer):
other_kwargs['act'] = act other_kwargs['act'] = act
other_kwargs['bias_attr'] = bias_attr other_kwargs['bias_attr'] = bias_attr
other_kwargs['layer_attr'] = layer_attr other_kwargs['layer_attr'] = layer_attr
parent_layers = {"input": self.__inputs__} parent_layers = {"input": self.__inputs__}
super(MixedLayerV2, self).__init__(name, parent_layers) super(MixedLayerV2, self).__init__(name, parent_layers)
self.__other_kwargs__ = other_kwargs self.__other_kwargs__ = other_kwargs
...@@ -171,7 +271,7 @@ class MixedLayerV2(Layer): ...@@ -171,7 +271,7 @@ class MixedLayerV2(Layer):
self.__inputs__.append(other) self.__inputs__.append(other)
return self return self
else: else:
raise MixedLayerTypeV2.AddToSealedMixedLayerExceptionV2() raise MixedLayerV2.AddToSealedMixedLayerExceptionV2()
def __enter__(self): def __enter__(self):
assert len(self.__inputs__) == 0 assert len(self.__inputs__) == 0
...@@ -186,6 +286,13 @@ class MixedLayerV2(Layer): ...@@ -186,6 +286,13 @@ class MixedLayerV2(Layer):
args[each] = kwargs[each] args[each] = kwargs[each]
for each in self.__other_kwargs__: for each in self.__other_kwargs__:
args[each] = self.__other_kwargs__[each] args[each] = self.__other_kwargs__[each]
size = args.get('size', None)
if size is not None:
if callable(size):
real_size = size()
else:
real_size = size
args['size'] = real_size
return getattr(conf_helps, self.__method_name__)(**args) return getattr(conf_helps, self.__method_name__)(**args)
...@@ -202,14 +309,51 @@ def mixed(size=0, ...@@ -202,14 +309,51 @@ def mixed(size=0,
return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) return MixedLayerV2(size, input, name, act, bias_attr, layer_attr)
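A hedged sketch of the `with` usage that MixedLayerV2 enables; the data layer call mirrors the new module docstring above, and `identity_projection` is assumed to be exposed under `paddle.v2.layer`, mirroring its use inside `recurrent_group` below:

```python
# Hedged sketch: accumulate projections into a mixed layer via `with` / `+=`.
import paddle.v2 as paddle

img = paddle.layer.data(name='pixel', type=paddle.data_type.dense_vector(128))
with paddle.layer.mixed(size=128, act=paddle.activation.Identity()) as m:
    m += paddle.layer.identity_projection(input=img)
```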
class RecurrentLayerInput(WithExtraParent):
def __init__(self, recurrent_name, index, parent_layers):
assert len(parent_layers) == 1
self.__parents__ = parent_layers.values()[0]
super(RecurrentLayerInput, self).__init__(
name=self.__parents__[index].name, parent_layers=parent_layers)
self.__recurrent_name__ = recurrent_name
def context_name(self):
return self.__recurrent_name__ + ".begin"
def to_proto_impl(self, context, **kwargs):
model_type('recurrent_nn')
RecurrentLayerGroupWithoutOutLinksBegin(
name=self.__recurrent_name__,
in_links=map(lambda x: x.name, self.__parents__))
return self
class RecurrentLayerOutput(Layer):
def __init__(self, recurrent_name, index, parent_layers):
assert len(parent_layers) == 1
self.__parents__ = parent_layers.values()[0]
super(RecurrentLayerOutput, self).__init__(
name=self.__parents__[index].name, parent_layers=parent_layers)
self.__recurrent_name__ = recurrent_name
def context_name(self):
return self.__recurrent_name__ + ".end"
def to_proto_impl(self, **kwargs):
for l in self.__parents__:
RecurrentLayerGroupSetOutLink(l.name)
RecurrentLayerGroupEnd(name=self.__recurrent_name__)
LayerV2 = Layer
data = DataLayerV2
AggregateLevel = conf_helps.layers.AggregateLevel
ExpandLevel = conf_helps.layers.ExpandLevel
memory = MemoryV2
def __layer_name_mapping__(inname):
if inname in ['data_layer', 'memory', 'mixed_layer', 'recurrent_group']:
# Do Not handle these layers
return
elif inname == 'maxid_layer':
...@@ -231,8 +375,10 @@ def __layer_name_mapping__(inname):
def __layer_name_mapping_parent_names__(inname):
all_args = getattr(conf_helps, inname).argspec.args
return filter(
lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b', 'expand_as',
'weights', 'vectors', 'weight', 'score', 'left', 'right', 'output_mem'],
all_args)
...@@ -267,3 +413,54 @@ operator_list = [
for op in operator_list:
globals()[op[0]] = __convert_to_v2__(
op[0], parent_names=op[1], is_default_name=False)
@wrap_name_default()
def recurrent_group(step, input, name=None):
if not isinstance(input, collections.Sequence):
input = [input]
non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2),
input)
actual_input = [
RecurrentLayerInput(
recurrent_name=name,
index=i,
parent_layers={'recurrent_inputs': non_static_inputs})
for i in xrange(len(non_static_inputs))
]
def __real_step__(*args):
rnn_input = list(args)
static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
for static_input in static_inputs:
mem_name = "__%s_memory__" % static_input.input.name
mem = memory(
name=mem_name,
is_seq=static_input.is_seq,
size=static_input.input.calculate_size,
boot_layer=static_input.input)
with mixed(
name=mem_name,
size=static_input.input.calculate_size,
act=activation.Identity()) as mix:
mix += identity_projection(input=mem)
rnn_input.insert(input.index(static_input), mix)
return step(*rnn_input)
actual_output = __real_step__(*actual_input)
if not isinstance(actual_output, collections.Sequence):
actual_output = [actual_output]
retv = [
RecurrentLayerOutput(
recurrent_name=name,
index=i,
parent_layers={'recurrent_outputs': actual_output})
for i in xrange(len(actual_output))
]
if len(retv) == 1:
return retv[0]
else:
return retv
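# Illustrative end-to-end sketch of the v2 recurrent_group defined above (not part
# of this diff); it mirrors the usage exercised in test_rnn_layer.py, where `layer`
# is paddle.v2.layer and dict_dim/word_dim/hidden_dim are hypothetical sizes:
#
#   def step(y):
#       mem = layer.memory(name="rnn_state", size=hidden_dim)
#       return layer.fc(input=[y, mem], size=hidden_dim,
#                       act=activation.Tanh(), bias_attr=True, name="rnn_state")
#
#   word = layer.data(name="word", type=data_type.integer_value(dict_dim))
#   emb = layer.embedding(input=word, size=word_dim)
#   rnn = layer.recurrent_group(name="rnn", step=step, input=emb)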
add_test(NAME test_v2_api
COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE})
add_test(NAME test_v2_layer
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
add_test(NAME test_v2_rnn_layer
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py)
add_test(NAME test_topology
COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/
${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_topology.py
WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle)
...@@ -110,14 +110,14 @@ class DataFeederTest(unittest.TestCase):
self.assertAlmostEqual(value.all(), w[i].all())
def test_integer(self):
value_range = 100
batch_size = 32
index = []
for i in xrange(batch_size):
each_sample = []
each_sample.append(np.random.randint(value_range))
index.append(each_sample)
feeder = DataFeeder([('input', data_type.integer_value(value_range))],
{'input': 0})
arg = feeder(index)
output = arg.getSlotIds(0).copyToNumpyArray()
...@@ -125,7 +125,7 @@ class DataFeederTest(unittest.TestCase):
self.assertEqual(output.all(), index.flatten().all())
def test_integer_sequence(self):
value_range = 10000
batch_size = 32
start = [0]
data = []
...@@ -133,11 +133,12 @@ class DataFeederTest(unittest.TestCase):
each_sample = []
each_sample.append(
self.sparse_binary_reader(
value_range, 30, non_empty=True))
data.append(each_sample)
start.append(len(each_sample[0]) + start[-1])
feeder = DataFeeder(
[('input', data_type.integer_value_sequence(value_range))],
{'input': 0})
arg = feeder(data)
output_data = arg.getSlotIds(0).copyToNumpyArray()
output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray()
......
# Copyright PaddlePaddle contributors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import difflib
import unittest
import paddle.trainer_config_helpers as conf_helps
import paddle.v2.activation as activation
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as parse_network
class RNNTest(unittest.TestCase):
def test_simple_rnn(self):
dict_dim = 10
word_dim = 8
hidden_dim = 8
def parse_old_rnn():
def step(y):
mem = conf_helps.memory(name="rnn_state", size=hidden_dim)
out = conf_helps.fc_layer(
input=[y, mem],
size=hidden_dim,
act=activation.Tanh(),
bias_attr=True,
name="rnn_state")
return out
def test():
data = conf_helps.data_layer(name="word", size=dict_dim)
embd = conf_helps.embedding_layer(input=data, size=word_dim)
conf_helps.recurrent_group(name="rnn", step=step, input=embd)
return str(parse_network(test))
def parse_new_rnn():
def new_step(y):
mem = layer.memory(name="rnn_state", size=hidden_dim)
out = layer.fc(input=[y, mem],
size=hidden_dim,
act=activation.Tanh(),
bias_attr=True,
name="rnn_state")
return out
data = layer.data(
name="word", type=data_type.integer_value(dict_dim))
embd = layer.embedding(input=data, size=word_dim)
rnn_layer = layer.recurrent_group(
name="rnn", step=new_step, input=embd)
return str(layer.parse_network(rnn_layer))
diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
parse_new_rnn().splitlines(1))
print ''.join(diff)
def test_sequence_rnn_multi_input(self):
dict_dim = 10
word_dim = 8
hidden_dim = 8
label_dim = 3
def parse_old_rnn():
def test():
data = conf_helps.data_layer(name="word", size=dict_dim)
label = conf_helps.data_layer(name="label", size=label_dim)
emb = conf_helps.embedding_layer(input=data, size=word_dim)
boot_layer = conf_helps.data_layer(name="boot", size=10)
boot_layer = conf_helps.fc_layer(
name='boot_fc', input=boot_layer, size=10)
def step(y, wid):
z = conf_helps.embedding_layer(input=wid, size=word_dim)
mem = conf_helps.memory(
name="rnn_state",
size=hidden_dim,
boot_layer=boot_layer)
out = conf_helps.fc_layer(
input=[y, z, mem],
size=hidden_dim,
act=conf_helps.TanhActivation(),
bias_attr=True,
name="rnn_state")
return out
out = conf_helps.recurrent_group(
name="rnn", step=step, input=[emb, data])
rep = conf_helps.last_seq(input=out)
prob = conf_helps.fc_layer(
size=label_dim,
input=rep,
act=conf_helps.SoftmaxActivation(),
bias_attr=True)
conf_helps.outputs(
conf_helps.classification_cost(
input=prob, label=label))
return str(parse_network(test))
def parse_new_rnn():
data = layer.data(
name="word", type=data_type.dense_vector(dict_dim))
label = layer.data(
name="label", type=data_type.dense_vector(label_dim))
emb = layer.embedding(input=data, size=word_dim)
boot_layer = layer.data(
name="boot", type=data_type.dense_vector(10))
boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10)
def step(y, wid):
z = layer.embedding(input=wid, size=word_dim)
mem = layer.memory(
name="rnn_state", size=hidden_dim, boot_layer=boot_layer)
out = layer.fc(input=[y, z, mem],
size=hidden_dim,
act=activation.Tanh(),
bias_attr=True,
name="rnn_state")
return out
out = layer.recurrent_group(
name="rnn", step=step, input=[emb, data])
rep = layer.last_seq(input=out)
prob = layer.fc(size=label_dim,
input=rep,
act=activation.Softmax(),
bias_attr=True)
cost = layer.classification_cost(input=prob, label=label)
return str(layer.parse_network(cost))
diff = difflib.unified_diff(parse_old_rnn().splitlines(1),
parse_new_rnn().splitlines(1))
print ''.join(diff)
if __name__ == '__main__':
unittest.main()
...@@ -17,6 +17,7 @@ import collections
from paddle.proto.ModelConfig_pb2 import ModelConfig
import layer as v2_layer
from layer import WithExtraParent
__all__ = ['Topology']
...@@ -40,7 +41,10 @@ def __bfs_travel__(callback, *layers):
__break__ = callback(each_layer)
if __break__:
return
__layers__ = each_layer.__parent_layers__.values()
if isinstance(each_layer, WithExtraParent):
__layers__ = __layers__ + each_layer.extra_parent()
__bfs_travel__(callback, *__layers__)
class Topology(object):
......
...@@ -8,7 +8,7 @@ from . import event as v2_event
from . import optimizer as v2_optimizer
from . import parameters as v2_parameters
__all__ = ['SGD']
def default_event_handler(event):
...@@ -22,26 +22,7 @@ def default_event_handler(event):
pass
class ITrainer(object):
"""
The interface of Trainer. The only exposed method is `train`.
"""
def train(self, reader, topology, parameters, event_handler=None):
"""
train method.
:param reader:
:param topology:
:param parameters:
:param event_handler:
:return:
"""
raise NotImplementedError()
class SGD():
def __init__(self, cost, parameters, update_equation):
"""
Simple SGD Trainer.
...@@ -108,9 +89,6 @@ class SGD(ITrainer):
pass_evaluator.start()
updater.startPass()
for batch_id, data_batch in enumerate(reader()):
pass_type = updater.startBatch(len(data_batch))
self.__gradient_machine__.forwardBackward(
feeder(data_batch), out_args, pass_type)
batch_evaluator.start()
event_handler(
v2_event.BeginIteration(
...@@ -120,12 +98,11 @@ class SGD(ITrainer):
feeder(data_batch), out_args, pass_type)
self.__gradient_machine__.eval(pass_evaluator)
self.__gradient_machine__.eval(batch_evaluator)
for each_param in self.__gradient_machine__.getNonStaticParameters(
):
updater.update(each_param)
cost_sum = out_args.sumCosts()
cost = cost_sum / len(data_batch)
updater.finishBatch(cost)
batch_evaluator.finish()
event_handler(
...@@ -154,13 +131,18 @@ class SGD(ITrainer):
evaluator = self.__gradient_machine__.makeEvaluator()
out_args = api.Arguments.createArguments(0)
evaluator.start()
total_cost = 0
num_samples = 0.0
for data_batch in reader():
num_samples += len(data_batch)
self.__gradient_machine__.forward(
feeder(data_batch), out_args, api.PASS_TEST)
total_cost += out_args.sumCosts()
self.__gradient_machine__.eval(evaluator)
evaluator.finish()
return v2_event.TestResult(
evaluator=evaluator, cost=total_cost / num_samples)
def __check_train_args__(reader, event_handler, **kwargs):
......