提交 863511ee 编写于 作者: Y Yu Yang 提交者: GitHub

Merge branch 'develop' into batch

...@@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination ) ...@@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
${source} ${source}
${destination} ${destination}
COMMENT "Generating sphinx documentation: ${builder}" COMMENT "Generating sphinx documentation: ${builder}"
COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html COMMAND cd ${destination} && ln -s ./index_*.html index.html
) )
set_property( set_property(
......
API中文手册 API
============ ===
\ No newline at end of file
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
API API
=== ===
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
Model Config API Model Config API
---------------- ----------------
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1
trainer_config_helpers/optimizers.rst v2/model_configs.rst
trainer_config_helpers/data_sources.rst \ No newline at end of file
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_en.rst
API中文手册
============
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst
API
===
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_en.rst
======
Layers
======
.. automodule:: paddle.v2.layer
:members:
...@@ -15,13 +15,19 @@ import sys ...@@ -15,13 +15,19 @@ import sys
import os, subprocess import os, subprocess
import shlex import shlex
from recommonmark import parser, transform from recommonmark import parser, transform
try:
import py_paddle
import paddle
import paddle.v2
except ImportError:
print("Must install paddle python package before generating documentation")
sys.exit(1)
MarkdownParser = parser.CommonMarkParser MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, '@PROJ_ROOT@/python')
templates_path = ["@PROJ_ROOT@/doc_theme/templates"] templates_path = ["@PROJ_ROOT@/doc_theme/templates"]
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
......
...@@ -15,14 +15,20 @@ import sys ...@@ -15,14 +15,20 @@ import sys
import os, subprocess import os, subprocess
import shlex import shlex
from recommonmark import parser, transform from recommonmark import parser, transform
try:
import py_paddle
import paddle
import paddle.v2
except ImportError:
print("Must install paddle python package before generating documentation")
sys.exit(1)
MarkdownParser = parser.CommonMarkParser MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here. # documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, '@PROJ_ROOT@/python')
templates_path = ["@PROJ_ROOT@/doc_theme/templates"] templates_path = ["@PROJ_ROOT@/doc_theme/templates"]
# -- General configuration ------------------------------------------------ # -- General configuration ------------------------------------------------
......
...@@ -156,14 +156,14 @@ define_py_data_sources2(train_list='data/train.list', ...@@ -156,14 +156,14 @@ define_py_data_sources2(train_list='data/train.list',
obj="process", obj="process",
args={"dictionary": word_dict}) args={"dictionary": word_dict})
``` ```
You can refer to the following link for more detailed examples and data formats: <a href = "../../api/data_provider/pydataprovider2_en.html">PyDataProvider2</a>. You can refer to the following link for more detailed examples and data formats: <a href = "../../api/v1/data_provider/pydataprovider2_en.html">PyDataProvider2</a>.
## Network Architecture ## Network Architecture
We will describe four kinds of network architectures in this section. We will describe four kinds of network architectures in this section.
<center> ![](./src/PipelineNetwork_en.jpg) </center> <center> ![](./src/PipelineNetwork_en.jpg) </center>
First, you will build a logistic regression model. Later, you will also get chance to build other more powerful network architectures. First, you will build a logistic regression model. Later, you will also get chance to build other more powerful network architectures.
For more detailed documentation, you could refer to: <a href = "../../api/trainer_config_helpers/layers.html">layer documentation</a>. All configuration files are in `demo/quick_start` directory. For more detailed documentation, you could refer to: <a href = "../../api/v1/trainer_config_helpers/layers.html">layer documentation</a>. All configuration files are in `demo/quick_start` directory.
### Logistic Regression ### Logistic Regression
The architecture is illustrated in the following picture: The architecture is illustrated in the following picture:
...@@ -366,7 +366,7 @@ You can use single layer LSTM model with Dropout for our text classification pro ...@@ -366,7 +366,7 @@ You can use single layer LSTM model with Dropout for our text classification pro
<br> <br>
## Optimization Algorithm ## Optimization Algorithm
<a href = "../../api/trainer_config_helpers/optimizers.html">Optimization algorithms</a> include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network. <a href = "../../api/v1/trainer_config_helpers/optimizers.html">Optimization algorithms</a> include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network.
```python ```python
settings(batch_size=128, settings(batch_size=128,
...@@ -407,7 +407,7 @@ paddle train \ ...@@ -407,7 +407,7 @@ paddle train \
--init_model_path=./output/pass-0000x --init_model_path=./output/pass-0000x
``` ```
We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to <a href = "../../api/predict/swig_py_paddle_en.html">Python Prediction API</a> tutorial,or other <a href = "../../tutorials/index_en.html">demo</a> for the prediction process using Python. You can also use the following script for inference or evaluation. We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to <a href = "../../api/v1/predict/swig_py_paddle_en.html">Python Prediction API</a> tutorial,or other <a href = "../../tutorials/index_en.html">demo</a> for the prediction process using Python. You can also use the following script for inference or evaluation.
inference script (predict.sh): inference script (predict.sh):
......
...@@ -144,9 +144,7 @@ void Arguments::setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError) { ...@@ -144,9 +144,7 @@ void Arguments::setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError) {
a.cpuSequenceDims = m->cast<paddle::IVector>(vec->getSharedPtr()); a.cpuSequenceDims = m->cast<paddle::IVector>(vec->getSharedPtr());
} }
float Arguments::sumCosts() const { float Arguments::sum() const { return paddle::Argument::sum(m->outputs); }
return paddle::Argument::sumCosts(m->outputs);
}
int64_t Arguments::getBatchSize(size_t idx) const throw(RangeError) { int64_t Arguments::getBatchSize(size_t idx) const throw(RangeError) {
auto& a = m->getArg(idx); auto& a = m->getArg(idx);
......
...@@ -453,7 +453,7 @@ public: ...@@ -453,7 +453,7 @@ public:
IVector* vec) throw(RangeError); IVector* vec) throw(RangeError);
void setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError); void setSlotSequenceDim(size_t idx, IVector* vec) throw(RangeError);
float sumCosts() const; float sum() const;
private: private:
static Arguments* createByPaddleArgumentVector(void* ptr); static Arguments* createByPaddleArgumentVector(void* ptr);
......
...@@ -22,7 +22,7 @@ class TestArguments(unittest.TestCase): ...@@ -22,7 +22,7 @@ class TestArguments(unittest.TestCase):
args = swig_paddle.Arguments.createArguments(1) args = swig_paddle.Arguments.createArguments(1)
args.setSlotValue(0, m) args.setSlotValue(0, m)
self.assertAlmostEqual(27.0, args.sumCosts()) self.assertAlmostEqual(27.0, args.sum())
mat = args.getSlotValue(0) mat = args.getSlotValue(0)
assert isinstance(mat, swig_paddle.Matrix) assert isinstance(mat, swig_paddle.Matrix)
......
...@@ -24,7 +24,7 @@ real getCostSum(LayerPtr& testLayer, MatrixPtr weights) { ...@@ -24,7 +24,7 @@ real getCostSum(LayerPtr& testLayer, MatrixPtr weights) {
if (weights) { if (weights) {
outArgs[0].value->dotMul(*outArgs[0].value, *weights); outArgs[0].value->dotMul(*outArgs[0].value, *weights);
} }
return Argument::sumCosts(outArgs); return Argument::sum(outArgs);
} }
real getDiffAndPrint(real newCost1, real getDiffAndPrint(real newCost1,
...@@ -241,7 +241,7 @@ void testBatchState(LayerPtr testLayer, ...@@ -241,7 +241,7 @@ void testBatchState(LayerPtr testLayer,
std::vector<Argument> args; std::vector<Argument> args;
args.push_back(out); args.push_back(out);
EXPECT_EQ(0, Argument::sumCosts(args)) << "testBatchState failed"; EXPECT_EQ(0, Argument::sum(args)) << "testBatchState failed";
for (size_t seqId = 0; seqId < numSequences; ++seqId) { for (size_t seqId = 0; seqId < numSequences; ++seqId) {
start[seqId] += seqLens[seqId]; start[seqId] += seqLens[seqId];
} }
...@@ -672,7 +672,7 @@ void testLayerGradKernel(TestConfig testConf, ...@@ -672,7 +672,7 @@ void testLayerGradKernel(TestConfig testConf,
outArgs[0].value->dotMul(*testLayer->getOutput().value, *weights); outArgs[0].value->dotMul(*testLayer->getOutput().value, *weights);
} }
real cost = Argument::sumCosts(outArgs); real cost = Argument::sum(outArgs);
LOG(INFO) << " cost " << cost; LOG(INFO) << " cost " << cost;
EXPECT_FALSE(std::isnan(cost)); EXPECT_FALSE(std::isnan(cost));
......
...@@ -163,7 +163,7 @@ struct Argument { ...@@ -163,7 +163,7 @@ struct Argument {
: sequenceStartPositions->getData(false); : sequenceStartPositions->getData(false);
} }
static inline real sumCosts(const std::vector<Argument>& arguments) { static inline real sum(const std::vector<Argument>& arguments) {
real cost = 0; real cost = 0;
for (auto& arg : arguments) { for (auto& arg : arguments) {
if (arg.value) { if (arg.value) {
......
...@@ -2,8 +2,12 @@ ...@@ -2,8 +2,12 @@
# Add set -e, cd to directory. # Add set -e, cd to directory.
source ./common.sh source ./common.sh
# Compile Documentation only. # Compile Documentation only.
cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF ${EXTRA_CMAKE_OPTS}
mkdir output
make DESTDIR=./output install -j `nproc`
pip install ./output/usr/local/opt/paddle/share/wheels/*
rm -rf *
cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS} cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS}
make paddle_docs paddle_docs_cn make paddle_docs paddle_docs_cn
...@@ -25,26 +29,41 @@ TARGET_BRANCH="gh-pages" ...@@ -25,26 +29,41 @@ TARGET_BRANCH="gh-pages"
# Only deploy master branch to build latest documentation. # Only deploy master branch to build latest documentation.
SOURCE_BRANCH="master" SOURCE_BRANCH="master"
# If is not a Github pull request, and in master branch.
if [ "$TRAVIS_PULL_REQUEST" != "false" -o "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then
exit 0
fi
# Clone the repo to output directory # Clone the repo to output directory
git clone $REPO output git clone $REPO output
cd output cd output
# checkout github page branch function deploy_docs() {
git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH SOURCE_BRANCH=$1
DIR=$2
# If is not a Github pull request
if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then
exit 0
fi
# If it is not watched branch.
if [ "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then
return
fi
# remove old docs. mv new docs. # checkout github page branch
rm -rf doc doc_cn git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH
mv ../doc/cn/html doc_cn
mv ../doc/en/html doc mkdir -p ${DIR}
# remove old docs. mv new docs.
set +e
rm -rf ${DIR}/doc ${DIR}/doc_cn
set -e
mv ../doc/cn/html ${DIR}/doc_cn
mv ../doc/en/html ${DIR}/doc
git add .
}
deploy_docs "master" "."
deploy_docs "develop" "./develop/"
# Check is there anything changed. # Check is there anything changed.
set +e set +e
git diff --exit-code >/dev/null git diff --cached --exit-code >/dev/null
if [ $? -eq 0 ]; then if [ $? -eq 0 ]; then
echo "No changes to the output on this push; exiting." echo "No changes to the output on this push; exiting."
exit 0 exit 0
...@@ -57,7 +76,6 @@ if [ -n $SSL_KEY ]; then # Only push updated docs for github.com/PaddlePaddle/P ...@@ -57,7 +76,6 @@ if [ -n $SSL_KEY ]; then # Only push updated docs for github.com/PaddlePaddle/P
git config user.name "Travis CI" git config user.name "Travis CI"
git config user.email "paddle-dev@baidu.com" git config user.email "paddle-dev@baidu.com"
git commit -m "Deploy to GitHub Pages: ${SHA}" git commit -m "Deploy to GitHub Pages: ${SHA}"
# Set ssh private key # Set ssh private key
openssl aes-256-cbc -K $SSL_KEY -iv $SSL_IV -in ../../paddle/scripts/travis/deploy_key.enc -out deploy_key -d openssl aes-256-cbc -K $SSL_KEY -iv $SSL_IV -in ../../paddle/scripts/travis/deploy_key.enc -out deploy_key -d
chmod 600 deploy_key chmod 600 deploy_key
......
...@@ -208,7 +208,7 @@ real Tester::forwardOneBatch(const DataBatch& dataBatch, ...@@ -208,7 +208,7 @@ real Tester::forwardOneBatch(const DataBatch& dataBatch,
return 0.0; // In this case, there is no meaning to calculate cost return 0.0; // In this case, there is no meaning to calculate cost
} }
return Argument::sumCosts(outArgs); return Argument::sum(outArgs);
} }
void Tester::testOnePassBatch(int passId) { void Tester::testOnePassBatch(int passId) {
......
...@@ -310,7 +310,7 @@ real Trainer::checkGradient() { ...@@ -310,7 +310,7 @@ real Trainer::checkGradient() {
std::vector<Argument> outArgs; std::vector<Argument> outArgs;
trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC);
real cost = Argument::sumCosts(outArgs); real cost = Argument::sum(outArgs);
LOG(INFO) << "original cost=" << cost; LOG(INFO) << "original cost=" << cost;
trainerInternal_.getGradientMachine()->backward(); trainerInternal_.getGradientMachine()->backward();
...@@ -340,7 +340,7 @@ real Trainer::checkGradient() { ...@@ -340,7 +340,7 @@ real Trainer::checkGradient() {
parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara); parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara);
parameter->setValueUpdated(); parameter->setValueUpdated();
trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC);
real newCost1 = Argument::sumCosts(outArgs); real newCost1 = Argument::sum(outArgs);
for (size_t i = 0; i < dim; ++i) { for (size_t i = 0; i < dim; ++i) {
newp[i] = oldp[i] - step * d[i]; newp[i] = oldp[i] - step * d[i];
...@@ -349,7 +349,7 @@ real Trainer::checkGradient() { ...@@ -349,7 +349,7 @@ real Trainer::checkGradient() {
parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara); parameter->getBuf(PARAMETER_VALUE)->copyFrom(newPara);
parameter->setValueUpdated(); parameter->setValueUpdated();
trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC); trainerInternal_.getGradientMachine()->forward(inArgs, &outArgs, PASS_GC);
real newCost2 = Argument::sumCosts(outArgs); real newCost2 = Argument::sum(outArgs);
real trueDelta = 0.5 * (newCost1 - newCost2); real trueDelta = 0.5 * (newCost1 - newCost2);
real diff = (1e-20 + trueDelta) / (1e-20 + delta) - 1; real diff = (1e-20 + trueDelta) / (1e-20 + delta) - 1;
...@@ -575,7 +575,7 @@ real Trainer::calcGradient(const DataBatch& dataBatch, ...@@ -575,7 +575,7 @@ real Trainer::calcGradient(const DataBatch& dataBatch,
trainerInternal_.getGradientMachine()->forwardBackward( trainerInternal_.getGradientMachine()->forwardBackward(
inArgs, &outArgs, PASS_TRAIN); inArgs, &outArgs, PASS_TRAIN);
real cost = Argument::sumCosts(outArgs); real cost = Argument::sum(outArgs);
offset = 0; offset = 0;
for (auto& para : parameters) { for (auto& para : parameters) {
......
...@@ -134,7 +134,7 @@ void TrainerInternal::trainOneBatch(int64_t batchId, ...@@ -134,7 +134,7 @@ void TrainerInternal::trainOneBatch(int64_t batchId,
real cost = 0; real cost = 0;
{ {
REGISTER_TIMER("sumCost"); REGISTER_TIMER("sumCost");
cost = Argument::sumCosts(*outArgs); cost = Argument::sum(*outArgs);
} }
if (batchId % intconfig_->log_period == 0) { if (batchId % intconfig_->log_period == 0) {
......
...@@ -65,14 +65,18 @@ def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE): ...@@ -65,14 +65,18 @@ def sparse_value_slot(dim, seq_type=SequenceType.NO_SEQUENCE):
return InputType(dim, seq_type, DataType.SparseValue) return InputType(dim, seq_type, DataType.SparseValue)
def index_slot(dim, seq_type=SequenceType.NO_SEQUENCE): def index_slot(value_range, seq_type=SequenceType.NO_SEQUENCE):
return InputType(dim, seq_type, DataType.Index) """Data type of integer.
:param value_range: range of this integer.
"""
return InputType(value_range, seq_type, DataType.Index)
dense_vector = dense_slot dense_vector = dense_slot
sparse_binary_vector = sparse_non_value_slot sparse_binary_vector = sparse_non_value_slot
sparse_vector = sparse_value_slot sparse_vector = sparse_value_slot
integer_value = index_slot integer_value = index_slot
integer_value.__doc__ = index_slot.__doc__
def dense_vector_sequence(dim): def dense_vector_sequence(dim):
...@@ -99,8 +103,11 @@ def sparse_vector_sub_sequence(dim): ...@@ -99,8 +103,11 @@ def sparse_vector_sub_sequence(dim):
return sparse_vector(dim, seq_type=SequenceType.SUB_SEQUENCE) return sparse_vector(dim, seq_type=SequenceType.SUB_SEQUENCE)
def integer_value_sequence(dim): def integer_value_sequence(value_range):
return integer_value(dim, seq_type=SequenceType.SEQUENCE) """Data type of a sequence of integer.
:param value_range: range of each element.
"""
return integer_value(value_range, seq_type=SequenceType.SEQUENCE)
def integer_value_sub_sequence(dim): def integer_value_sub_sequence(dim):
...@@ -108,6 +115,7 @@ def integer_value_sub_sequence(dim): ...@@ -108,6 +115,7 @@ def integer_value_sub_sequence(dim):
integer_sequence = integer_value_sequence integer_sequence = integer_value_sequence
integer_sequence.__doc__ = integer_value_sequence.__doc__
class SingleSlotWrapper(object): class SingleSlotWrapper(object):
......
...@@ -25,7 +25,7 @@ from . import dataset ...@@ -25,7 +25,7 @@ from . import dataset
from . import reader from . import reader
import attr import attr
import pooling import pooling
import inferencer import inference
import networks import networks
import py_paddle.swig_paddle as api import py_paddle.swig_paddle as api
import minibatch import minibatch
...@@ -33,7 +33,7 @@ import minibatch ...@@ -33,7 +33,7 @@ import minibatch
__all__ = [ __all__ = [
'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader', 'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader',
'topology', 'networks', 'inferencer', 'infer' 'topology', 'networks', 'infer'
] ]
...@@ -44,6 +44,5 @@ def init(**kwargs): ...@@ -44,6 +44,5 @@ def init(**kwargs):
api.initPaddle(*args) api.initPaddle(*args)
infer = inferencer.infer infer = inferencer.infer
batch = minibatch.batch batch = minibatch.batch
\ No newline at end of file
...@@ -20,8 +20,9 @@ import movielens ...@@ -20,8 +20,9 @@ import movielens
import conll05 import conll05
import uci_housing import uci_housing
import sentiment import sentiment
import wmt14
__all__ = [ __all__ = [
'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment' 'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment'
'uci_housing' 'uci_housing', 'wmt14'
] ]
...@@ -17,7 +17,7 @@ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/ ...@@ -17,7 +17,7 @@ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
import paddle.v2.dataset.common import paddle.v2.dataset.common
import tarfile import tarfile
__all__ = ['train', 'test'] __all__ = ['train', 'test', 'build_dict']
URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz'
MD5 = '30177ea32e27c525793142b6bf2c8e2d' MD5 = '30177ea32e27c525793142b6bf2c8e2d'
...@@ -37,7 +37,9 @@ def word_count(f, word_freq=None): ...@@ -37,7 +37,9 @@ def word_count(f, word_freq=None):
return word_freq return word_freq
def build_dict(train_filename, test_filename): def build_dict():
train_filename = './simple-examples/data/ptb.train.txt'
test_filename = './simple-examples/data/ptb.valid.txt'
with tarfile.open( with tarfile.open(
paddle.v2.dataset.common.download( paddle.v2.dataset.common.download(
paddle.v2.dataset.imikolov.URL, 'imikolov', paddle.v2.dataset.imikolov.URL, 'imikolov',
...@@ -45,27 +47,22 @@ def build_dict(train_filename, test_filename): ...@@ -45,27 +47,22 @@ def build_dict(train_filename, test_filename):
trainf = tf.extractfile(train_filename) trainf = tf.extractfile(train_filename)
testf = tf.extractfile(test_filename) testf = tf.extractfile(test_filename)
word_freq = word_count(testf, word_count(trainf)) word_freq = word_count(testf, word_count(trainf))
if '<unk>' in word_freq:
# remove <unk> for now, since we will set it as last index
del word_freq['<unk>']
TYPO_FREQ = 50 TYPO_FREQ = 50
word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items()) word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items())
dictionary = sorted(word_freq, key=lambda x: (-x[1], x[0])) word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0]))
words, _ = list(zip(*dictionary)) words, _ = list(zip(*word_freq_sorted))
word_idx = dict(zip(words, xrange(len(words)))) word_idx = dict(zip(words, xrange(len(words))))
word_idx['<unk>'] = len(words) word_idx['<unk>'] = len(words)
return word_idx return word_idx
word_idx = {} def reader_creator(filename, word_idx, n):
def reader_creator(filename, n):
global word_idx
if len(word_idx) == 0:
word_idx = build_dict('./simple-examples/data/ptb.train.txt',
'./simple-examples/data/ptb.valid.txt')
def reader(): def reader():
with tarfile.open( with tarfile.open(
paddle.v2.dataset.common.download( paddle.v2.dataset.common.download(
...@@ -84,9 +81,9 @@ def reader_creator(filename, n): ...@@ -84,9 +81,9 @@ def reader_creator(filename, n):
return reader return reader
def train(n): def train(word_idx, n):
return reader_creator('./simple-examples/data/ptb.train.txt', n) return reader_creator('./simple-examples/data/ptb.train.txt', word_idx, n)
def test(n): def test(word_idx, n):
return reader_creator('./simple-examples/data/ptb.valid.txt', n) return reader_creator('./simple-examples/data/ptb.valid.txt', word_idx, n)
import paddle.v2.dataset.imikolov import paddle.v2.dataset.imikolov
import unittest import unittest
WORD_DICT = paddle.v2.dataset.imikolov.build_dict()
class TestMikolov(unittest.TestCase): class TestMikolov(unittest.TestCase):
def check_reader(self, reader, n): def check_reader(self, reader, n):
...@@ -9,11 +11,15 @@ class TestMikolov(unittest.TestCase): ...@@ -9,11 +11,15 @@ class TestMikolov(unittest.TestCase):
def test_train(self): def test_train(self):
n = 5 n = 5
self.check_reader(paddle.v2.dataset.imikolov.train(n), n) self.check_reader(paddle.v2.dataset.imikolov.train(WORD_DICT, n), n)
def test_test(self): def test_test(self):
n = 5 n = 5
self.check_reader(paddle.v2.dataset.imikolov.test(n), n) self.check_reader(paddle.v2.dataset.imikolov.test(WORD_DICT, n), n)
def test_total(self):
_, idx = zip(*WORD_DICT.items())
self.assertEqual(sorted(idx)[-1], len(WORD_DICT) - 1)
if __name__ == '__main__': if __name__ == '__main__':
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
wmt14 dataset
"""
import paddle.v2.dataset.common
import tarfile
import os.path
import itertools
# Names exported by `from ... import *`.
__all__ = ['train', 'test', 'build_dict']
# Archive with the dev/test split and the MD5 string passed to
# paddle.v2.dataset.common.download alongside it.
URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
# Archive with the training split and its MD5 string.
# NOTE(review): this URL targets localhost:8000, so the download can only work
# on a machine that serves train.tgz locally -- presumably a development
# placeholder; replace with a publicly reachable URL before release.
URL_TRAIN = 'http://localhost:8000/train.tgz'
MD5_TRAIN = '72de99da2830ea5a3a2c4eb36092bbc7'
def word_count(f, word_freq=None):
    """Record every token of every line of *f* in a frequency dict.

    Each whitespace-separated token is passed to
    ``paddle.v2.dataset.common.dict_add`` (presumably incrementing its
    count -- confirm against that helper), followed by the sentence
    markers ``'<s>'`` and ``'<e>'`` once per line.

    :param f: iterable of text lines, e.g. an open file.
    :param word_freq: optional dict to update in place; a fresh dict is
        created when it is omitted.
    :return: the updated frequency dict.
    """
    add = paddle.v2.dataset.common.dict_add
    # Identity test: only a missing argument should trigger a new dict.
    if word_freq is None:
        word_freq = {}

    for line in f:
        for token in line.strip().split():
            add(word_freq, token)
        # Sentence boundary markers are recorded once per input line.
        add(word_freq, '<s>')
        add(word_freq, '<e>')

    return word_freq
def get_word_dix(word_freq):
    """Build a word -> index mapping from a word -> frequency mapping.

    Words whose frequency is not strictly greater than ``TYPO_FREQ`` are
    dropped.  The remaining words are ranked by descending frequency, ties
    broken alphabetically, and numbered from 0.  ``'<unk>'`` is always
    assigned the last index.

    :param word_freq: dict mapping word to its occurrence count.
    :return: dict mapping word to integer index; contains only ``'<unk>'``
        (index 0) when no word passes the frequency threshold.
    """
    TYPO_FREQ = 50
    frequent = [(word, count) for word, count in word_freq.items()
                if count > TYPO_FREQ]
    ranked = sorted(frequent, key=lambda item: (-item[1], item[0]))
    # enumerate() avoids unpacking zip(*ranked), which raised ValueError when
    # nothing passed the threshold, and works on both Python 2 and 3.
    word_idx = dict((word, idx) for idx, (word, _) in enumerate(ranked))
    word_idx['<unk>'] = len(ranked)
    return word_idx
def get_word_freq(train, dev):
    """Return combined token frequencies of the *dev* and *train* line sources.

    *dev* is counted first, then *train* is counted into the same dict.
    Any ``'<unk>'`` entry is removed from the result.

    :param train: iterable of training-set lines.
    :param dev: iterable of dev-set lines.
    :return: dict of token frequencies without a ``'<unk>'`` key.
    """
    counts = word_count(dev)
    counts = word_count(train, counts)
    # '<unk>' is dropped for now; it is set as the last index later on.
    counts.pop('<unk>', None)
    return counts
def build_dict():
    """Build the English and French word-index dictionaries.

    Downloads and extracts the train and dev/test archives into
    ``./wmt14-data`` when the expected text files are not present, counts
    token frequencies over the train and dev files of each language, and
    converts the counts into index dictionaries.

    :return: tuple ``(dict_en, dict_fr)`` of word -> index dicts.
    """
    base_dir = './wmt14-data'
    train_en_filename = base_dir + '/train/train.en'
    train_fr_filename = base_dir + '/train/train.fr'
    dev_en_filename = base_dir + '/dev/ntst1213.en'
    dev_fr_filename = base_dir + '/dev/ntst1213.fr'

    if not os.path.exists(train_en_filename) or not os.path.exists(
            train_fr_filename):
        with tarfile.open(
                paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14',
                                                  MD5_TRAIN)) as tf:
            tf.extractall(base_dir)

    if not os.path.exists(dev_en_filename) or not os.path.exists(
            dev_fr_filename):
        with tarfile.open(
                paddle.v2.dataset.common.download(URL_DEV_TEST, 'wmt14',
                                                  MD5_DEV_TEST)) as tf:
            tf.extractall(base_dir)

    # Context managers close the files even if counting raises; the previous
    # explicit close() calls were skipped on any exception.
    with open(train_en_filename) as f_en, open(dev_en_filename) as f_en_dev:
        word_freq_en = get_word_freq(f_en, f_en_dev)
    with open(train_fr_filename) as f_fr, open(dev_fr_filename) as f_fr_dev:
        word_freq_fr = get_word_freq(f_fr, f_fr_dev)

    return get_word_dix(word_freq_en), get_word_dix(word_freq_fr)
def reader_creator(directory, path_en, path_fr, URL, MD5, dict_en, dict_fr):
    """Create a reader over a parallel English/French corpus.

    :param directory: directory the archive is extracted into when needed.
    :param path_en: path of the English text file (one sentence per line).
    :param path_fr: path of the French text file, line-aligned with *path_en*.
    :param URL: archive URL used when either text file is missing.
    :param MD5: MD5 string passed to the download helper with *URL*.
    :param dict_en: English word -> index dict; must contain ``'<unk>'``.
    :param dict_fr: French word -> index dict; must contain ``'<unk>'``.
    :return: a zero-argument generator function yielding
        ``(src_ids, tar_ids[:-1], tar_ids[1:])`` per sentence pair, where
        ``tar_ids`` is the French sentence wrapped in ``'<s>'`` / ``'<e>'``.
    """
    # itertools.izip exists only on Python 2; on Python 3 the builtin zip is
    # already lazy, so fall back to it.
    lazy_zip = getattr(itertools, 'izip', zip)

    def reader():
        if not os.path.exists(path_en) or not os.path.exists(path_fr):
            with tarfile.open(
                    paddle.v2.dataset.common.download(URL, 'wmt14',
                                                      MD5)) as tf:
                tf.extractall(directory)

        UNK_en = dict_en['<unk>']
        UNK_fr = dict_fr['<unk>']

        # `with` guarantees both files are closed even when the consumer
        # stops iterating early or a line raises.
        with open(path_en) as f_en, open(path_fr) as f_fr:
            for en, fr in lazy_zip(f_en, f_fr):
                src_ids = [dict_en.get(w, UNK_en) for w in en.strip().split()]
                tar_ids = [
                    dict_fr.get(w, UNK_fr)
                    for w in ['<s>'] + fr.strip().split() + ['<e>']
                ]

                # Skip empty pairs and any sequence longer than 80 tokens.
                if len(src_ids) == 0 or len(tar_ids) <= 1 or len(
                        src_ids) > 80 or len(tar_ids) > 80:
                    continue

                yield src_ids, tar_ids[:-1], tar_ids[1:]

    return reader
def train(dict_en, dict_fr):
    """Return a reader over the training sentence pairs.

    :param dict_en: English word -> index dict.
    :param dict_fr: French word -> index dict.
    :return: reader function produced by ``reader_creator``.
    """
    directory = './wmt14-data'
    english_path = directory + '/train/train.en'
    french_path = directory + '/train/train.fr'
    return reader_creator(directory, english_path, french_path, URL_TRAIN,
                          MD5_TRAIN, dict_en, dict_fr)
def test(dict_en, dict_fr):
    """Return a reader over the ntst1213 dev sentence pairs.

    :param dict_en: English word -> index dict.
    :param dict_fr: French word -> index dict.
    :return: reader function produced by ``reader_creator``.
    """
    directory = './wmt14-data'
    english_path = directory + '/dev/ntst1213.en'
    french_path = directory + '/dev/ntst1213.fr'
    return reader_creator(directory, english_path, french_path, URL_DEV_TEST,
                          MD5_DEV_TEST, dict_en, dict_fr)
...@@ -5,7 +5,7 @@ from data_feeder import DataFeeder ...@@ -5,7 +5,7 @@ from data_feeder import DataFeeder
import itertools import itertools
import numpy import numpy
__all__ = ['Inference', 'infer'] __all__ = ['infer']
class Inference(object): class Inference(object):
......
...@@ -12,58 +12,23 @@ ...@@ -12,58 +12,23 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
Before this new package paddle.v2.layer, users would need to use functions `paddle.v2.layer` is a part of model config packages in paddle.v2. In API v2,
in paddle.trainer_config_helpers.layers to configure networks. we want to make Paddle a plain Python package. The model config package defined
the way how to configure a neural network topology in Paddle Python code.
The Old Way:
========= The primary usage shows below.
This old way requires that the creation of a network be defined in a Python
function, say network_config, and that this Python function being passed to .. code-block:: python
paddle.trainer_config_helpers.parse_network_config for the creation of
protobuf message description of this network. import paddle.v2 as paddle
```python img = paddle.layer.data(name='img', type=paddle.data_type.dense_vector(784))
def network_config(): hidden = paddle.layer.fc(input=img, size=200)
img = paddle.trainer_config_helpers.data_layer(name="pixel", size=784) prediction = paddle.layer.fc(input=hidden, size=10,
inference = paddle.trainer_config_helpers.fc_layer( act=paddle.activation.Softmax())
input=img,
size=10, # use prediction instance where needed.
act=paddle.trainer_config_helpers.SoftmaxActivation()) parameters = paddle.v2.parameters.create(cost)
cost = paddle.trainer_config_helpers.classification_cost(
input=inference,
label=paddle.trainer_config_helpers.data_layer(name="label", size=10))
proto_desc = parse_network_config(network_config)
```
When parse_network_config executes network_config, those layer definition
functions like data_layer and fc_layer would change some Python global variables,
so that after the execution, parse_network_config could collect information from
these global variables and generate the protobuf message.
The New Way:
=========
In this PR, we define a function in paddle.v2.layer which creates a Python
class for each layer creation function in paddle.trainer_config_helpers.layers.
Users can create a network as follows:
```python
img = paddle.v2.layer.data(name="pixel", size=784)
inference = paddle.v2.layer.fc(input=img, size=10, act=paddle.v2.layer.Softmax())
cost = paddle.v2.layer.classification(
input=inference,
label=paddle.v2.layer.data(name="label", size=10))
parameters = paddle.v2.parameters.create(cost)
```
This new way doesn't require those invocations to layer definition functions
to be in a Python function but could be anywhere.
Also, the creation of a protobuf message is hidden in the invocation of
paddle.v2.parameters.create, no longer exposed to users.
""" """
import collections import collections
......
...@@ -110,14 +110,14 @@ class DataFeederTest(unittest.TestCase): ...@@ -110,14 +110,14 @@ class DataFeederTest(unittest.TestCase):
self.assertAlmostEqual(value.all(), w[i].all()) self.assertAlmostEqual(value.all(), w[i].all())
def test_integer(self): def test_integer(self):
dim = 100 value_range = 100
batch_size = 32 batch_size = 32
index = [] index = []
for i in xrange(batch_size): for i in xrange(batch_size):
each_sample = [] each_sample = []
each_sample.append(np.random.randint(dim)) each_sample.append(np.random.randint(value_range))
index.append(each_sample) index.append(each_sample)
feeder = DataFeeder([('input', data_type.integer_value(dim))], feeder = DataFeeder([('input', data_type.integer_value(value_range))],
{'input': 0}) {'input': 0})
arg = feeder(index) arg = feeder(index)
output = arg.getSlotIds(0).copyToNumpyArray() output = arg.getSlotIds(0).copyToNumpyArray()
...@@ -125,7 +125,7 @@ class DataFeederTest(unittest.TestCase): ...@@ -125,7 +125,7 @@ class DataFeederTest(unittest.TestCase):
self.assertEqual(output.all(), index.flatten().all()) self.assertEqual(output.all(), index.flatten().all())
def test_integer_sequence(self): def test_integer_sequence(self):
dim = 10000 value_range = 10000
batch_size = 32 batch_size = 32
start = [0] start = [0]
data = [] data = []
...@@ -133,11 +133,12 @@ class DataFeederTest(unittest.TestCase): ...@@ -133,11 +133,12 @@ class DataFeederTest(unittest.TestCase):
each_sample = [] each_sample = []
each_sample.append( each_sample.append(
self.sparse_binary_reader( self.sparse_binary_reader(
dim, 30, non_empty=True)) value_range, 30, non_empty=True))
data.append(each_sample) data.append(each_sample)
start.append(len(each_sample[0]) + start[-1]) start.append(len(each_sample[0]) + start[-1])
feeder = DataFeeder([('input', data_type.integer_value_sequence(dim))], feeder = DataFeeder(
{'input': 0}) [('input', data_type.integer_value_sequence(value_range))],
{'input': 0})
arg = feeder(data) arg = feeder(data)
output_data = arg.getSlotIds(0).copyToNumpyArray() output_data = arg.getSlotIds(0).copyToNumpyArray()
output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray() output_start = arg.getSlotSequenceStartPositions(0).copyToNumpyArray()
......
...@@ -8,7 +8,7 @@ from . import event as v2_event ...@@ -8,7 +8,7 @@ from . import event as v2_event
from . import optimizer as v2_optimizer from . import optimizer as v2_optimizer
from . import parameters as v2_parameters from . import parameters as v2_parameters
__all__ = ['ITrainer', 'SGD'] __all__ = ['SGD']
def default_event_handler(event): def default_event_handler(event):
...@@ -22,26 +22,7 @@ def default_event_handler(event): ...@@ -22,26 +22,7 @@ def default_event_handler(event):
pass pass
class ITrainer(object): class SGD():
"""
The interface of Trainer. The only exposed method is `train`.
"""
def train(self, reader, topology, parameters, event_handler=None):
"""
train method.
:param reader:
:param topology:
:param parameters:
:param event_handler:
:return:
"""
raise NotImplementedError()
class SGD(ITrainer):
def __init__(self, cost, parameters, update_equation): def __init__(self, cost, parameters, update_equation):
""" """
Simple SGD Trainer. Simple SGD Trainer.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册