diff --git a/cmake/FindSphinx.cmake b/cmake/FindSphinx.cmake
index d319442ef10b38b9edf5844e5540a92c7094c7ce..1c29cb22a31f1e41a6b5575837c6374175cfdea5 100644
--- a/cmake/FindSphinx.cmake
+++ b/cmake/FindSphinx.cmake
@@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
${source}
${destination}
COMMENT "Generating sphinx documentation: ${builder}"
- COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html
+ COMMAND cd ${destination} && ln -sf ./index_*.html index.html
)
set_property(
diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py
index 585f61c6fa4c89c8621815a168742429ac236898..e0fc0e04bbd21f691caa1ce3fb95c8a7065d1b3f 100644
--- a/demo/image_classification/api_v2_train.py
+++ b/demo/image_classification/api_v2_train.py
@@ -66,7 +66,7 @@ def main():
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
result = trainer.test(
- reader=paddle.reader.batched(
+ reader=paddle.batch(
paddle.dataset.cifar.test10(), batch_size=128),
reader_dict={'image': 0,
'label': 1})
@@ -77,7 +77,7 @@ def main():
parameters=parameters,
update_equation=momentum_optimizer)
trainer.train(
- reader=paddle.reader.batched(
+ reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(), buf_size=50000),
batch_size=128),
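The `paddle.reader.batched` to `paddle.batch` rename above recurs in every demo touched by this patch; both wrap an item-level reader into a minibatch reader. A minimal sketch of the new call against the `paddle.v2` package from this patch (the toy reader below is illustrative, not part of the demos):

```python
import paddle.v2 as paddle

# A toy item-level reader: any callable returning an iterator of instances.
def toy_reader():
    for i in range(10):
        yield [float(i)], i % 2  # (feature vector, label)

# paddle.batch groups instances into lists of batch_size items, exactly
# like the removed paddle.reader.batched; the last batch may be shorter.
batched = paddle.batch(toy_reader, batch_size=4)
for minibatch in batched():
    print(len(minibatch))  # 4, 4, 2
```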
diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py
index 9b7ebde5007047e34da9274bf8165cfa527e2cf1..4fb1808ca11a6e6937c77737dcf21475c36b4650 100644
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@@ -98,7 +98,7 @@ def main():
result.metrics['classification_error_evaluator']))
trainer.train(
- reader=paddle.reader.batched(
+ reader=paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=8192),
batch_size=128),
@@ -115,7 +115,7 @@ def main():
probs = paddle.infer(
output=predict,
parameters=parameters,
- reader=paddle.reader.batched(
+ reader=paddle.batch(
paddle.reader.firstn(
paddle.reader.map_readers(lambda item: (item[0], ),
paddle.dataset.mnist.test()),
diff --git a/doc/api/index_cn.rst b/doc/api/index_cn.rst
index 3718cd73a2003b8ef6c406a9bd51dc68e76402dc..874dd9cb2278ce36a029b8745f2d82a7e642128e 100644
--- a/doc/api/index_cn.rst
+++ b/doc/api/index_cn.rst
@@ -1,37 +1,2 @@
-API中文手册
-============
-
-DataProvider API
-----------------
-
-.. toctree::
- :maxdepth: 1
-
- data_provider/dataprovider_cn.rst
- data_provider/pydataprovider2_cn.rst
-
-.. _api_trainer_config:
-
-Model Config API
-----------------
-
-.. toctree::
- :maxdepth: 1
-
- trainer_config_helpers/optimizers.rst
- trainer_config_helpers/data_sources.rst
- trainer_config_helpers/layers.rst
- trainer_config_helpers/activations.rst
- trainer_config_helpers/poolings.rst
- trainer_config_helpers/networks.rst
- trainer_config_helpers/evaluators.rst
- trainer_config_helpers/attrs.rst
-
-
-Applications API
-----------------
-
-.. toctree::
- :maxdepth: 1
-
- predict/swig_py_paddle_cn.rst
+API
+===
\ No newline at end of file
diff --git a/doc/api/index_en.rst b/doc/api/index_en.rst
index 10c297a71d6988c002de868e804ed9ee2345fbd7..b7f470e1f8a9a1c720e7d70832ec069339ddc60f 100644
--- a/doc/api/index_en.rst
+++ b/doc/api/index_en.rst
@@ -1,37 +1,10 @@
API
===
-DataProvider API
-----------------
-
-.. toctree::
- :maxdepth: 1
-
- data_provider/dataprovider_en.rst
- data_provider/pydataprovider2_en.rst
-
-.. _api_trainer_config:
-
Model Config API
----------------
.. toctree::
:maxdepth: 1
- trainer_config_helpers/optimizers.rst
- trainer_config_helpers/data_sources.rst
- trainer_config_helpers/layers.rst
- trainer_config_helpers/activations.rst
- trainer_config_helpers/poolings.rst
- trainer_config_helpers/networks.rst
- trainer_config_helpers/evaluators.rst
- trainer_config_helpers/attrs.rst
-
-
-Applications API
-----------------
-
-.. toctree::
- :maxdepth: 1
-
- predict/swig_py_paddle_en.rst
+ v2/model_configs.rst
\ No newline at end of file
diff --git a/doc/api/data_provider/dataprovider_cn.rst b/doc/api/v1/data_provider/dataprovider_cn.rst
similarity index 100%
rename from doc/api/data_provider/dataprovider_cn.rst
rename to doc/api/v1/data_provider/dataprovider_cn.rst
diff --git a/doc/api/data_provider/dataprovider_en.rst b/doc/api/v1/data_provider/dataprovider_en.rst
similarity index 100%
rename from doc/api/data_provider/dataprovider_en.rst
rename to doc/api/v1/data_provider/dataprovider_en.rst
diff --git a/doc/api/data_provider/pydataprovider2_cn.rst b/doc/api/v1/data_provider/pydataprovider2_cn.rst
similarity index 100%
rename from doc/api/data_provider/pydataprovider2_cn.rst
rename to doc/api/v1/data_provider/pydataprovider2_cn.rst
diff --git a/doc/api/data_provider/pydataprovider2_en.rst b/doc/api/v1/data_provider/pydataprovider2_en.rst
similarity index 100%
rename from doc/api/data_provider/pydataprovider2_en.rst
rename to doc/api/v1/data_provider/pydataprovider2_en.rst
diff --git a/doc/api/data_provider/src/mnist_config.py b/doc/api/v1/data_provider/src/mnist_config.py
similarity index 100%
rename from doc/api/data_provider/src/mnist_config.py
rename to doc/api/v1/data_provider/src/mnist_config.py
diff --git a/doc/api/data_provider/src/mnist_provider.dict.py b/doc/api/v1/data_provider/src/mnist_provider.dict.py
similarity index 100%
rename from doc/api/data_provider/src/mnist_provider.dict.py
rename to doc/api/v1/data_provider/src/mnist_provider.dict.py
diff --git a/doc/api/data_provider/src/mnist_train.txt b/doc/api/v1/data_provider/src/mnist_train.txt
similarity index 100%
rename from doc/api/data_provider/src/mnist_train.txt
rename to doc/api/v1/data_provider/src/mnist_train.txt
diff --git a/doc/api/data_provider/src/sentimental_config.py b/doc/api/v1/data_provider/src/sentimental_config.py
similarity index 100%
rename from doc/api/data_provider/src/sentimental_config.py
rename to doc/api/v1/data_provider/src/sentimental_config.py
diff --git a/doc/api/data_provider/src/sentimental_provider.py b/doc/api/v1/data_provider/src/sentimental_provider.py
similarity index 100%
rename from doc/api/data_provider/src/sentimental_provider.py
rename to doc/api/v1/data_provider/src/sentimental_provider.py
diff --git a/doc/api/data_provider/src/sentimental_train.txt b/doc/api/v1/data_provider/src/sentimental_train.txt
similarity index 100%
rename from doc/api/data_provider/src/sentimental_train.txt
rename to doc/api/v1/data_provider/src/sentimental_train.txt
diff --git a/doc/api/data_provider/src/train.list b/doc/api/v1/data_provider/src/train.list
similarity index 100%
rename from doc/api/data_provider/src/train.list
rename to doc/api/v1/data_provider/src/train.list
diff --git a/doc/api/v1/index_cn.rst b/doc/api/v1/index_cn.rst
new file mode 100644
index 0000000000000000000000000000000000000000..3718cd73a2003b8ef6c406a9bd51dc68e76402dc
--- /dev/null
+++ b/doc/api/v1/index_cn.rst
@@ -0,0 +1,37 @@
+API中文手册
+============
+
+DataProvider API
+----------------
+
+.. toctree::
+ :maxdepth: 1
+
+ data_provider/dataprovider_cn.rst
+ data_provider/pydataprovider2_cn.rst
+
+.. _api_trainer_config:
+
+Model Config API
+----------------
+
+.. toctree::
+ :maxdepth: 1
+
+ trainer_config_helpers/optimizers.rst
+ trainer_config_helpers/data_sources.rst
+ trainer_config_helpers/layers.rst
+ trainer_config_helpers/activations.rst
+ trainer_config_helpers/poolings.rst
+ trainer_config_helpers/networks.rst
+ trainer_config_helpers/evaluators.rst
+ trainer_config_helpers/attrs.rst
+
+
+Applications API
+----------------
+
+.. toctree::
+ :maxdepth: 1
+
+ predict/swig_py_paddle_cn.rst
diff --git a/doc/api/v1/index_en.rst b/doc/api/v1/index_en.rst
new file mode 100644
index 0000000000000000000000000000000000000000..10c297a71d6988c002de868e804ed9ee2345fbd7
--- /dev/null
+++ b/doc/api/v1/index_en.rst
@@ -0,0 +1,37 @@
+API
+===
+
+DataProvider API
+----------------
+
+.. toctree::
+ :maxdepth: 1
+
+ data_provider/dataprovider_en.rst
+ data_provider/pydataprovider2_en.rst
+
+.. _api_trainer_config:
+
+Model Config API
+----------------
+
+.. toctree::
+ :maxdepth: 1
+
+ trainer_config_helpers/optimizers.rst
+ trainer_config_helpers/data_sources.rst
+ trainer_config_helpers/layers.rst
+ trainer_config_helpers/activations.rst
+ trainer_config_helpers/poolings.rst
+ trainer_config_helpers/networks.rst
+ trainer_config_helpers/evaluators.rst
+ trainer_config_helpers/attrs.rst
+
+
+Applications API
+----------------
+
+.. toctree::
+ :maxdepth: 1
+
+ predict/swig_py_paddle_en.rst
diff --git a/doc/api/predict/src/predict_sample.py b/doc/api/v1/predict/src/predict_sample.py
similarity index 100%
rename from doc/api/predict/src/predict_sample.py
rename to doc/api/v1/predict/src/predict_sample.py
diff --git a/doc/api/predict/swig_py_paddle_cn.rst b/doc/api/v1/predict/swig_py_paddle_cn.rst
similarity index 100%
rename from doc/api/predict/swig_py_paddle_cn.rst
rename to doc/api/v1/predict/swig_py_paddle_cn.rst
diff --git a/doc/api/predict/swig_py_paddle_en.rst b/doc/api/v1/predict/swig_py_paddle_en.rst
similarity index 100%
rename from doc/api/predict/swig_py_paddle_en.rst
rename to doc/api/v1/predict/swig_py_paddle_en.rst
diff --git a/doc/api/trainer_config_helpers/activations.rst b/doc/api/v1/trainer_config_helpers/activations.rst
similarity index 100%
rename from doc/api/trainer_config_helpers/activations.rst
rename to doc/api/v1/trainer_config_helpers/activations.rst
diff --git a/doc/api/trainer_config_helpers/attrs.rst b/doc/api/v1/trainer_config_helpers/attrs.rst
similarity index 100%
rename from doc/api/trainer_config_helpers/attrs.rst
rename to doc/api/v1/trainer_config_helpers/attrs.rst
diff --git a/doc/api/trainer_config_helpers/data_sources.rst b/doc/api/v1/trainer_config_helpers/data_sources.rst
similarity index 100%
rename from doc/api/trainer_config_helpers/data_sources.rst
rename to doc/api/v1/trainer_config_helpers/data_sources.rst
diff --git a/doc/api/trainer_config_helpers/evaluators.rst b/doc/api/v1/trainer_config_helpers/evaluators.rst
similarity index 100%
rename from doc/api/trainer_config_helpers/evaluators.rst
rename to doc/api/v1/trainer_config_helpers/evaluators.rst
diff --git a/doc/api/trainer_config_helpers/layers.rst b/doc/api/v1/trainer_config_helpers/layers.rst
similarity index 100%
rename from doc/api/trainer_config_helpers/layers.rst
rename to doc/api/v1/trainer_config_helpers/layers.rst
diff --git a/doc/api/trainer_config_helpers/networks.rst b/doc/api/v1/trainer_config_helpers/networks.rst
similarity index 100%
rename from doc/api/trainer_config_helpers/networks.rst
rename to doc/api/v1/trainer_config_helpers/networks.rst
diff --git a/doc/api/trainer_config_helpers/optimizers.rst b/doc/api/v1/trainer_config_helpers/optimizers.rst
similarity index 100%
rename from doc/api/trainer_config_helpers/optimizers.rst
rename to doc/api/v1/trainer_config_helpers/optimizers.rst
diff --git a/doc/api/trainer_config_helpers/poolings.rst b/doc/api/v1/trainer_config_helpers/poolings.rst
similarity index 100%
rename from doc/api/trainer_config_helpers/poolings.rst
rename to doc/api/v1/trainer_config_helpers/poolings.rst
diff --git a/doc/api/v2/model_configs.rst b/doc/api/v2/model_configs.rst
new file mode 100644
index 0000000000000000000000000000000000000000..a9f33b33ef61bf846013364672ec26ae075d0300
--- /dev/null
+++ b/doc/api/v2/model_configs.rst
@@ -0,0 +1,6 @@
+======
+Layers
+======
+
+.. automodule:: paddle.v2.layer
+ :members:
diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in
index 418d718fbd9c61bff3acb9c2dab0638c0b650bab..6dc48704bc230bd1a573c4b4b2e7c07791e48ced 100644
--- a/doc/templates/conf.py.cn.in
+++ b/doc/templates/conf.py.cn.in
@@ -15,13 +15,19 @@ import sys
import os, subprocess
import shlex
from recommonmark import parser, transform
+try:
+ import py_paddle
+ import paddle
+ import paddle.v2
+except ImportError:
+ print("Must install paddle python package before generating documentation")
+ sys.exit(1)
MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
-sys.path.insert(0, '@PROJ_ROOT@/python')
templates_path = ["@PROJ_ROOT@/doc_theme/templates"]
# -- General configuration ------------------------------------------------
diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in
index e96c25cb75bee20d2e2949423d80ddab1d3450a1..b477f0120c4fa0544012080b7cfb8572d3c44b04 100644
--- a/doc/templates/conf.py.en.in
+++ b/doc/templates/conf.py.en.in
@@ -15,14 +15,20 @@ import sys
import os, subprocess
import shlex
from recommonmark import parser, transform
+try:
+ import py_paddle
+ import paddle
+ import paddle.v2
+except ImportError:
+ print("Must install paddle python package before generating documentation")
+ sys.exit(1)
+
MarkdownParser = parser.CommonMarkParser
AutoStructify = transform.AutoStructify
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
-sys.path.insert(0, '@PROJ_ROOT@/python')
-
templates_path = ["@PROJ_ROOT@/doc_theme/templates"]
# -- General configuration ------------------------------------------------
diff --git a/doc/tutorials/quick_start/index_en.md b/doc/tutorials/quick_start/index_en.md
index 70dec2eb2a8c397bc56b1e6f52a624a3a6877905..ca110431cf921ae0480d3fb2b17c58f90a84cc0e 100644
--- a/doc/tutorials/quick_start/index_en.md
+++ b/doc/tutorials/quick_start/index_en.md
@@ -156,14 +156,14 @@ define_py_data_sources2(train_list='data/train.list',
obj="process",
args={"dictionary": word_dict})
```
-You can refer to the following link for more detailed examples and data formats: PyDataProvider2.
+You can refer to the following link for more detailed examples and data formats: PyDataProvider2.
## Network Architecture
We will describe four kinds of network architectures in this section.

First, you will build a logistic regression model. Later, you will also get chance to build other more powerful network architectures.
-For more detailed documentation, you could refer to: layer documentation. All configuration files are in `demo/quick_start` directory.
+For more detailed documentation, you could refer to: layer documentation. All configuration files are in `demo/quick_start` directory.
### Logistic Regression
The architecture is illustrated in the following picture:
@@ -366,7 +366,7 @@ You can use single layer LSTM model with Dropout for our text classification pro
## Optimization Algorithm
-Optimization algorithms include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network.
+Optimization algorithms include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use the Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been shown to work very well for training recurrent neural networks.
```python
settings(batch_size=128,
@@ -407,7 +407,7 @@ paddle train \
--init_model_path=./output/pass-0000x
```
-We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to Python Prediction API tutorial,or other demo for the prediction process using Python. You can also use the following script for inference or evaluation.
+We will give an example of performing prediction using the recurrent model on a dataset with no labels. You can refer to the Python Prediction API tutorial, or other demos, for the prediction process using Python. You can also use the following script for inference or evaluation.
inference script (predict.sh):
diff --git a/paddle/scripts/travis/docs.sh b/paddle/scripts/travis/docs.sh
index 6b43cad20b76e9abeb3cb10a726d3d8e3da5f8e2..53e998ef6c1b96d9e7d82b7effd12a27e6dc69f2 100755
--- a/paddle/scripts/travis/docs.sh
+++ b/paddle/scripts/travis/docs.sh
@@ -2,8 +2,12 @@
# Add set -e, cd to directory.
source ./common.sh
-
# Compile Documentation only.
+cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF ${EXTRA_CMAKE_OPTS}
+mkdir output
+make DESTDIR=./output install -j `nproc`
+pip install ./output/usr/local/opt/paddle/share/wheels/*
+rm -rf *
cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS}
make paddle_docs paddle_docs_cn
@@ -25,26 +29,41 @@ TARGET_BRANCH="gh-pages"
# Only deploy master branch to build latest documentation.
SOURCE_BRANCH="master"
-# If is not a Github pull request, and in master branch.
-if [ "$TRAVIS_PULL_REQUEST" != "false" -o "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then
- exit 0
-fi
-
# Clone the repo to output directory
git clone $REPO output
cd output
-# checkout github page branch
-git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH
+function deploy_docs() {
+ SOURCE_BRANCH=$1
+ DIR=$2
+ # If it is not a Github pull request.
+ if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then
+ exit 0
+ fi
+ # If it is not the watched branch.
+ if [ "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then
+ return
+ fi
-# remove old docs. mv new docs.
-rm -rf doc doc_cn
-mv ../doc/cn/html doc_cn
-mv ../doc/en/html doc
+ # checkout github page branch
+ git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH
+
+ mkdir -p ${DIR}
+ # remove old docs. mv new docs.
+ set +e
+ rm -rf ${DIR}/doc ${DIR}/doc_cn
+ set -e
+ mv ../doc/cn/html ${DIR}/doc_cn
+ mv ../doc/en/html ${DIR}/doc
+ git add .
+}
+
+deploy_docs "master" "."
+deploy_docs "develop" "./develop/"
# Check is there anything changed.
set +e
-git diff --exit-code >/dev/null
+git diff --cached --exit-code >/dev/null
if [ $? -eq 0 ]; then
echo "No changes to the output on this push; exiting."
exit 0
@@ -57,7 +76,6 @@ if [ -n $SSL_KEY ]; then # Only push updated docs for github.com/PaddlePaddle/P
git config user.name "Travis CI"
git config user.email "paddle-dev@baidu.com"
git commit -m "Deploy to GitHub Pages: ${SHA}"
-
# Set ssh private key
openssl aes-256-cbc -K $SSL_KEY -iv $SSL_IV -in ../../paddle/scripts/travis/deploy_key.enc -out deploy_key -d
chmod 600 deploy_key
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
index f663ef735d6424c45815a73d112d135be0dc5f8e..25526bf409cf82f26979a84700ce948ac969df0c 100644
--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -28,6 +28,7 @@ import pooling
import inference
import networks
import py_paddle.swig_paddle as api
+import minibatch
__all__ = [
'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
@@ -45,3 +46,4 @@ def init(**kwargs):
infer = inference.infer
+batch = minibatch.batch
diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py
index 82f11a7c41149c2231130dc7c2205debb643aa89..6c371d3c9bdee94a91b9a48ff7c4a006c8d7eb21 100644
--- a/python/paddle/v2/dataset/__init__.py
+++ b/python/paddle/v2/dataset/__init__.py
@@ -20,8 +20,9 @@ import movielens
import conll05
import uci_housing
import sentiment
+import wmt14
__all__ = [
- 'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment'
- 'uci_housing'
+ 'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment',
+ 'uci_housing', 'wmt14'
]
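The hunk above also repairs a subtle Python pitfall: adjacent string literals concatenate implicitly, so without the trailing comma after `'sentiment'` the list silently contained a single `'sentimentuci_housing'` entry. A minimal illustration:

```python
# Adjacent string literals concatenate implicitly when a comma is missing.
broken = ['conll05', 'sentiment' 'uci_housing']
fixed = ['conll05', 'sentiment', 'uci_housing']
assert broken == ['conll05', 'sentimentuci_housing']
assert len(fixed) == 3
```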
diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py
new file mode 100644
index 0000000000000000000000000000000000000000..9904848b5d3ef95dc331fc0ba1a98f29f8b1dfeb
--- /dev/null
+++ b/python/paddle/v2/dataset/wmt14.py
@@ -0,0 +1,142 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+wmt14 dataset
+"""
+import paddle.v2.dataset.common
+import tarfile
+import os.path
+import itertools
+
+__all__ = ['train', 'test', 'build_dict']
+
+URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
+MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
+URL_TRAIN = 'http://localhost:8000/train.tgz'
+MD5_TRAIN = '72de99da2830ea5a3a2c4eb36092bbc7'
+
+
+def word_count(f, word_freq=None):
+ add = paddle.v2.dataset.common.dict_add
+ if word_freq is None:
+ word_freq = {}
+
+ for l in f:
+ for w in l.strip().split():
+ add(word_freq, w)
+ add(word_freq, '<s>')
+ add(word_freq, '<e>')
+
+ return word_freq
+
+
+def get_word_idx(word_freq):
+ TYPO_FREQ = 50
+ word_freq = filter(lambda x: x[1] > TYPO_FREQ, word_freq.items())
+ word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0]))
+ words, _ = list(zip(*word_freq_sorted))
+ word_idx = dict(zip(words, xrange(len(words))))
+ word_idx['<unk>'] = len(words)
+ return word_idx
+
+
+def get_word_freq(train, dev):
+ word_freq = word_count(train, word_count(dev))
+ if '<unk>' in word_freq:
+ # remove <unk> for now, since we will set it as last index
+ del word_freq['<unk>']
+ return word_freq
+
+
+def build_dict():
+ base_dir = './wmt14-data'
+ train_en_filename = base_dir + '/train/train.en'
+ train_fr_filename = base_dir + '/train/train.fr'
+ dev_en_filename = base_dir + '/dev/ntst1213.en'
+ dev_fr_filename = base_dir + '/dev/ntst1213.fr'
+
+ if not os.path.exists(train_en_filename) or not os.path.exists(
+ train_fr_filename):
+ with tarfile.open(
+ paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14',
+ MD5_TRAIN)) as tf:
+ tf.extractall(base_dir)
+
+ if not os.path.exists(dev_en_filename) or not os.path.exists(
+ dev_fr_filename):
+ with tarfile.open(
+ paddle.v2.dataset.common.download(URL_DEV_TEST, 'wmt14',
+ MD5_DEV_TEST)) as tf:
+ tf.extractall(base_dir)
+
+ f_en = open(train_en_filename)
+ f_fr = open(train_fr_filename)
+ f_en_dev = open(dev_en_filename)
+ f_fr_dev = open(dev_fr_filename)
+
+ word_freq_en = get_word_freq(f_en, f_en_dev)
+ word_freq_fr = get_word_freq(f_fr, f_fr_dev)
+
+ f_en.close()
+ f_fr.close()
+ f_en_dev.close()
+ f_fr_dev.close()
+
+ return get_word_idx(word_freq_en), get_word_idx(word_freq_fr)
+
+
+def reader_creator(directory, path_en, path_fr, URL, MD5, dict_en, dict_fr):
+ def reader():
+ if not os.path.exists(path_en) or not os.path.exists(path_fr):
+ with tarfile.open(
+ paddle.v2.dataset.common.download(URL, 'wmt14', MD5)) as tf:
+ tf.extractall(directory)
+
+ f_en = open(path_en)
+ f_fr = open(path_fr)
+ UNK_en = dict_en['<unk>']
+ UNK_fr = dict_fr['<unk>']
+
+ for en, fr in itertools.izip(f_en, f_fr):
+ src_ids = [dict_en.get(w, UNK_en) for w in en.strip().split()]
+ tar_ids = [
+ dict_fr.get(w, UNK_fr)
+ for w in ['<s>'] + fr.strip().split() + ['<e>']
+ ]
+
+ # remove sequence whose length > 80 in training mode
+ if len(src_ids) == 0 or len(tar_ids) <= 1 or len(
+ src_ids) > 80 or len(tar_ids) > 80:
+ continue
+
+ yield src_ids, tar_ids[:-1], tar_ids[1:]
+
+ f_en.close()
+ f_fr.close()
+
+ return reader
+
+
+def train(dict_en, dict_fr):
+ directory = './wmt14-data'
+ return reader_creator(directory, directory + '/train/train.en',
+ directory + '/train/train.fr', URL_TRAIN, MD5_TRAIN,
+ dict_en, dict_fr)
+
+
+def test(dict_en, dict_fr):
+ directory = './wmt14-data'
+ return reader_creator(directory, directory + '/dev/ntst1213.en',
+ directory + '/dev/ntst1213.fr', URL_DEV_TEST,
+ MD5_DEV_TEST, dict_en, dict_fr)
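A minimal usage sketch for the new wmt14 readers, assuming the tarballs are reachable from the configured URLs (variable names here are illustrative):

```python
import paddle.v2 as paddle
import paddle.v2.dataset.wmt14 as wmt14

# build_dict downloads the corpora on first use and returns word -> index maps.
dict_en, dict_fr = wmt14.build_dict()

# Each sample is (source ids, target ids, target ids shifted by one).
train_reader = paddle.batch(wmt14.train(dict_en, dict_fr), batch_size=16)
for minibatch in train_reader():
    src_ids, trg_ids, trg_ids_next = minibatch[0]
    break
```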
diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py
index 010773ddbd96d4226cccc1a63cfc133b78bdcffe..711226d659d49fc2646c34c011c7773ae2517ec9 100644
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
@@ -12,58 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
-Before this new package paddle.v2.layer, users would need to use functions
-in paddle.trainer_config_helpers.layers to configure networks.
-
-The Old Way:
-=========
-This old way requires that the creation of a network be defined in a Python
-function, say network_config, and that this Python function being passed to
-paddle.trainer_config_helpers.parse_network_config for the creation of
-protobuf message description of this network.
-
-```python
-def network_config():
- img = paddle.trainer_config_helpers.data_layer(name="pixel", size=784)
- inference = paddle.trainer_config_helpers.fc_layer(
- input=img,
- size=10,
- act=paddle.trainer_config_helpers.SoftmaxActivation())
- cost = paddle.trainer_config_helpers.classification_cost(
- input=inference,
- label=paddle.trainer_config_helpers.data_layer(name="label", size=10))
-
-proto_desc = parse_network_config(network_config)
-```
-
-When parse_network_config executes network_config, those layer definition
-functions like data_layer and fc_layer would change some Python global variables,
-so that after the execution, parse_network_config could collect information from
-these global variables and generates the protobuf message.
-
-
-
-The New Way:
-=========
-In this PR, we define a function in paddle.v2.layer which creates a Python
-class for each layer creation function in paddle.trainer_config_helpers.layers.
-Users can use create a network as follows:
-
-```python
-img = paddle.v2.layer.data(name="pixel", size=784)
-inference = paddle.v2.layer.fc(input=img, size=10, act=paddle.v2.layer.Softmax())
-cost = paddle.v2.layer.classification(
- input=inference,
- label=paddle.v2.layer.data(name="label", size=10))
-
-parameters = paddle.v2.parameters.create(cost)
-```
-
-This new way doesn't require those invocations to layer definition functions
-to be in a Python function but could be anywhere.
-
-Also, the creation of a protobuf message is hidden in the invocation of
-paddle.v2.parameters.create, no longer exposed to users.
+`paddle.v2.layer` is a part of the model configuration packages in paddle.v2.
+In API v2, we want to make Paddle a plain Python package. The model config
+package defines how to configure a neural network topology in Paddle Python
+code.
+
+The primary usage is shown below.
+
+.. code-block:: python
+
+ import paddle.v2 as paddle
+
+ img = paddle.layer.data(name='img', type=paddle.data_type.dense_vector(784))
+ hidden = paddle.layer.fc(input=img, size=200)
+ prediction = paddle.layer.fc(input=hidden, size=10,
+ act=paddle.activation.Softmax())
+
+ # use the prediction instance where needed.
+ parameters = paddle.v2.parameters.create(prediction)
"""
import collections
diff --git a/python/paddle/v2/minibatch.py b/python/paddle/v2/minibatch.py
new file mode 100644
index 0000000000000000000000000000000000000000..f01815a0ce068d503c23c4126e16cfcb28202bb8
--- /dev/null
+++ b/python/paddle/v2/minibatch.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def batch(reader, batch_size):
+ """
+ Create a batch reader.
+ :param reader: the data reader to read from.
+ :param batch_size: the number of instances in each returned batch.
+ :return: the batch reader.
+ """
+
+ def batch_reader():
+ r = reader()
+ batch = []
+ for instance in r:
+ batch.append(instance)
+ if len(batch) == batch_size:
+ yield batch
+ batch = []
+ if batch:
+ yield batch
+
+ return batch_reader
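The new top-level `batch` composes with the decorators that remain in `paddle.v2.reader`, as the updated demos show. A sketch of a typical pipeline, assuming `paddle.v2` is installed (the `lines` reader is illustrative):

```python
import paddle.v2 as paddle

def lines():
    # An item-level reader over dummy integers.
    for i in range(100):
        yield i

# shuffle buffers and permutes instances, firstn truncates the stream,
# and batch groups the survivors into minibatches of four.
reader = paddle.batch(
    paddle.reader.firstn(
        paddle.reader.shuffle(lines, buf_size=32), 10),
    batch_size=4)
```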
diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/v2/reader/decorator.py
index b7657e27764f099334ba3030c493a7607f323abe..c4ba110205a3c1cca2f5581d33048d2a205929b4 100644
--- a/python/paddle/v2/reader/decorator.py
+++ b/python/paddle/v2/reader/decorator.py
@@ -14,7 +14,7 @@
__all__ = [
'map_readers', 'buffered', 'compose', 'chain', 'shuffle',
- 'ComposeNotAligned', 'batched', 'firstn'
+ 'ComposeNotAligned', 'firstn'
]
import itertools
@@ -193,28 +193,6 @@ def buffered(reader, size):
return data_reader
-def batched(reader, batch_size):
- """
- Create a batched reader.
- :param reader: the data reader to read from.
- :param batch_size: batch_size
- :return: the batched reader.
- """
-
- def batched_reader():
- r = reader()
- batch = []
- for instance in r:
- batch.append(instance)
- if len(batch) == batch_size:
- yield batch
- batch = []
- if batch:
- yield batch
-
- return batched_reader
-
-
def firstn(reader, n):
"""
Limit the max number of samples that reader could return.