diff --git a/CMakeLists.txt b/CMakeLists.txt index d7e7e49e9a038acc6ca272433cd39b08c812eccc..090ac9e188422099cc4270b87064b5590e7b620c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 2.8) project(paddle CXX C) set(PADDLE_MAJOR_VERSION 0) -set(PADDLE_MINOR_VERSION 8) -set(PADDLE_PATCH_VERSION 0b3) +set(PADDLE_MINOR_VERSION 9) +set(PADDLE_PATCH_VERSION 0a0) set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION}) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") diff --git a/demo/sentiment/trainer_config.py b/demo/sentiment/trainer_config.py index 894070e7c97dcb29e8c0df31437a374be5f5d691..114a9138ebfef054c7d3ba99b4a510a452f8f2cd 100644 --- a/demo/sentiment/trainer_config.py +++ b/demo/sentiment/trainer_config.py @@ -29,6 +29,7 @@ settings( batch_size=128, learning_rate=2e-3, learning_method=AdamOptimizer(), + average_window=0.5, regularization=L2Regularization(8e-4), gradient_clipping_threshold=25) diff --git a/doc/algorithm/rnn/rnn.rst b/doc/algorithm/rnn/rnn.rst index 399c5da5fffc20dda78b9eefb2629308cabd748e..01d2caefb5cdf4e949511fd0f5bbafe0e604e881 100644 --- a/doc/algorithm/rnn/rnn.rst +++ b/doc/algorithm/rnn/rnn.rst @@ -17,7 +17,7 @@ PaddlePaddle does not need any preprocessing to sequence data, such as padding. .. 
code-block:: python - settings.slots = [ + settings.input_types = [ integer_value_sequence(len(settings.src_dict)), integer_value_sequence(len(settings.trg_dict)), integer_value_sequence(len(settings.trg_dict))] diff --git a/doc/demo/sentiment_analysis/sentiment_analysis.md b/doc/demo/sentiment_analysis/sentiment_analysis.md index 385f49891dcd840c525f7d1c3aaf7f08a7e4903f..c53952c544de9fa88a6318432e34b0d05b149445 100644 --- a/doc/demo/sentiment_analysis/sentiment_analysis.md +++ b/doc/demo/sentiment_analysis/sentiment_analysis.md @@ -6,7 +6,7 @@ Sentiment analysis is also used to monitor social media based on large amount of On the other hand, grabbing the user comments of products and analyzing their sentiment are useful to understand user preferences for companies, products, even competing products. -This tutorial will guide you through the process of training a Long Short Term Memory (LSTM) Network to classify the sentiment of sentences from [Large Movie Review Dataset](http://ai.stanford.edu/~amaas/data/sentiment/), sometimes known as the [Internet Movie Database (IMDB)](http://ai.stanford.edu/~amaas/papers/wvSent_acl2011.pdf). This dataset contains movie reviews along with their associated binary sentiment polarity labels, namely positive and negative. So randomly guessing yields 50% accuracy. +This tutorial will guide you through the process of training a Long Short Term Memory (LSTM) Network to classify the sentiment of sentences from [Large Movie Review Dataset](http://ai.stanford.edu/~amaas/data/sentiment/), sometimes known as the Internet Movie Database (IMDB). This dataset contains movie reviews along with their associated binary sentiment polarity labels, namely positive and negative. So randomly guessing yields 50% accuracy. ## Data Preparation @@ -39,7 +39,7 @@ imdbEr.txt imdb.vocab README test train * imdbEr.txt: expected rating for each token in imdb.vocab. * README: data documentation. 
-Both train and test set directory contains: +The files in the train set directory are listed below. The test set directory contains the same files except `unsup` and `urls_unsup.txt`. ``` labeledBow.feat neg pos unsup unsupBow.feat urls_neg.txt urls_pos.txt urls_unsup.txt @@ -151,6 +151,7 @@ settings( batch_size=128, learning_rate=2e-3, learning_method=AdamOptimizer(), + average_window=0.5, regularization=L2Regularization(8e-4), gradient_clipping_threshold=25 ) @@ -163,17 +164,18 @@ stacked_lstm_net(dict_dim, class_dim=class_dim, * **Data Definition**: * get\_config\_arg(): get arguments setted by `--config_args=xx` in commandline argument. - * Define TrainData and TestData provider, here using Python interface (PyDataProviderWrapper) of PaddlePaddle to load data. For details, you can refer to the document of PyDataProvider. + * Define the data provider, here using the Python interface to load data. For details, refer to the PyDataProvider2 documentation. * **Algorithm Configuration**: - * use sgd algorithm. - * use adam optimization. * set batch size of 128. - * set average sgd window. * set global learning rate. + * use adam optimization. + * set average sgd window. + * set L2 regularization. + * set gradient clipping threshold. * **Network Configuration**: - * dict_dim: get dictionary dimension. - * class_dim: set category number, IMDB has two label, namely positive and negative label. + * dict_dim: dictionary dimension. + * class_dim: number of categories; IMDB has two labels, namely positive and negative. * `stacked_lstm_net`: predefined network as shown in Figure 3, use this network by default. * `bidirectional_lstm_net`: predefined network as shown in Figure 2. 
diff --git a/doc/dev/new_layer/new_layer.rst b/doc/dev/new_layer/new_layer.rst index 2fa00730486dbe1f2c9585872068a77efa09f004..af8b76a3075194ead9be40d2c943238b2cfadecc 100644 --- a/doc/dev/new_layer/new_layer.rst +++ b/doc/dev/new_layer/new_layer.rst @@ -60,7 +60,7 @@ Implement C++ Class The C++ class of the layer implements the initialization, forward, and backward part of the layer. The fully connected layer is at :code:`paddle/gserver/layers/FullyConnectedLayer.h` and :code:`paddle/gserver/layers/FullyConnectedLayer.cpp`. We list simplified version of the code below. -It needs to derive the base class :code:`paddle::BaseLayer`, and it needs to override the following functions: +It needs to derive the base class :code:`paddle::Layer`, and it needs to override the following functions: - constructor and destructor. - :code:`init` function. It is used to initialize the parameters and settings. diff --git a/paddle/scripts/deb/build_scripts/build.sh b/paddle/scripts/deb/build_scripts/build.sh index 66a1cfb247dad0292c0832046fb121d14b15b5ba..d13dea514841b110c304b8aa0e65ad16e42c75f3 100755 --- a/paddle/scripts/deb/build_scripts/build.sh +++ b/paddle/scripts/deb/build_scripts/build.sh @@ -1,12 +1,12 @@ #!/bin/bash set -e +apt-get update apt-get install -y dh-make cd ~ mkdir -p ~/dist/gpu mkdir -p ~/dist/cpu mkdir -p ~/dist/cpu-noavx mkdir -p ~/dist/gpu-noavx -git clone https://github.com/baidu/Paddle.git paddle cd paddle mkdir build cd build diff --git a/paddle/scripts/deb/build_scripts/build_deb.sh b/paddle/scripts/deb/build_scripts/build_deb.sh index 1331c1249d5a7eae8bf8f4648aacd8579363a402..c38c6299f840345b7f6f6e0aad7482241d36198a 100755 --- a/paddle/scripts/deb/build_scripts/build_deb.sh +++ b/paddle/scripts/deb/build_scripts/build_deb.sh @@ -3,6 +3,6 @@ set -e docker build -t build_paddle_deb . 
rm -rf dist mkdir -p dist -docker run -v$PWD/dist:/root/dist --name tmp_build_deb_container build_paddle_deb +docker run -v$PWD/dist:/root/dist -v $PWD/../../../..:/root/paddle --name tmp_build_deb_container build_paddle_deb docker rm tmp_build_deb_container docker rmi build_paddle_deb diff --git a/paddle/scripts/docker/Dockerfile.cpu b/paddle/scripts/docker/Dockerfile.cpu index a833c69c66900ee23176909ffce0835f6637c391..69b8363b7ac9eed033ec4958e189e233b3dc2689 100644 --- a/paddle/scripts/docker/Dockerfile.cpu +++ b/paddle/scripts/docker/Dockerfile.cpu @@ -1,7 +1,7 @@ FROM ubuntu:14.04 MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=OFF ENV IS_DEVEL=OFF ENV WITH_DEMO=OFF diff --git a/paddle/scripts/docker/Dockerfile.cpu-demo b/paddle/scripts/docker/Dockerfile.cpu-demo index 1fda1e472b290c970a29e927db001abd949d9e62..ccbd183ee3c1ac27fc624f22847f53eb7d60b83d 100644 --- a/paddle/scripts/docker/Dockerfile.cpu-demo +++ b/paddle/scripts/docker/Dockerfile.cpu-demo @@ -1,7 +1,7 @@ FROM ubuntu:14.04 MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=OFF ENV IS_DEVEL=ON ENV WITH_DEMO=ON diff --git a/paddle/scripts/docker/Dockerfile.cpu-devel b/paddle/scripts/docker/Dockerfile.cpu-devel index 66bdc978ddcb4bdd2c670cbbb4004bfaba54c8b3..36460384f383ba10c4bff1d9875cd053d6391b97 100644 --- a/paddle/scripts/docker/Dockerfile.cpu-devel +++ b/paddle/scripts/docker/Dockerfile.cpu-devel @@ -1,7 +1,7 @@ FROM ubuntu:14.04 MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=OFF ENV IS_DEVEL=ON ENV WITH_DEMO=OFF diff --git a/paddle/scripts/docker/Dockerfile.cpu-noavx b/paddle/scripts/docker/Dockerfile.cpu-noavx index d0ba30e55afb2c91875838cf8e59f51250ce6f3a..fa3b7427b0ad3973423894fa7af54ae5a2514e06 100644 --- a/paddle/scripts/docker/Dockerfile.cpu-noavx +++ 
b/paddle/scripts/docker/Dockerfile.cpu-noavx @@ -1,7 +1,7 @@ FROM ubuntu:14.04 MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=OFF ENV IS_DEVEL=OFF ENV WITH_DEMO=OFF diff --git a/paddle/scripts/docker/Dockerfile.cpu-noavx-demo b/paddle/scripts/docker/Dockerfile.cpu-noavx-demo index 28439b4bdfab437ae947c1fba637632c149fcd1d..61315f762dee4d64251ef3d8db5b11b30a3ddb3a 100644 --- a/paddle/scripts/docker/Dockerfile.cpu-noavx-demo +++ b/paddle/scripts/docker/Dockerfile.cpu-noavx-demo @@ -1,7 +1,7 @@ FROM ubuntu:14.04 MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=OFF ENV IS_DEVEL=ON ENV WITH_DEMO=ON diff --git a/paddle/scripts/docker/Dockerfile.cpu-noavx-devel b/paddle/scripts/docker/Dockerfile.cpu-noavx-devel index eb4739d6dc742407549cfdaa73544282c260db87..76365311990b527ea473be840770bfeb6025d74f 100644 --- a/paddle/scripts/docker/Dockerfile.cpu-noavx-devel +++ b/paddle/scripts/docker/Dockerfile.cpu-noavx-devel @@ -1,7 +1,7 @@ FROM ubuntu:14.04 MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=OFF ENV IS_DEVEL=ON ENV WITH_DEMO=OFF diff --git a/paddle/scripts/docker/Dockerfile.gpu b/paddle/scripts/docker/Dockerfile.gpu index fa61cfeec851f128f3f073afc64e0499b322e4dd..1e023ae2818dbb27c457ff17b01fc4ab02815eba 100644 --- a/paddle/scripts/docker/Dockerfile.gpu +++ b/paddle/scripts/docker/Dockerfile.gpu @@ -1,7 +1,7 @@ FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04 MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=ON ENV IS_DEVEL=OFF ENV WITH_DEMO=OFF diff --git a/paddle/scripts/docker/Dockerfile.gpu-demo b/paddle/scripts/docker/Dockerfile.gpu-demo index 4f5417c1af072b6c5366dc5fe0dbedae3f9c880e..92b0dca4026c89c6749e14f189370183462333b8 100644 --- a/paddle/scripts/docker/Dockerfile.gpu-demo +++ 
b/paddle/scripts/docker/Dockerfile.gpu-demo @@ -1,7 +1,7 @@ FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04 MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=ON ENV IS_DEVEL=ON ENV WITH_DEMO=ON diff --git a/paddle/scripts/docker/Dockerfile.gpu-devel b/paddle/scripts/docker/Dockerfile.gpu-devel index 37cfced1908861b9131c1dd80a610eadb9bcd882..fb6f351fd2f7e0f950e00ac96681de88ca238f70 100644 --- a/paddle/scripts/docker/Dockerfile.gpu-devel +++ b/paddle/scripts/docker/Dockerfile.gpu-devel @@ -1,7 +1,7 @@ FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04 MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=ON ENV IS_DEVEL=ON ENV WITH_DEMO=OFF diff --git a/paddle/scripts/docker/Dockerfile.gpu-noavx b/paddle/scripts/docker/Dockerfile.gpu-noavx index 95fb125b799e8f0403cdae0d2c191188a52285e6..7567e62025506ca2ae8c1d35d595d92ed6de87f3 100644 --- a/paddle/scripts/docker/Dockerfile.gpu-noavx +++ b/paddle/scripts/docker/Dockerfile.gpu-noavx @@ -1,7 +1,7 @@ FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04 MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=ON ENV IS_DEVEL=OFF ENV WITH_DEMO=OFF diff --git a/paddle/scripts/docker/Dockerfile.gpu-noavx-demo b/paddle/scripts/docker/Dockerfile.gpu-noavx-demo index b5fbe4b941d6814cde304a116da253dd48ed41c8..ac52484c5cb513537283e1a0ffbe9df067fefc9a 100644 --- a/paddle/scripts/docker/Dockerfile.gpu-noavx-demo +++ b/paddle/scripts/docker/Dockerfile.gpu-noavx-demo @@ -1,7 +1,7 @@ FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04 MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=ON ENV IS_DEVEL=ON ENV WITH_DEMO=ON diff --git a/paddle/scripts/docker/Dockerfile.gpu-noavx-devel b/paddle/scripts/docker/Dockerfile.gpu-noavx-devel index 
531c8ec7ae30cd688b06fe1ba03bd215be81096c..19202f306b8f71e93af085d5285098a1fbe1dba7 100644 --- a/paddle/scripts/docker/Dockerfile.gpu-noavx-devel +++ b/paddle/scripts/docker/Dockerfile.gpu-noavx-devel @@ -1,7 +1,7 @@ FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04 MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=ON ENV IS_DEVEL=ON ENV WITH_DEMO=OFF diff --git a/paddle/scripts/docker/Dockerfile.m4 b/paddle/scripts/docker/Dockerfile.m4 index 57c865584413381abc54dc759405360a5ae354f3..e14493ed9e842351125ab458db53fcc3f38233f6 100644 --- a/paddle/scripts/docker/Dockerfile.m4 +++ b/paddle/scripts/docker/Dockerfile.m4 @@ -1,7 +1,7 @@ FROM PADDLE_BASE_IMAGE MAINTAINER PaddlePaddle Dev Team COPY build.sh /root/ -ENV GIT_CHECKOUT=develop +ENV GIT_CHECKOUT=v0.9.0a0 ENV WITH_GPU=PADDLE_WITH_GPU ENV IS_DEVEL=PADDLE_IS_DEVEL ENV WITH_DEMO=PADDLE_WITH_DEMO diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in index 213cf2f1cc7e491dc6455f1af434446806aa4ccc..20ea2fedc4d464cdd5403af28bc917770c993b98 100644 --- a/paddle/scripts/submit_local.sh.in +++ b/paddle/scripts/submit_local.sh.in @@ -28,6 +28,41 @@ function version(){ echo " with_predict_sdk: @WITH_PREDICT_SDK@" } +function ver2num() { + # Convert a version string (e.g. "v0.9.0a0") into a 15-digit number made of + # five zero-padded fields, so two versions can be compared with -lt. + # Pre-release tags map to a=0, b=1, rc=2; a plain three-field release is + # padded with "999 999" so that it ranks above its own pre-releases. 
+ if [ -z "$1" ]; then # empty argument + printf "%03d%03d%03d%03d%03d" 0 + else + local VERN=$(echo "$1" | sed 's#v##g' | sed 's#\.# #g' \ + | sed 's#a# 0 #g' | sed 's#b# 1 #g' | sed 's#rc# 2 #g') + if [ `echo $VERN | wc -w` -eq 3 ] ; then + printf "%03d%03d%03d%03d%03d" $VERN 999 999 + else + printf "%03d%03d%03d%03d%03d" $VERN + fi + fi +} + +PADDLE_CONF_HOME="$HOME/.config/paddle" +mkdir -p "${PADDLE_CONF_HOME}" + +# Check for a newer release unless PADDLE_NO_STAT is set (to any value, even empty). +# --fail makes curl exit non-zero on HTTP errors, so an error page is never parsed as a version. +PADDLE_VER_REGEX='^v?[0-9]+(\.[0-9]+)*((a|b|rc)[0-9]+)?$' +if [ -z "${PADDLE_NO_STAT+x}" ]; then + SERVER_VER=`curl --fail -m 5 -X POST --data content="{ \"version\": \"@PADDLE_VERSION@\" }"\ + -b "${PADDLE_CONF_HOME}/paddle.cookie" \ + -c "${PADDLE_CONF_HOME}/paddle.cookie" \ + http://api.paddlepaddle.org/version 2>/dev/null` + if [ $? -eq 0 ] && [[ "$SERVER_VER" =~ $PADDLE_VER_REGEX ]] \ + && [ "$(ver2num @PADDLE_VERSION@)" -lt "$(ver2num "$SERVER_VER")" ]; then + echo "Paddle release a new version ${SERVER_VER}, you can get the install package in http://www.paddlepaddle.org" + fi +fi + MYDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"