diff --git a/WORKSPACE b/WORKSPACE deleted file mode 100644 index f097c41da85affd1ff0b24200dbdbc63bf9c3ab6..0000000000000000000000000000000000000000 --- a/WORKSPACE +++ /dev/null @@ -1,31 +0,0 @@ -# External dependency to Google protobuf. -http_archive( - name="protobuf", - url="http://github.com/google/protobuf/archive/v3.1.0.tar.gz", - sha256="0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7", - strip_prefix="protobuf-3.1.0") - -# External dependency to gtest 1.7.0. This method comes from -# https://www.bazel.io/versions/master/docs/tutorial/cpp.html. -new_http_archive( - name="gtest", - url="https://github.com/google/googletest/archive/release-1.7.0.zip", - sha256="b58cb7547a28b2c718d1e38aee18a3659c9e3ff52440297e965f5edffe34b6d0", - build_file="third_party/gtest.BUILD", - strip_prefix="googletest-release-1.7.0") - -# External dependency to gflags. This method comes from -# https://github.com/gflags/example/blob/master/WORKSPACE. -new_git_repository( - name="gflags", - tag="v2.2.0", - remote="https://github.com/gflags/gflags.git", - build_file="third_party/gflags.BUILD") - -# External dependency to glog. This method comes from -# https://github.com/reyoung/bazel_playground/blob/master/WORKSPACE -new_git_repository( - name="glog", - remote="https://github.com/google/glog.git", - commit="b6a5e0524c28178985f0d228e9eaa43808dbec3c", - build_file="third_party/glog.BUILD") diff --git a/demo/quick_start/api_predict.sh b/demo/quick_start/api_predict.sh index c90d3b70548b3ef2a7e0e423c74cd97f1886c0fc..4d9aa9e8854ed79446a47dbc593f419cdda077b4 100755 --- a/demo/quick_start/api_predict.sh +++ b/demo/quick_start/api_predict.sh @@ -17,7 +17,7 @@ set -e #Note the default model is pass-00002, you shold make sure the model path #exists or change the mode path. 
#only test on trainer_config.lr.py -model=output/pass-00001/ +model=output/model/pass-00001/ config=trainer_config.lr.py label=data/labels.list dict=data/dict.txt diff --git a/demo/quick_start/cluster/cluster_train.sh b/demo/quick_start/cluster/cluster_train.sh new file mode 100755 index 0000000000000000000000000000000000000000..aac9b89b14b98ac8e2db7def19e5f06c01682493 --- /dev/null +++ b/demo/quick_start/cluster/cluster_train.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +set -eo pipefail # pipefail: a failing `paddle train` must not be masked by tee's exit status + +# Start pserver.sh before running this script. 
+bin_dir=$(cd "$(dirname "$0")"; pwd) +home_dir=$(cd "${bin_dir}/.."; pwd) +source "$bin_dir/env.sh" + +model_dir="$bin_dir/output" +log_file="$bin_dir/train.log" + +pushd "$home_dir" +cfg=trainer_config.lr.py +paddle train \ + --config=$cfg \ + --save_dir="${model_dir}" \ + --trainer_count=4 \ + --local=0 \ + --log_period=100 \ + --num_passes=15 \ + --use_gpu=false \ + --show_parameter_stats_period=100 \ + --test_all_data_in_one_period=1 \ + --num_gradient_servers=1 \ + --nics=`get_nics` \ + --port=7164 \ + --ports_num=1 \ + --pservers="127.0.0.1" \ + --comment="paddle_trainer" \ + 2>&1 | tee "$log_file" +popd diff --git a/demo/quick_start/cluster/env.sh b/demo/quick_start/cluster/env.sh new file mode 100644 index 0000000000000000000000000000000000000000..a404993835d0e479f65c89c5561855293b7b66f0 --- /dev/null +++ b/demo/quick_start/cluster/env.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+set -e + +function get_nics() { + machine=`uname -s` + local nics="" + if [ "$machine" == "Linux" ]; then + nics="lo" + elif [ "$machine" == "Darwin" ]; then + nics="lo0" + else + nics="unsupport" + fi + echo $nics +} diff --git a/demo/quick_start/cluster/pserver.sh b/demo/quick_start/cluster/pserver.sh new file mode 100755 index 0000000000000000000000000000000000000000..b187c1d9b9108a607ed310253d54ecc096f0e792 --- /dev/null +++ b/demo/quick_start/cluster/pserver.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+set -eo pipefail # pipefail: a failing `paddle pserver` must not be masked by tee's exit status +bin_dir=$(cd "$(dirname "$0")"; pwd) +source "$bin_dir/env.sh" + +paddle pserver \ + --nics=`get_nics` \ + --port=7164 \ + --ports_num=1 \ + --ports_num_for_sparse=1 \ + --num_gradient_servers=1 \ + --comment="paddle_pserver" \ + 2>&1 | tee 'pserver.log' diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst index 57725c0d85997a5b67e08ebbf0979fca69998988..34279a29b2e4c84aa5039f2e5ab2c6ed9a06da2f 100644 --- a/doc/getstarted/build_and_install/docker_install_en.rst +++ b/doc/getstarted/build_and_install/docker_install_en.rst @@ -65,16 +65,13 @@ The general development workflow with Docker and Bazel is as follows: --name paddle \ -p 2022:22 \ -v $PWD:/paddle \ - -v $HOME/.cache/bazel:/root/.cache/bazel \ paddle:dev where :code:`-d` makes the container running in background, :code:`--name paddle` allows us to run a nginx container to serve documents in this container, :code:`-p 2022:22` allows us to SSH into this container, :code:`-v $PWD:/paddle` shares the source code - on the host with the container, :code:`-v - $HOME/.cache/bazel:/root/.cache/bazel` shares Bazel cache on the - host with the container. + on the host with the container. 4. SSH into the container: @@ -94,13 +91,6 @@ The general development workflow with Docker and Bazel is as follows: make -j `nproc` CTEST_OUTPUT_ON_FAILURE=1 ctest - or Bazel in the container: - - .. code-block:: bash - - cd /paddle - bazel test ... 
- CPU-only and GPU Images ----------------------- diff --git a/doc/tutorials/gan/gan.png b/doc/tutorials/gan/gan.png new file mode 100644 index 0000000000000000000000000000000000000000..001ed6cc19e8911f9b10f63211c9658160b3a06e Binary files /dev/null and b/doc/tutorials/gan/gan.png differ diff --git a/doc/tutorials/gan/index_en.md b/doc/tutorials/gan/index_en.md new file mode 100644 index 0000000000000000000000000000000000000000..99c8d730117a469c89abb218eeacf66103c0cbed --- /dev/null +++ b/doc/tutorials/gan/index_en.md @@ -0,0 +1,143 @@ +# Generative Adversarial Networks (GAN) + +This demo implements GAN training described in the original [GAN paper](https://arxiv.org/abs/1406.2661) and deep convolutional generative adversarial networks [DCGAN paper](https://arxiv.org/abs/1511.06434). + +The high-level structure of GAN is shown in Figure. 1 below. It is composed of two major parts: a generator and a discriminator, both of which are based on neural networks. The generator takes in some kind of noise with a known distribution and transforms it into an image. The discriminator takes in an image and determines whether it is artificially generated by the generator or a real image. So the generator and the discriminator are in a competitive game in which generator is trying to generate image to look as real as possible to fool the discriminator, while the discriminator is trying to distinguish between real and fake images. + +<p align="center"> + <img src="./gan.png" width="500" height="300"> +</p> +<p align="center"> + Figure 1. GAN-Model-Structure + <a href="https://ishmaelbelghazi.github.io/ALI/">figure credit</a> +</p> + +The generator and discriminator take turn to be trained using SGD. The objective function of the generator is for its generated images being classified as real by the discriminator, and the objective function of the discriminator is to correctly classify real and fake images. 
When the GAN model is trained to converge to the equilibrium state, the generator will transform the given noise distribution to the distribution of real images, and the discriminator will not be able to distinguish between real and fake images at all. + +## Implementation of GAN Model Structure +Since the GAN model involves multiple neural networks, training it requires the Paddle Python API. So the code walk-through below can also partially serve as an introduction to the usage of the Paddle Python API. + +There are three networks defined in gan_conf.py, namely **generator_training**, **discriminator_training** and **generator**. The relationship to the model structure we defined above is that **discriminator_training** is the discriminator, **generator** is the generator, and **generator_training** combines the generator and the discriminator, since training the generator requires the discriminator to provide the loss function. This relationship is described in the following code: +```python +if is_generator_training: + noise = data_layer(name="noise", size=noise_dim) + sample = generator(noise) + +if is_discriminator_training: + sample = data_layer(name="sample", size=sample_dim) + +if is_generator_training or is_discriminator_training: + label = data_layer(name="label", size=1) + prob = discriminator(sample) + cost = cross_entropy(input=prob, label=label) + classification_error_evaluator( + input=prob, label=label, name=mode + '_error') + outputs(cost) + +if is_generator: + noise = data_layer(name="noise", size=noise_dim) + outputs(generator(noise)) +``` + +In order to train the networks defined in gan_conf.py, one first needs to initialize a Paddle environment, parse the config, create a GradientMachine from the config and create a trainer from the GradientMachine, as done in the code chunk below: +```python +import py_paddle.swig_paddle as api +# init paddle environment +api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10', + '--log_period=100', '--gpu_id=' + args.gpu_id, + 
'--save_dir=' + "./%s_params/" % data_source) + +# Parse config +gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source) +dis_conf = parse_config(conf, "mode=discriminator_training,data=" + data_source) +generator_conf = parse_config(conf, "mode=generator,data=" + data_source) + +# Create GradientMachine +dis_training_machine = api.GradientMachine.createFromConfigProto( +dis_conf.model_config) +gen_training_machine = api.GradientMachine.createFromConfigProto( +gen_conf.model_config) +generator_machine = api.GradientMachine.createFromConfigProto( +generator_conf.model_config) + +# Create trainer +dis_trainer = api.Trainer.create(dis_conf, dis_training_machine) +gen_trainer = api.Trainer.create(gen_conf, gen_training_machine) +``` + +In order to balance the strength between generator and discriminator, we schedule to train whichever one is performing worse by comparing their loss function value. The loss function value can be calculated by a forward pass through the GradientMachine. +```python +def get_training_loss(training_machine, inputs): + outputs = api.Arguments.createArguments(0) + training_machine.forward(inputs, outputs, api.PASS_TEST) + loss = outputs.getSlotValue(0).copyToNumpyMat() + return numpy.mean(loss) +``` + +After training one network, one needs to sync the new parameters to the other networks. The code below demonstrates one example of such use case: +```python +# Train the gen_training +gen_trainer.trainOneDataBatch(batch_size, data_batch_gen) + +# Copy the parameters from gen_training to dis_training and generator +copy_shared_parameters(gen_training_machine, +dis_training_machine) +copy_shared_parameters(gen_training_machine, generator_machine) +``` + + +## A Toy Example +With the infrastructure explained above, we can now walk you through a toy example of generating two dimensional uniform distribution using 10 dimensional Gaussian noise. 
+ +The Gaussian noises are generated using the code below: +```python +def get_noise(batch_size, noise_dim): + return numpy.random.normal(size=(batch_size, noise_dim)).astype('float32') +``` + +The real samples (2-D uniform) are generated using the code below: +```python +# synthesize 2-D uniform data in gan_trainer.py:114 +def load_uniform_data(): + data = numpy.random.rand(1000000, 2).astype('float32') + return data +``` + +The generator and discriminator network are built using fully-connected layer and batch_norm layer, and are defined in gan_conf.py. + +To train the GAN model, one can use the command below. The flag -d specifies the training data (cifar, mnist or uniform) and flag --useGpu specifies whether to use gpu for training (0 is cpu, 1 is gpu). +```bash +$python gan_trainer.py -d uniform --useGpu 1 +``` +The generated samples can be found in ./uniform_samples/ and one example is shown below as Figure 2. One can see that it roughly recovers the 2D uniform distribution. + +<p align="center"> + <img src="./uniform_sample.png" width="300" height="300"> +</p> +<p align="center"> + Figure 2. Uniform Sample +</p> + +## MNIST Example +### Data preparation +To download the MNIST data, one can use the following commands: +```bash +$cd data/ +$./get_mnist_data.sh +``` + +### Model description +Following the DC-Gan paper (https://arxiv.org/abs/1511.06434), we use convolution/convolution-transpose layer in the discriminator/generator network to better deal with images. The details of the network structures are defined in gan_conf_image.py. + +### Training the model +To train the GAN model on mnist data, one can use the following command: +```bash +$python gan_trainer.py -d mnist --useGpu 1 +``` +The generated sample images can be found at ./mnist_samples/ and one example is shown below as Figure 3. +<p align="center"> + <img src="./mnist_sample.png" width="300" height="300"> +</p> +<p align="center"> + Figure 3. 
MNIST Sample +</p> diff --git a/doc/tutorials/gan/mnist_sample.png b/doc/tutorials/gan/mnist_sample.png new file mode 100644 index 0000000000000000000000000000000000000000..f9c7bf7ddd7f148eac4fe347e9c38afaa8876760 Binary files /dev/null and b/doc/tutorials/gan/mnist_sample.png differ diff --git a/doc/tutorials/gan/uniform_sample.png b/doc/tutorials/gan/uniform_sample.png new file mode 100644 index 0000000000000000000000000000000000000000..4a96c45cae82673f5a1df986f2643a8026da7937 Binary files /dev/null and b/doc/tutorials/gan/uniform_sample.png differ diff --git a/doc/tutorials/image_classification/index_cn.md b/doc/tutorials/image_classification/index_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..87f465522a0fa21c8c03754b4be8dcb035c4de81 --- /dev/null +++ b/doc/tutorials/image_classification/index_cn.md @@ -0,0 +1,205 @@ +图åƒåˆ†ç±»æ•™ç¨‹ +========== + +在本教程ä¸ï¼Œæˆ‘们将使用CIFAR-10æ•°æ®é›†è®ç»ƒä¸€ä¸ªå·ç§¯ç¥žç»ç½‘络,并使用这个神ç»ç½‘络æ¥å¯¹å›¾ç‰‡è¿›è¡Œåˆ†ç±»ã€‚如下图所示,å·ç§¯ç¥žç»ç½‘络å¯ä»¥è¾¨è¯†å›¾ç‰‡ä¸çš„主体,并给出分类结果。 +<center>![Image Classification](./image_classification.png)</center> + +## æ•°æ®å‡†å¤‡ +首先下载CIFAR-10æ•°æ®é›†ã€‚下é¢æ˜¯CIFAR-10æ•°æ®é›†çš„官方网å€ï¼š + +<https://www.cs.toronto.edu/~kriz/cifar.html> + +我们准备了一个脚本,å¯ä»¥ç”¨äºŽä»Žå®˜æ–¹ç½‘站上下载CIFAR-10æ•°æ®é›†ï¼Œè½¬ä¸ºjpeg文件并å˜å…¥ç‰¹å®šçš„目录。使用这个脚本å‰è¯·ç¡®è®¤å·²ç»å®‰è£…了pillowåŠç›¸å…³ä¾èµ–模å—。å¯ä»¥å‚照下é¢çš„命令进行安装: + +1. 安装pillow + +```bash +sudo apt-get install libjpeg-dev +pip install pillow +``` + +2. 
下载数æ®é›† + +```bash +cd demo/image_classification/data/ +sh download_cifar.sh +``` + +CIFAR-10æ•°æ®é›†åŒ…å«60000å¼ 32x32的彩色图片。图片分为10类,æ¯ä¸ªç±»åŒ…å«6000å¼ ã€‚å…¶ä¸50000å¼ å›¾ç‰‡ä½œä¸ºè®ç»ƒé›†ï¼Œ10000å¼ ä½œä¸ºæµ‹è¯•é›†ã€‚ + +下图展示了所有的图片类别,æ¯ä¸ªç±»åˆ«ä¸éšæœºæŠ½å–了10å¼ å›¾ç‰‡ã€‚ +<center>![Image Classification](./cifar.png)</center> + +脚本è¿è¡Œå®ŒæˆåŽï¼Œæˆ‘们应当会得到一个å为cifar-out的文件夹,其下å文件夹的结构如下 + + +``` +train +---airplane +---automobile +---bird +---cat +---deer +---dog +---frog +---horse +---ship +---truck +test +---airplane +---automobile +---bird +---cat +---deer +---dog +---frog +---horse +---ship +---truck +``` + +cifar-out下包å«`train`å’Œ`test`两个文件夹,其ä¸åˆ†åˆ«åŒ…å«äº†CIFAR-10ä¸çš„è®ç»ƒé›†å’Œæµ‹è¯•é›†ã€‚这两个文件夹下å„自有10个å文件夹,æ¯ä¸ªå文件夹下å˜å‚¨ç›¸åº”分类的图片。将图片按照上述结构å˜å‚¨å¥½ä¹‹åŽï¼Œæˆ‘们就å¯ä»¥ç€æ‰‹å¯¹åˆ†ç±»æ¨¡åž‹è¿›è¡Œè®ç»ƒäº†ã€‚ + +## é¢„å¤„ç† +æ•°æ®ä¸‹è½½ä¹‹åŽï¼Œè¿˜éœ€è¦è¿›è¡Œé¢„处ç†ï¼Œå°†æ•°æ®è½¬æ¢ä¸ºPaddleçš„æ ¼å¼ã€‚我们å¯ä»¥é€šè¿‡å¦‚下命令进行预处ç†å·¥ä½œï¼š + +``` +cd demo/image_classification/ +sh preprocess.sh +``` + +å…¶ä¸`preprocess.sh` 调用 `./demo/image_classification/preprocess.py` å¯¹å›¾ç‰‡è¿›è¡Œé¢„å¤„ç† +```sh +export PYTHONPATH=$PYTHONPATH:../../ +data_dir=./data/cifar-out +python preprocess.py -i $data_dir -s 32 -c 1 +``` + +`./demo/image_classification/preprocess.py` 使用如下å‚数: + +- `-i` 或 `--input` 给出输入数æ®æ‰€åœ¨è·¯å¾„ï¼› +- `-s` 或 `--size` 给出图片尺寸; +- `-c` 或 `--color` æ ‡ç¤ºå›¾ç‰‡æ˜¯å½©è‰²å›¾æˆ–ç°åº¦å›¾ + +## 模型è®ç»ƒ +在开始è®ç»ƒä¹‹å‰ï¼Œæˆ‘们需è¦å…ˆåˆ›å»ºä¸€ä¸ªæ¨¡åž‹é…置文件。下é¢æˆ‘们给出了一个é…置示例。**注æ„**,这里的列出的和`vgg_16_cifar.py`文件ç¨æœ‰å·®åˆ«ï¼Œå› 为该文件å¯é€‚用于预测。 + +```python +from paddle.trainer_config_helpers import * +data_dir='data/cifar-out/batches/' +meta_path=data_dir+'batches.meta' +args = {'meta':meta_path, 'mean_img_size': 32, + 'img_size': 32, 'num_classes': 10, + 'use_jpeg': 1, 'color': "color"} +define_py_data_sources2(train_list=data_dir+"train.list", + test_list=data_dir+'test.list', + module='image_provider', + obj='processData', + args=args) +settings( + batch_size = 128, + 
learning_rate = 0.1 / 128.0, + learning_method = MomentumOptimizer(0.9), + regularization = L2Regularization(0.0005 * 128)) + +img = data_layer(name='image', size=3*32*32) +lbl = data_layer(name="label", size=10) +# small_vgg is predined in trainer_config_helpers.network +predict = small_vgg(input_image=img, num_channels=3) +outputs(classification_cost(input=predict, label=lbl)) +``` + +在第一行ä¸æˆ‘们载入用于定义网络的函数。 +```python +from paddle.trainer_config_helpers import * +``` + +之åŽå®šä¹‰çš„`define_py_data_sources2`使用Pythonæ•°æ®æä¾›å™¨ï¼Œå…¶ä¸ `args`将在`image_provider.py`进行使用,该文件负责产生图片数æ®å¹¶ä¼ 递给Paddle系统 + - `meta`: è®ç»ƒé›†å¹³å‡å€¼ã€‚ + - `mean_img_size`: å¹³å‡ç‰¹å¾å›¾çš„高度åŠå®½åº¦ã€‚ + - `img_size`:输入图片的高度åŠå®½åº¦ã€‚ + - `num_classes`:类别个数。 + - `use_jpeg`:处ç†è¿‡ç¨‹ä¸æ•°æ®å˜å‚¨æ ¼å¼ã€‚ + - `color`ï¼šæ ‡ç¤ºæ˜¯å¦ä¸ºå½©è‰²å›¾ç‰‡ã€‚ + + `settings`用于设置è®ç»ƒç®—法。在下é¢çš„例åä¸ï¼Œlearning rate被设置为0.1除以batch size,而weight decay则为0.0005乘以batch size。 + + ```python +settings( + batch_size = 128, + learning_rate = 0.1 / 128.0, + learning_method = MomentumOptimizer(0.9), + regularization = L2Regularization(0.0005 * 128) +) +``` + +`small_vgg`定义了网络结构。这里我们使用的是一个å°çš„VGG网络。关于VGGå·ç§¯ç¥žç»ç½‘络的æè¿°å¯ä»¥å‚考:[http://www.robots.ox.ac.uk/~vgg/research/very_deep/](http://www.robots.ox.ac.uk/~vgg/research/very_deep/)。 +```python +# small_vgg is predined in trainer_config_helpers.network +predict = small_vgg(input_image=img, num_channels=3) +``` +é…置创建完毕åŽï¼Œå¯ä»¥è¿è¡Œè„šæœ¬train.shæ¥è®ç»ƒæ¨¡åž‹ã€‚ + +```bash +config=vgg_16_cifar.py +output=./cifar_vgg_model +log=train.log + +paddle train \ +--config=$config \ +--dot_period=10 \ +--log_period=100 \ +--test_all_data_in_one_period=1 \ +--use_gpu=1 \ +--save_dir=$output \ +2>&1 | tee $log + +python -m paddle.utils.plotcurve -i $log > plot.png +``` +- 这里我们使用的是GPU模å¼è¿›è¡Œè®ç»ƒã€‚å¦‚æžœä½ æ²¡æœ‰GPU环境,å¯ä»¥è®¾ç½®`use_gpu=0`。 +- `./demo/image_classification/vgg_16_cifar.py`是网络和数æ®é…置文件。å„项å‚数的详细说明å¯ä»¥åœ¨å‘½ä»¤è¡Œå‚数相关文档ä¸æ‰¾åˆ°ã€‚ +- 
脚本`plotcurve.py`ä¾èµ–于pythonçš„`matplotlib`模å—ã€‚å› æ¤å¦‚果这个脚本è¿è¡Œå¤±è´¥ï¼Œä¹Ÿè®¸æ˜¯å› 为需è¦å®‰è£…`matplotlib`。 +在è®ç»ƒå®ŒæˆåŽï¼Œè®ç»ƒåŠæµ‹è¯•è¯¯å·®æ›²çº¿å›¾ä¼šè¢«`plotcurve.py`脚本ä¿å˜åœ¨ `plot.png`ä¸ã€‚下é¢æ˜¯ä¸€ä¸ªè¯¯å·®æ›²çº¿å›¾çš„示例: + +<center>![Training and testing curves.](./plot.png)</center> + +## 预测 +在è®ç»ƒå®ŒæˆåŽï¼Œæ¨¡åž‹åŠå‚数会被ä¿å˜åœ¨è·¯å¾„`./cifar_vgg_model/pass-%05d`下。例如第300个pass的模型会被ä¿å˜åœ¨`./cifar_vgg_model/pass-00299`。 + +è¦å¯¹ä¸€ä¸ªå›¾ç‰‡çš„进行分类预测,我们å¯ä»¥ä½¿ç”¨`predict.sh`ï¼Œè¯¥è„šæœ¬å°†è¾“å‡ºé¢„æµ‹åˆ†ç±»çš„æ ‡ç¾ï¼š + +``` +sh predict.sh +``` + +predict.sh: +``` +model=cifar_vgg_model/pass-00299/ +image=data/cifar-out/test/airplane/seaplane_s_000978.png +use_gpu=1 +python prediction.py $model $image $use_gpu +``` + +## ç»ƒä¹ +在CUB-200æ•°æ®é›†ä¸Šä½¿ç”¨VGG模型è®ç»ƒä¸€ä¸ªé¸Ÿç±»å›¾ç‰‡åˆ†ç±»æ¨¡åž‹ã€‚相关的鸟类数æ®é›†å¯ä»¥ä»Žå¦‚下地å€ä¸‹è½½ï¼Œå…¶ä¸åŒ…å«äº†200ç§é¸Ÿç±»çš„照片(主è¦æ¥è‡ªåŒ—美洲)。 + +<http://www.vision.caltech.edu/visipedia/CUB-200.html> + + + + +## 细节探究 +### å·ç§¯ç¥žç»ç½‘络 +å·ç§¯ç¥žç»ç½‘络是一ç§ä½¿ç”¨å·ç§¯å±‚çš„å‰å‘神ç»ç½‘络,很适åˆæž„建用于ç†è§£å›¾ç‰‡å†…容的模型。一个典型的神ç»ç½‘络如下图所示: + +![Convolutional Neural Network](./lenet.png) + +一个å·ç§¯ç¥žç»ç½‘络包å«å¦‚下层: + +- å·ç§¯å±‚:通过å·ç§¯æ“作从图片或特å¾å›¾ä¸æå–ç‰¹å¾ +- æ± åŒ–å±‚ï¼šä½¿ç”¨max-pooling对特å¾å›¾ä¸‹é‡‡æ · +- 全连接层:使输入层到éšè—层的神ç»å…ƒæ˜¯å…¨éƒ¨è¿žæŽ¥çš„。 + +å·ç§¯ç¥žç»ç½‘络在图片分类上有ç€æƒŠäººçš„æ€§èƒ½ï¼Œè¿™æ˜¯å› ä¸ºå®ƒå‘掘出了图片的两类é‡è¦ä¿¡æ¯ï¼šå±€éƒ¨å…³è”性质和空间ä¸å˜æ€§è´¨ã€‚通过交替使用å·ç§¯å’Œæ± 化处ç†ï¼Œ å·ç§¯ç¥žç»ç½‘络能够很好的表示这两类信æ¯ã€‚ + +关于如何定义网络ä¸çš„层,以åŠå¦‚何在层之间进行连接,请å‚考Layer文档。 diff --git a/doc/tutorials/image_classification/index_en.md b/doc/tutorials/image_classification/index_en.md index 29cfc99702c362d1eaeeff5332f56122b8de337a..60c81a6a539944634773f38ec4c9a59709dd4afc 100644 --- a/doc/tutorials/image_classification/index_en.md +++ b/doc/tutorials/image_classification/index_en.md @@ -147,7 +147,7 @@ for classification. 
A description of VGG network can be found here [http://www.r # small_vgg is predined in trainer_config_helpers.network predict = small_vgg(input_image=img, num_channels=3) ``` -After writing the config, we can train the model by running the script train.sh. Notice that the following script assumes the you run the script in the `./demo/image_classification` folder. If you run the script in a different folder, you need to change the paths of the scripts and the configuration files accordingly. +After writing the config, we can train the model by running the script train.sh. ```bash config=vgg_16_cifar.py diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp index 0d1e17529611d11136914cb810b0633e0afccedf..66115f8293b905809639afff779abfdb2bb3a54e 100644 --- a/paddle/api/GradientMachine.cpp +++ b/paddle/api/GradientMachine.cpp @@ -68,6 +68,14 @@ void GradientMachine::start() { m->machine->start(); } void GradientMachine::finish() { m->machine->finish(); } +void GradientMachine::onPassEnd() { m->machine->onPassEnd(); } + +void GradientMachine::prefetch(const Arguments& inArgs) { + auto& in = + m->cast<std::vector<paddle::Argument>>(inArgs.getInternalArgumentsPtr()); + m->machine->prefetch(in); +} + void GradientMachine::forward(const Arguments& inArgs, Arguments* outArgs, PassType passType) { diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h index f70a8ce26b165f7989b3182ed0a2feb320572efb..81c9eed0bccd5ad63f524cdb011fc73cd568f465 100644 --- a/paddle/api/PaddleAPI.h +++ b/paddle/api/PaddleAPI.h @@ -20,15 +20,11 @@ limitations under the License. */ #include <string> #include <vector> #include "paddle/utils/GlobalConstants.h" -#include "paddle/utils/TypeDefs.h" +#include "paddle/utils/common.h" /// Import PaddlePaddle's enumeration into global namespace. 
using namespace paddle::enumeration_wrapper; // NOLINT -#define DISABLE_COPY_AND_ASSIGN(classname) \ - classname(const classname& other); \ - classname& operator=(const classname& other) - /** * @brief Initialize paddle. * @@ -102,7 +98,7 @@ const size_t NO_SPARSE_ID = -1UL; struct MatrixPrivate; class Matrix { Matrix(); // User Cannot Create Matrix. - DISABLE_COPY_AND_ASSIGN(Matrix); + DISABLE_COPY(Matrix); static Matrix* createByPaddleMatrixPtr(void* sharedPtr); public: @@ -242,7 +238,7 @@ private: struct VectorPrivate; class Vector { - DISABLE_COPY_AND_ASSIGN(Vector); + DISABLE_COPY(Vector); Vector(); static Vector* createByPaddleVectorPtr(void* ptr); @@ -322,7 +318,7 @@ private: struct IVectorPrivate; class IVector { IVector(); - DISABLE_COPY_AND_ASSIGN(IVector); + DISABLE_COPY(IVector); static IVector* createByPaddleVectorPtr(void* ptr); public: @@ -402,7 +398,7 @@ struct ArgumentsPrivate; class Arguments { private: Arguments(); // Internal Create. - DISABLE_COPY_AND_ASSIGN(Arguments); + DISABLE_COPY(Arguments); public: /** @@ -472,7 +468,7 @@ enum GradientMatchineCreateMode { struct ParameterConfigPrivate; class ParameterConfig { - DISABLE_COPY_AND_ASSIGN(ParameterConfig); + DISABLE_COPY(ParameterConfig); ParameterConfig(); /** @@ -502,7 +498,7 @@ private: struct OptimizationConfigPrivate; class OptimizationConfig { - DISABLE_COPY_AND_ASSIGN(OptimizationConfig); + DISABLE_COPY(OptimizationConfig); OptimizationConfig(); public: @@ -527,7 +523,7 @@ struct ParameterPrivate; class Parameter { private: Parameter(); - DISABLE_COPY_AND_ASSIGN(Parameter); + DISABLE_COPY(Parameter); public: virtual ~Parameter(); @@ -572,7 +568,7 @@ struct ModelConfigPrivate; class ModelConfig { private: ModelConfig(); - DISABLE_COPY_AND_ASSIGN(ModelConfig); + DISABLE_COPY(ModelConfig); public: virtual ~ModelConfig(); @@ -593,7 +589,7 @@ struct TrainerConfigPrivate; class TrainerConfig { private: TrainerConfig(); - DISABLE_COPY_AND_ASSIGN(TrainerConfig); + DISABLE_COPY(TrainerConfig); 
public: virtual ~TrainerConfig(); @@ -633,7 +629,7 @@ public: struct ParameterTraverseCallbackPrivate; class ParameterTraverseCallback { - DISABLE_COPY_AND_ASSIGN(ParameterTraverseCallback); + DISABLE_COPY(ParameterTraverseCallback); ParameterTraverseCallback(); public: @@ -655,7 +651,7 @@ private: */ struct ParameterOptimizerPrivate; class ParameterOptimizer { - DISABLE_COPY_AND_ASSIGN(ParameterOptimizer); + DISABLE_COPY(ParameterOptimizer); ParameterOptimizer(); public: @@ -692,7 +688,7 @@ struct GradientMachinePrivate; class GradientMachine { private: GradientMachine(); - DISABLE_COPY_AND_ASSIGN(GradientMachine); + DISABLE_COPY(GradientMachine); public: virtual ~GradientMachine(); @@ -725,6 +721,16 @@ public: void start(); + /** + * Prefetch row ids of sparse parameter. + */ + void prefetch(const Arguments& inArgs); + + /** + * Do some thing when train pass ended. + */ + void onPassEnd(); + /** * The forward stage of GradientMachine. * @@ -866,7 +872,7 @@ struct EvaluatorPrivate; class Evaluator { private: Evaluator(); - DISABLE_COPY_AND_ASSIGN(Evaluator); + DISABLE_COPY(Evaluator); public: ~Evaluator(); @@ -900,7 +906,7 @@ private: TrainerPrivate* m; Trainer(); Trainer(TrainerConfig* optConfig, GradientMachine* gm); - DISABLE_COPY_AND_ASSIGN(Trainer); + DISABLE_COPY(Trainer); public: virtual ~Trainer(); @@ -966,7 +972,7 @@ public: struct SequenceGeneratorPrivate; class SequenceGenerator { - DISABLE_COPY_AND_ASSIGN(SequenceGenerator); + DISABLE_COPY(SequenceGenerator); SequenceGenerator(); public: diff --git a/paddle/api/paddle_ld_flags.py b/paddle/api/paddle_ld_flags.py index 7c8206e3fe09704debf5268f02128cc59e72af8d..b4d27b1cc728f92b2210f30b69f3f5899fe81d65 100644 --- a/paddle/api/paddle_ld_flags.py +++ b/paddle/api/paddle_ld_flags.py @@ -141,9 +141,12 @@ try: def c_flag(self): if self.with_coverage: - return ["-fprofile-arcs", "-ftest-coverage", "-O0", "-g"] + return [ + "-fprofile-arcs", "-ftest-coverage", "-O0", "-g", + "-std=c++11" + ] else: - return None + 
return ["-std=c++11"] except ImportError: class PaddleLDFlag(object): diff --git a/paddle/cuda/include/hl_base.h b/paddle/cuda/include/hl_base.h index 84c5f2d5c91feb7896643d2c5f60a279ebe944e7..5b9884b786530aee91312c7547496c94cd7a89cb 100644 --- a/paddle/cuda/include/hl_base.h +++ b/paddle/cuda/include/hl_base.h @@ -16,7 +16,31 @@ limitations under the License. */ #define HL_BASE_H_ #include <cstddef> -#include "paddle/utils/TypeDefs.h" + +#ifdef PADDLE_TYPE_DOUBLE /* real is double: limits are the DBL_MAX / DBL_MIN magnitudes */ +#define HL_FLOAT_MAX 1.7976931348623157e+308 +#define HL_FLOAT_MIN 2.2250738585072014e-308 +using real = double; +#else /* real is float: limits are the FLT_MAX / FLT_MIN magnitudes */ +#define HL_FLOAT_MAX 3.40282347e+38F +#define HL_FLOAT_MIN 1.17549435e-38F +using real = float; +#endif + +/** + * The maximum input value for exp, used to avoid overflow problem. + * currently only used for tanh function. + */ +#define EXP_MAX_INPUT 40.0 + +/** + * @brief DIVUP(x, y) is similar to ceil(x / y). + * @note For CUDA, DIVUP will be used to specify + * the size of blockDim. + */ +#ifndef DIVUP +#define DIVUP(x, y) (((x) + (y)-1) / (y)) +#endif /** * HPPL is an internal high performance parallel computing library @@ -181,46 +205,6 @@ typedef struct { size_t nnz; } _hl_sparse_matrix_s, *hl_sparse_matrix_s; -#ifndef PADDLE_TYPE_DOUBLE -/** - * HPPL data type: real (float or double) - * - * if real == float - * - * HL_FLOAT_MAX: 3.40282347e+38F - * - * HL_FLOAT_MIN: 1.17549435e-38F - */ -#define HL_FLOAT_MAX 3.40282347e+38F -/** - * if real == double - * - * HL_FLOAT_MAX: 1.7976931348623157e+308 - * - * HL_FLOAT_MIN: 2.2250738585072014e-308 - */ -#define HL_FLOAT_MIN 1.17549435e-38F -#else -#define HL_FLOAT_MAX 1.7976931348623157e+308 -#define HL_FLOAT_MIN 2.2250738585072014e-308 -#endif - -/** - * The maximum input value for exp, used to avoid overflow problem. - * - * Currently only used for tanh function. - */ -#define EXP_MAX_INPUT 40.0 - -/** - * @brief DIVUP(x, y) is similar to ceil(x / y). - * @note For CUDA, DIVUP will be used to specify - * the size of blockDim. 
- */ -#ifndef DIVUP -#define DIVUP(x, y) (((x) + (y)-1) / (y)) -#endif - #ifdef __NVCC__ #include "cuda_runtime.h" diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc index c0c8b0e60dbde11fd7e8ce056df7e8a7862049d2..6198f067bab2ec790e641e77dce058fe6a52491a 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/cuda/src/hl_cuda_cudnn.cc @@ -14,11 +14,11 @@ limitations under the License. */ #include "hl_cuda_cudnn.h" #include <cudnn.h> +#include <gflags/gflags.h> #include <mutex> #include "hl_cuda_cudnn.ph" #include "hl_dso_loader.h" #include "hl_thread.ph" -#include "paddle/utils/CommandLineParser.h" #include "paddle/utils/Logging.h" DEFINE_int32(cudnn_conv_workspace_limit_in_mb, diff --git a/paddle/cuda/src/hl_dso_loader.cc b/paddle/cuda/src/hl_dso_loader.cc index 54c7620fc081f681d9d33bcd711008fa5029df05..c92909de534a875028d6d4784b02f08648c85a9a 100644 --- a/paddle/cuda/src/hl_dso_loader.cc +++ b/paddle/cuda/src/hl_dso_loader.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "hl_dso_loader.h" -#include "paddle/utils/CommandLineParser.h" +#include <gflags/gflags.h> #include "paddle/utils/Logging.h" DEFINE_string(cudnn_dir, diff --git a/paddle/gserver/dataproviders/DataProvider.h b/paddle/gserver/dataproviders/DataProvider.h index 9b7f7e36cedaa230ae0694d87cc033bd6fa6e652..5f031fc7c0761a8fe97eb16fe1dd8e0a1debfcdb 100644 --- a/paddle/gserver/dataproviders/DataProvider.h +++ b/paddle/gserver/dataproviders/DataProvider.h @@ -34,8 +34,8 @@ limitations under the License. 
*/ #include "paddle/utils/Logging.h" #include "paddle/utils/Queue.h" #include "paddle/utils/ThreadLocal.h" -#include "paddle/utils/TypeDefs.h" #include "paddle/utils/Util.h" +#include "paddle/utils/common.h" namespace paddle { /** diff --git a/paddle/gserver/layers/BatchNormalizationLayer.h b/paddle/gserver/layers/BatchNormalizationLayer.h index 052c2077322be59f9d41966c1c8b6ab20c8f85bb..195acbbfc58db8368f6db1c1595dd6b04801ee26 100644 --- a/paddle/gserver/layers/BatchNormalizationLayer.h +++ b/paddle/gserver/layers/BatchNormalizationLayer.h @@ -58,6 +58,8 @@ protected: /// to batch, channels* imagePixels. void shrinkMat(const MatrixPtr& in, MatrixPtr& out); + void onPassEnd() { firstTest_ = true; } + MatrixPtr tmpMat_, tmpGrad_; MatrixPtr expandedIn_, expandedOut_; MatrixPtr expandedInGrad_, expandedOutGrad_, inGrad_; diff --git a/paddle/gserver/layers/GruCompute.h b/paddle/gserver/layers/GruCompute.h index 42c0019319ac9f20f9c3349fb2429c30f03d682b..a56af21317d1d43c836f7fe599a4dc614804bfec 100644 --- a/paddle/gserver/layers/GruCompute.h +++ b/paddle/gserver/layers/GruCompute.h @@ -16,7 +16,7 @@ limitations under the License. */ #include "ModelConfig.pb.h" #include "hl_gpu.h" -#include "paddle/utils/TypeDefs.h" +#include "paddle/utils/common.h" namespace paddle { diff --git a/paddle/gserver/layers/LstmCompute.h b/paddle/gserver/layers/LstmCompute.h index 140a4c6ecf5cfaf1045cec3ca2db5d4f2e54aca4..0d65b4158ebdc04f199048bbba98317c89fc8beb 100644 --- a/paddle/gserver/layers/LstmCompute.h +++ b/paddle/gserver/layers/LstmCompute.h @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include "ModelConfig.pb.h" #include "hl_gpu.h" -#include "paddle/utils/TypeDefs.h" +#include "paddle/utils/common.h" namespace paddle { diff --git a/paddle/gserver/layers/MultinomialSampler.h b/paddle/gserver/layers/MultinomialSampler.h index 677b047029305549084770bdb5eadfeaafbfac8a..b48073c80b6f57cd86ceb80b9d749548c3acc1ac 100644 --- a/paddle/gserver/layers/MultinomialSampler.h +++ b/paddle/gserver/layers/MultinomialSampler.h @@ -16,7 +16,7 @@ limitations under the License. */ #include <memory> #include <random> -#include "paddle/utils/TypeDefs.h" +#include "paddle/utils/common.h" namespace paddle { diff --git a/paddle/gserver/layers/RecurrentLayer.cpp b/paddle/gserver/layers/RecurrentLayer.cpp index 94b16996a86d2c52c8b97cbe009076fa3ade03f7..55e0fdfb9048c02b2dcd474c6887eee180328260 100644 --- a/paddle/gserver/layers/RecurrentLayer.cpp +++ b/paddle/gserver/layers/RecurrentLayer.cpp @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include <gflags/gflags.h> #include "Layer.h" #include "SequenceToBatch.h" -#include "paddle/utils/CommandLineParser.h" #include "paddle/utils/Stat.h" DEFINE_bool(rnn_use_batch, false, "Using the batch method for calculation."); diff --git a/paddle/gserver/tests/TestUtil.cpp b/paddle/gserver/tests/TestUtil.cpp index e07c60861a4a6567fd1e28559b9806cb623a3bdf..c691fe26255914811c8861cff80495c821990179 100644 --- a/paddle/gserver/tests/TestUtil.cpp +++ b/paddle/gserver/tests/TestUtil.cpp @@ -13,9 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "TestUtil.h" - +#include <gflags/gflags.h> #include "paddle/math/SparseMatrix.h" -#include "paddle/utils/CommandLineParser.h" DEFINE_int32(fixed_seq_length, 0, "Produce some sequence of fixed length"); diff --git a/paddle/math/BaseMatrix.h b/paddle/math/BaseMatrix.h index 2933c20fbad930248c41969d88d45cf397b9dcf8..8f9bc9e823eb8062535920361899ce3cc06ec3a7 100644 --- a/paddle/math/BaseMatrix.h +++ b/paddle/math/BaseMatrix.h @@ -16,7 +16,7 @@ limitations under the License. */ #include <stdint.h> #include <cstddef> #include "TensorExpression.h" -#include "paddle/utils/TypeDefs.h" +#include "paddle/utils/common.h" namespace paddle { diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index 25ce09e346694298e4901e52ab1ec6a3a8044263..bda863de38675fe481544a7e82b69f445df361bd 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -27,7 +27,7 @@ limitations under the License. */ #include "MemoryHandle.h" #include "Vector.h" #include "paddle/utils/ThreadLocal.h" -#include "paddle/utils/TypeDefs.h" +#include "paddle/utils/common.h" namespace paddle { diff --git a/paddle/math/SparseRowMatrix.h b/paddle/math/SparseRowMatrix.h index 9364feb4a1462a5a9d16ca0f69213ba32ad97d21..778a9bd845661849261b52dcbeb519809d0c6306 100644 --- a/paddle/math/SparseRowMatrix.h +++ b/paddle/math/SparseRowMatrix.h @@ -14,10 +14,10 @@ limitations under the License. */ #pragma once +#include <gflags/gflags.h> #include <string.h> #include <algorithm> #include "Matrix.h" -#include "paddle/utils/CommandLineParser.h" #include "paddle/utils/Util.h" DECLARE_bool(allow_inefficient_sparse_update); diff --git a/paddle/math/TensorExpression.h b/paddle/math/TensorExpression.h index 9bd789e8c511f33d8415e421281e99eb10fc63fe..f3d60e400380f7d7d645559318837b0d7706661d 100644 --- a/paddle/math/TensorExpression.h +++ b/paddle/math/TensorExpression.h @@ -17,7 +17,7 @@ limitations under the License. 
*/ #include <cstddef> #include "hl_tensor_ops.h" #include "paddle/utils/Logging.h" -#include "paddle/utils/TypeDefs.h" +#include "paddle/utils/common.h" namespace paddle { diff --git a/paddle/math/Vector.h b/paddle/math/Vector.h index 8a24103bd4107035c8068c24ec3be6ec06957112..b4347a70f874a2a1bf933bbea4d1b15385f36090 100644 --- a/paddle/math/Vector.h +++ b/paddle/math/Vector.h @@ -22,7 +22,7 @@ limitations under the License. */ #include "BaseMatrix.h" #include "MemoryHandle.h" #include "paddle/utils/Thread.h" -#include "paddle/utils/TypeDefs.h" +#include "paddle/utils/common.h" namespace paddle { diff --git a/paddle/parameter/ParallelParameter.h b/paddle/parameter/ParallelParameter.h index 417e386dc74d308a6c0aefa2640f0f37de8dbf1f..1ee220d2dc1a26b3f394ca673975cc827f450206 100644 --- a/paddle/parameter/ParallelParameter.h +++ b/paddle/parameter/ParallelParameter.h @@ -28,7 +28,7 @@ limitations under the License. */ #include "paddle/parameter/ParameterUpdateFunctions.h" #include "paddle/utils/Flags.h" #include "paddle/utils/Locks.h" -#include "paddle/utils/TypeDefs.h" +#include "paddle/utils/common.h" #include "ParameterConfig.pb.h" diff --git a/paddle/parameter/Parameter.cpp b/paddle/parameter/Parameter.cpp index 1673fc6e533e416dfe4db557a1a8968667d1bfff..29d6e20dc16968cdda3e79b66b0c81aaaf303ef4 100644 --- a/paddle/parameter/Parameter.cpp +++ b/paddle/parameter/Parameter.cpp @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Parameter.h" +#include <gflags/gflags.h> #include <fstream> #include "AverageOptimizer.h" #include "FirstOrderOptimizer.h" @@ -23,7 +24,6 @@ limitations under the License. 
*/ #include "paddle/math/CpuSparseMatrix.h" #include "paddle/math/MathUtils.h" #include "paddle/math/SparseRowMatrix.h" -#include "paddle/utils/CommandLineParser.h" #include "paddle/utils/Logging.h" DEFINE_int32(enable_grad_share, diff --git a/paddle/parameter/Parameter.h b/paddle/parameter/Parameter.h index 532c6770e596c33dfe7fd42f32157b2c6c19e18e..e05137b315f254795de26a5ff0ac977e7968f4d8 100644 --- a/paddle/parameter/Parameter.h +++ b/paddle/parameter/Parameter.h @@ -29,8 +29,8 @@ limitations under the License. */ #include "paddle/utils/GlobalConstants.h" #include "paddle/utils/Locks.h" #include "paddle/utils/ThreadLocal.h" -#include "paddle/utils/TypeDefs.h" #include "paddle/utils/Util.h" +#include "paddle/utils/common.h" namespace paddle { diff --git a/paddle/parameter/ParameterUpdateFunctions.h b/paddle/parameter/ParameterUpdateFunctions.h index 2d277e47e7eafc118fa37343e93e8a331a260aa9..2cb379871716ffd9e75eede607276b6b3f200e6b 100644 --- a/paddle/parameter/ParameterUpdateFunctions.h +++ b/paddle/parameter/ParameterUpdateFunctions.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/math/Vector.h" -#include "paddle/utils/TypeDefs.h" +#include "paddle/utils/common.h" namespace paddle { diff --git a/paddle/pserver/BaseClient.cpp b/paddle/pserver/BaseClient.cpp index b4ac7a2506921b2409baaff077cc3541f3dc8d73..0e031a7e20cbc975f0dc368fb1523c1f63d8646b 100644 --- a/paddle/pserver/BaseClient.cpp +++ b/paddle/pserver/BaseClient.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "BaseClient.h" +#include <gflags/gflags.h> #include <string.h> #include <vector> -#include "paddle/utils/CommandLineParser.h" #include "paddle/utils/Stat.h" DECLARE_string(pservers); diff --git a/paddle/pserver/BaseClient.h b/paddle/pserver/BaseClient.h index 262afafbe2d61305a158d945fac2d3b265012cbd..ccf05ae1ca3ab76fbe9d36237969207768de4dd2 100644 --- a/paddle/pserver/BaseClient.h +++ b/paddle/pserver/BaseClient.h @@ -18,7 +18,7 @@ limitations under the License. */ #include "paddle/math/Matrix.h" #include "paddle/pserver/ProtoServer.h" #include "paddle/utils/Queue.h" -#include "paddle/utils/TypeDefs.h" +#include "paddle/utils/common.h" namespace paddle { diff --git a/paddle/pserver/LightNetwork.cpp b/paddle/pserver/LightNetwork.cpp index cbc105e651faa0f283b3becb10449f4e1bc78b38..8c8ba0a2e51b85bde0544c6780b07130336a6bdd 100644 --- a/paddle/pserver/LightNetwork.cpp +++ b/paddle/pserver/LightNetwork.cpp @@ -18,6 +18,7 @@ limitations under the License. */ #include <netinet/tcp.h> #include <sys/socket.h> #include <sys/types.h> +#include <chrono> #include <arpa/inet.h> #include <net/if.h> @@ -382,8 +383,21 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) { setOption(sockfd); /// Now connect to the server - PCHECK(connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)) >= 0) - << "ERROR connecting to " << serverAddr; + int retry_second = 0; + int error = 0; + // connect() returns -1 on failure; the cause is reported in errno. + do { + error = connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)); + if (error < 0 && errno == ECONNREFUSED) { + LOG(WARNING) << "connection refused by pserver, try again!"; + if (retry_second++ >= 7) { + LOG(FATAL) << "connection refused by pserver, maybe pserver failed!"; + } + std::this_thread::sleep_for(std::chrono::seconds(1)); + } else { + PCHECK(error >= 0) << "ERROR connecting to " << serverAddr; + } + } while (error < 0); channel_.reset(new SocketChannel(sockfd, serverAddr)); tcpRdma_ = F_TCP; diff --git a/paddle/pserver/ParameterClient2.h
b/paddle/pserver/ParameterClient2.h index eed71ccb43b0fec76a74a7f00662c32c97c26ff4..70cfc6d70072f399ef97eef1a0e6111a127cbd9f 100644 --- a/paddle/pserver/ParameterClient2.h +++ b/paddle/pserver/ParameterClient2.h @@ -26,8 +26,8 @@ limitations under the License. */ #include "paddle/utils/Flags.h" #include "paddle/utils/Locks.h" #include "paddle/utils/Queue.h" -#include "paddle/utils/TypeDefs.h" #include "paddle/utils/Util.h" +#include "paddle/utils/common.h" #include "ParameterService.pb.h" diff --git a/paddle/pserver/ParameterServer2.h b/paddle/pserver/ParameterServer2.h index b0cf22e1fb158e76fcee1ce6ef1f375995803ce6..79d1eb97ff149f4f5ca9a924c1b0b7ba629f1e33 100644 --- a/paddle/pserver/ParameterServer2.h +++ b/paddle/pserver/ParameterServer2.h @@ -32,7 +32,7 @@ limitations under the License. */ #include "paddle/utils/Locks.h" #include "paddle/utils/Stat.h" #include "paddle/utils/ThreadLocal.h" -#include "paddle/utils/TypeDefs.h" +#include "paddle/utils/common.h" #include "ParameterService.pb.h" diff --git a/paddle/scripts/docker/Dockerfile b/paddle/scripts/docker/Dockerfile index f26055d0d4c99327580357f1118ae5eeca1c6d99..b01de499bd1fbcfff1f655535f574ae2caa17707 100644 --- a/paddle/scripts/docker/Dockerfile +++ b/paddle/scripts/docker/Dockerfile @@ -17,18 +17,6 @@ RUN cd /usr/src/gtest && cmake . && make && cp *.a /usr/lib RUN pip install -U BeautifulSoup docopt PyYAML pillow \ sphinx sphinx_rtd_theme recommonmark -# cmake tends to hide and blur the dependencies between code modules, as -# noted here https://github.com/PaddlePaddle/Paddle/issues/763. We are -# thinking about using Bazel to fix this problem, e.g., -# https://github.com/PaddlePaddle/Paddle/issues/681#issuecomment-263996102. To -# start the trail of fixing, we add Bazel to our Dockerfiles. 
-RUN apt-get update && apt-get install -y curl software-properties-common \ - && add-apt-repository ppa:webupd8team/java \ - && echo "oracle-java8-installer shared/accepted-oracle-license-v1-1 select true" | debconf-set-selections \ - && echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list \ - && curl https://bazel.build/bazel-release.pub.gpg | apt-key add - \ - && apt-get update && apt-get install -y oracle-java8-installer bazel - ARG WITH_AVX ARG WITH_DOC ARG WITH_SWIG_PY diff --git a/paddle/scripts/docker/Dockerfile.gpu b/paddle/scripts/docker/Dockerfile.gpu index d13b97714727acd6c1c57fb64603374faacc4fa5..a68cc79b84271c63d41a89494150381d96748b67 100644 --- a/paddle/scripts/docker/Dockerfile.gpu +++ b/paddle/scripts/docker/Dockerfile.gpu @@ -17,18 +17,6 @@ RUN cd /usr/src/gtest && cmake . && make && cp *.a /usr/lib RUN pip install -U BeautifulSoup docopt PyYAML pillow \ sphinx sphinx_rtd_theme recommonmark -# cmake tends to hide and blur the dependencies between code modules, as -# noted here https://github.com/PaddlePaddle/Paddle/issues/763. We are -# thinking about using Bazel to fix this problem, e.g., -# https://github.com/PaddlePaddle/Paddle/issues/681#issuecomment-263996102. To -# start the trail of fixing, we add Bazel to our Dockerfiles. 
-RUN apt-get update && apt-get install -y curl software-properties-common \ - && add-apt-repository ppa:webupd8team/java \ - && echo "oracle-java8-installer shared/accepted-oracle-license-v1-1 select true" | debconf-set-selections \ - && echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list \ - && curl https://bazel.build/bazel-release.pub.gpg | apt-key add - \ - && apt-get update && apt-get install -y oracle-java8-installer bazel - ARG WITH_AVX ARG WITH_DOC ARG WITH_SWIG_PY diff --git a/paddle/setup.py.in b/paddle/setup.py.in index b4c38a41b86683f89b6d02e9db97b75e9dca89ea..464ad632868bd1fd4d88547212421302ca0b2116 100644 --- a/paddle/setup.py.in +++ b/paddle/setup.py.in @@ -30,8 +30,10 @@ is_lin = (system == 'linux') # The extra links will passed from COMAKE # because generate paddle LDFLAGS is too complicated to do in setup.py # it just read COMAKE generated LDFLAGS. +extra_comps = [] extra_links = [] obj = api.paddle_ld_flags.PaddleLDFlag() +extra_comps = obj.c_flag() ldflags = obj.ldflag_str() if ldflags is not None: extra_links.extend(ldflags.split(" ")) @@ -51,20 +53,15 @@ elif is_osx == True: include_dirs = [np.get_include(), "../"] # include numpy and paddle. -extra_c = obj.c_flag() - -attr=dict() -if extra_c is not None: - attr["extra_compile_args"] = extra_c - setup(name="py_paddle", version="@PADDLE_VERSION@", ext_modules=[ Extension('py_paddle._swig_paddle', # Build SWIG Extension. 
['Paddle_wrap.cxx'], + language = "c++", include_dirs = include_dirs, extra_link_args = extra_links, - **attr + extra_compile_args = extra_comps ) ], packages=['py_paddle'], diff --git a/paddle/trainer/ThreadParameterUpdater.h b/paddle/trainer/ThreadParameterUpdater.h index 880f1f9ddc49a1193ce23901419d988cae84eb88..bc08a9e9f0eda1cab7776ba76c67e88add1028a9 100644 --- a/paddle/trainer/ThreadParameterUpdater.h +++ b/paddle/trainer/ThreadParameterUpdater.h @@ -33,8 +33,8 @@ namespace paddle { because at the current moment, the merging on CPU is happening on the main thread, and the its parameter size can be much larger than the one GPU. Thus, for GPU, the parameter updates happens in updateImpl() function, which - is called by gradient machines as a callback function as a callback function - supplied to backward() and forwardBackward(). + is called by gradient machines as a callback function supplied to backward() + and forwardBackward(). For CPU, the parameter updates happens in separate threads maintained by this class. */ diff --git a/paddle/utils/CommandLineParser.cpp b/paddle/utils/CommandLineParser.cpp deleted file mode 100644 index 63f16bc54c575a0d5ae02141be3c467ee784b095..0000000000000000000000000000000000000000 --- a/paddle/utils/CommandLineParser.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "CommandLineParser.h" - -namespace paddle { -#ifndef GFLAGS_NS -#define GFLAGS_NS google -#endif - -namespace gflags_ns = GFLAGS_NS; - -void ParseCommandLineFlags(int* argc, char** argv, bool withHelp) { - if (withHelp) { - gflags_ns::ParseCommandLineFlags(argc, &argv, true); - } else { - gflags_ns::ParseCommandLineNonHelpFlags(argc, &argv, true); - } -} - -} // namespace paddle diff --git a/paddle/utils/CommandLineParser.h b/paddle/utils/CommandLineParser.h deleted file mode 100644 index 4e89f90bb910cee1adc7fb8dace81ff58435351f..0000000000000000000000000000000000000000 --- a/paddle/utils/CommandLineParser.h +++ /dev/null @@ -1,22 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include <gflags/gflags.h> - -namespace paddle { -void ParseCommandLineFlags(int* argc, char** argv, bool withHelp = true); - -} // namespace paddle diff --git a/paddle/utils/CpuId.h b/paddle/utils/CpuId.h index 7a354da75851ed7cca4e85e77714624634951f00..1218e8194c4e837ca880744f92e769a68ba474de 100644 --- a/paddle/utils/CpuId.h +++ b/paddle/utils/CpuId.h @@ -11,7 +11,7 @@ limitations under the License. 
*/ #pragma once -#include "DisableCopy.h" +#include "common.h" namespace paddle { diff --git a/paddle/utils/CustomStackTrace.cpp b/paddle/utils/CustomStackTrace.cpp index 66b38218a7c7ec146f366ded516ebe22d012e47f..9723d7df9744989d9dd6035e51eae35764656065 100644 --- a/paddle/utils/CustomStackTrace.cpp +++ b/paddle/utils/CustomStackTrace.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "CustomStackTrace.h" +#include <gflags/gflags.h> #include <iostream> -#include "CommandLineParser.h" DEFINE_bool( layer_stack_error_only_current_thread, diff --git a/paddle/utils/DisableCopy.h b/paddle/utils/DisableCopy.h deleted file mode 100644 index 41de98bbde664651803c8db4c0cd7216b2ff4231..0000000000000000000000000000000000000000 --- a/paddle/utils/DisableCopy.h +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -/** - * Disable copy macro. - */ -#define DISABLE_COPY(CLASS_NAME) \ - CLASS_NAME(CLASS_NAME &&) = delete; \ - CLASS_NAME(const CLASS_NAME &other) = delete; \ - CLASS_NAME &operator=(const CLASS_NAME &other) = delete diff --git a/paddle/utils/Flags.h b/paddle/utils/Flags.h index 2ebbcb24eb061531d0807756528d7bf16e6aa124..3e72f8356d883b353127ccae80f2881320d20b2b 100644 --- a/paddle/utils/Flags.h +++ b/paddle/utils/Flags.h @@ -14,7 +14,7 @@ limitations under the License. 
*/ #pragma once -#include "CommandLineParser.h" +#include <gflags/gflags.h> DECLARE_bool(parallel_nn); DECLARE_int32(async_count); diff --git a/paddle/utils/Locks.h b/paddle/utils/Locks.h index 0f922f3548d97eb16ca897564faf1bf083f0d5ac..a21872e89ebc172b87c8b5c3731a89302f34f521 100644 --- a/paddle/utils/Locks.h +++ b/paddle/utils/Locks.h @@ -19,7 +19,7 @@ limitations under the License. */ #include <condition_variable> #include <mutex> -#include "DisableCopy.h" +#include "common.h" namespace paddle { diff --git a/paddle/utils/ThreadLocal.cpp b/paddle/utils/ThreadLocal.cpp index 75ccbd28cf21b7fafb43a072503dff14a29fec8a..d27dae33fd039bbefdbc65908e5ce7dc58eceab7 100644 --- a/paddle/utils/ThreadLocal.cpp +++ b/paddle/utils/ThreadLocal.cpp @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ThreadLocal.h" -#include "CommandLineParser.h" + +#include <gflags/gflags.h> + #include "Util.h" DEFINE_bool(thread_local_rand_use_global_seed, diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp index 7c0d66c488f5064641c53ea7995a75c330a3e49d..0f778dbebf4e124c7a240d738b8f73cef03fc477 100644 --- a/paddle/utils/Util.cpp +++ b/paddle/utils/Util.cpp @@ -24,10 +24,10 @@ limitations under the License. 
*/ #include <fstream> #include <mutex> -#include "paddle/utils/Logging.h" +#include <gflags/gflags.h> -#include "CommandLineParser.h" #include "CustomStackTrace.h" +#include "Logging.h" #include "StringUtil.h" #include "Thread.h" #include "ThreadLocal.h" @@ -152,7 +152,12 @@ void initMain(int argc, char** argv) { line += ' '; } LOG(INFO) << "commandline: " << line; - ParseCommandLineFlags(&argc, argv, true); + +#ifndef GFLAGS_GFLAGS_H_ + namespace gflags = google; +#endif + + gflags::ParseCommandLineFlags(&argc, &argv, true); CHECK_EQ(argc, 1) << "Unknown commandline argument: " << argv[1]; installProfilerSwitch(); diff --git a/paddle/utils/Util.h b/paddle/utils/Util.h index 24ddde28e7e9f44c32d70e1b9621954ee77b2883..dc15ada5862d648af27aa1b0e8c8a5cce012ded8 100644 --- a/paddle/utils/Util.h +++ b/paddle/utils/Util.h @@ -26,13 +26,11 @@ limitations under the License. */ #include <unordered_map> #include <vector> -#include "CommandLineParser.h" -#include "DisableCopy.h" #include "Logging.h" #include "TrainerConfig.pb.h" +#include "common.h" #include "Flags.h" -#include "TypeDefs.h" #include "hl_gpu.h" /** diff --git a/paddle/utils/Version.h b/paddle/utils/Version.h index d1a07d9485076e5382d47f7408fcbf032166b1ed..aa5df3243893145dbcc7e7ef2592555fc1c88fc9 100644 --- a/paddle/utils/Version.h +++ b/paddle/utils/Version.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include <stddef.h> #include <iostream> -#include "TypeDefs.h" +#include "common.h" namespace paddle { diff --git a/paddle/utils/TypeDefs.h b/paddle/utils/common.h similarity index 71% rename from paddle/utils/TypeDefs.h rename to paddle/utils/common.h index c50a05e82daefd1273c896f3603957f4484ecd5d..202a9d980d8350c230daaf473dd34d4069479e5f 100644 --- a/paddle/utils/TypeDefs.h +++ b/paddle/utils/common.h @@ -14,13 +14,20 @@ limitations under the License. */ #pragma once +/** + * Disable copy macro. 
+ */ +#define DISABLE_COPY(class_name) \ + class_name(class_name &&) = delete; \ + class_name(const class_name &other) = delete; \ + class_name &operator=(const class_name &other) = delete + namespace paddle { + #ifdef PADDLE_TYPE_DOUBLE -typedef double real; +using real = double; #else -typedef float real; +using real = float; #endif } // namespace paddle - -using paddle::real; diff --git a/paddle/utils/tests/test_CustomStackTrace.cpp b/paddle/utils/tests/test_CustomStackTrace.cpp index 2ce199837601755ac018889c07c223ad34c4a45b..18dd0aac4305006745dcd8e0a0717fb0fb939778 100644 --- a/paddle/utils/tests/test_CustomStackTrace.cpp +++ b/paddle/utils/tests/test_CustomStackTrace.cpp @@ -12,10 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include <gtest/gtest.h> #include <chrono> -#include "paddle/utils/CommandLineParser.h" +#include <gflags/gflags.h> +#include <gtest/gtest.h> + #include "paddle/utils/CustomStackTrace.h" #include "paddle/utils/Locks.h" #include "paddle/utils/Util.h" diff --git a/paddle/utils/tests/test_SpinLock.cpp b/paddle/utils/tests/test_SpinLock.cpp index 8351e7e3acd1afe1c6507ffced32f27ce065e5ce..605bedb6c912b0436f40e3eff93d5cf95d8dc489 100644 --- a/paddle/utils/tests/test_SpinLock.cpp +++ b/paddle/utils/tests/test_SpinLock.cpp @@ -12,9 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include <gtest/gtest.h> #include <vector> -#include "paddle/utils/CommandLineParser.h" + +#include <gflags/gflags.h> +#include <gtest/gtest.h> + #include "paddle/utils/Locks.h" #include "paddle/utils/Logging.h" #include "paddle/utils/Util.h" diff --git a/paddle/utils/tests/test_ThreadBarrier.cpp b/paddle/utils/tests/test_ThreadBarrier.cpp index 60c2214ffd1066ed4f7b95cd63dfe6a24fe66d67..1237f1b731b2fb733d6823619df2c574476b89de 100644 --- a/paddle/utils/tests/test_ThreadBarrier.cpp +++ b/paddle/utils/tests/test_ThreadBarrier.cpp @@ -12,10 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include <gtest/gtest.h> #include <set> #include <vector> -#include "paddle/utils/CommandLineParser.h" + +#include <gflags/gflags.h> +#include <gtest/gtest.h> + #include "paddle/utils/Locks.h" #include "paddle/utils/Logging.h" #include "paddle/utils/Util.h" diff --git a/python/paddle/trainer_config_helpers/__init__.py b/python/paddle/trainer_config_helpers/__init__.py index ef9859f8313efc09fe59f6eb5df92568e9413a40..13155ebddbb49c502d9d4110704ab09f49825be2 100644 --- a/python/paddle/trainer_config_helpers/__init__.py +++ b/python/paddle/trainer_config_helpers/__init__.py @@ -21,6 +21,5 @@ from networks import * from optimizers import * from attrs import * from config_parser_utils import * - # This will enable operator overload for LayerOutput -import math as layer_math +import layer_math diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py index 59bb18bfcab30540bd38ca8d1cb300813d30fee8..bf0208834600fef3bcf1b0496da8f5f77aea44c5 100644 --- a/python/paddle/trainer_config_helpers/attrs.py +++ b/python/paddle/trainer_config_helpers/attrs.py @@ -19,34 +19,34 @@ __all__ = [ def convert_and_compare(x, Type): - """ - Convert x to be the same type as Type and then convert back to - check whether there 
is a loss of information - :param x: object to be checked - :param Type: target type to check x over - + """ + Convert x to be the same type as Type and then convert back to + check whether there is a loss of information + :param x: object to be checked + :param Type: target type to check x over + """ return type(x)(Type(x)) == x def is_compatible_with(x, Type): - """ - Check if x has a type compatible with Type - :param x: object to be checked - :param Type: target type to check x over - + """ + Check if x has a type compatible with Type + :param x: object to be checked + :param Type: target type to check x over + """ if type(x) == Type: return True try: if float == Type or int == Type: - # avoid those types that can be converted to float/int but not very - # meaningful and could potentially lead to error - # i.e., str and bool typed value should not be used for initializing float/int variable + # avoid those types that can be converted to float/int but not very + # meaningful and could potentially lead to error + # i.e., str and bool typed value should not be used for initializing float/int variable if not isinstance(x, str) and not isinstance(x, bool): return convert_and_compare(x, Type) elif bool == Type: - # should not use string type to initialize bool variable + # should not use string type to initialize bool variable if not isinstance(x, str): return convert_and_compare(x, Type) else: @@ -88,6 +88,10 @@ class ParameterAttribute(object): :type learning_rate: float or None :param momentum: The parameter momentum. None means use global value. :type momentum: float or None + :param gradient_clipping_threshold: gradient clipping threshold. If gradient + value larger than some value, will be + clipped. + :type gradient_clipping_threshold: float :param sparse_update: Enable sparse update for this parameter. It will enable both local and remote sparse update. 
:type sparse_update: bool @@ -104,6 +108,7 @@ class ParameterAttribute(object): l2_rate=None, learning_rate=None, momentum=None, + gradient_clipping_threshold=None, sparse_update=False): # initialize strategy. if is_static: @@ -152,6 +157,11 @@ class ParameterAttribute(object): self.attr['sparse_update'] = True self.attr['sparse_remote_update'] = True + if gradient_clipping_threshold is not None and \ + is_compatible_with(gradient_clipping_threshold, float): + self.attr['gradient_clipping_threshold'] = \ + gradient_clipping_threshold + def set_default_parameter_name(self, name): """ Set default parameter name. If parameter not set, then will use default diff --git a/python/paddle/trainer_config_helpers/math.py b/python/paddle/trainer_config_helpers/layer_math.py similarity index 100% rename from python/paddle/trainer_config_helpers/math.py rename to python/paddle/trainer_config_helpers/layer_math.py diff --git a/third_party/gflags.BUILD b/third_party/gflags.BUILD deleted file mode 100644 index 85e8bd0bd74942102e5e9a9f817dc49383a745e7..0000000000000000000000000000000000000000 --- a/third_party/gflags.BUILD +++ /dev/null @@ -1,12 +0,0 @@ -# Bazel (http://bazel.io/) BUILD file for gflags. -# -# See INSTALL.md for instructions for adding gflags to a Bazel workspace. 
- -licenses(["notice"]) - -exports_files(["src/gflags_complections.sh", "COPYING.txt"]) - -load(":bazel/gflags.bzl", "gflags_sources", "gflags_library") -(hdrs, srcs) = gflags_sources(namespace=["google", "gflags"]) -gflags_library(hdrs=hdrs, srcs=srcs, threads=0) -gflags_library(hdrs=hdrs, srcs=srcs, threads=1) diff --git a/third_party/gflags_test/BUILD b/third_party/gflags_test/BUILD deleted file mode 100644 index b50615203ba17c74a4c7611b685f3d3210389bbf..0000000000000000000000000000000000000000 --- a/third_party/gflags_test/BUILD +++ /dev/null @@ -1,10 +0,0 @@ -licenses(["notice"]) # Apache 2.0 - -cc_test( - name="gflags_test", - srcs=["gflags_test.cc"], - copts=["-Iexternal/gtest/include"], - deps=[ - "@gtest//:gtest", - "@gflags//:gflags", - ], ) diff --git a/third_party/gflags_test/gflags_test.cc b/third_party/gflags_test/gflags_test.cc deleted file mode 100644 index 53286e7e5be062cf66b37d07047b173ea831e6c4..0000000000000000000000000000000000000000 --- a/third_party/gflags_test/gflags_test.cc +++ /dev/null @@ -1,33 +0,0 @@ -#include <iostream> -#include <string> - -#include "gflags/gflags.h" -#include "gtest/gtest.h" - -DEFINE_bool(verbose, false, "Display program name before message"); -DEFINE_string(message, "Hello world!", "Message to print"); - -static bool IsNonEmptyMessage(const char *flagname, const std::string &value) { - return value[0] != '\0'; -} -DEFINE_validator(message, &IsNonEmptyMessage); - -namespace third_party { -namespace gflags_test { - -TEST(GflagsTest, ParseAndPrint) { - gflags::SetUsageMessage("some usage message"); - gflags::SetVersionString("1.0.0"); - int argc = 1; - char program_name[] = "gflags_test"; - char **argv = new char *[2]; - argv[0] = program_name; - argv[1] = NULL; - gflags::ParseCommandLineFlags(&argc, reinterpret_cast<char ***>(&argv), true); - EXPECT_EQ("gflags_test", std::string(gflags::ProgramInvocationShortName())); - EXPECT_EQ("Hello world!", FLAGS_message); - gflags::ShutDownCommandLineFlags(); -} - -} // 
namespace gflags_test -} // namespace third_party diff --git a/third_party/glog.BUILD b/third_party/glog.BUILD deleted file mode 100644 index a0ff1d6b416c2217b62f64bceee3c6a611c11dfe..0000000000000000000000000000000000000000 --- a/third_party/glog.BUILD +++ /dev/null @@ -1,128 +0,0 @@ -licenses(["notice"]) - -cc_library( - visibility=["//visibility:public"], - name="glog", - includes=[ - ".", - "src", - ], - copts=[ - "-D_START_GOOGLE_NAMESPACE_='namespace google {'", - "-D_END_GOOGLE_NAMESPACE_='}'", - "-DGOOGLE_NAMESPACE='google'", - "-DGOOGLE_GLOG_DLL_DECL=''", - "-DHAVE_DLADDR", - "-DHAVE_SNPRINTF", - "-DHAVE_DLFCN_H", - "-DHAVE_FCNTL", - "-DHAVE_GLOB_H", - "-DHAVE_INTTYPES_H", - "-DHAVE_LIBPTHREAD", - "-DHAVE_SYS_SYSCALL_H", - "-DHAVE_MEMORY_H", - "-DHAVE_NAMESPACES", - "-DHAVE_PREAD", - "-DHAVE_PTHREAD", - "-DHAVE_PWD_H", - "-DHAVE_PWRITE", - "-DHAVE_RWLOCK", - "-DHAVE_SIGACTION", - "-DHAVE_SIGALTSTACK", - "-DHAVE_STDINT_H", - "-DHAVE_STRING_H", - "-DHAVE_SYS_TIME_H", - "-DHAVE_SYS_TYPES_H", - "-DHAVE_SYS_UCONTEXT_H", - "-DHAVE_SYS_UTSNAME_H", - "-DHAVE_UNISTD_H", - "-DHAVE_USING_OPERATOR", - "-DHAVE_HAVE___ATTRIBUTE___", - "-DHAVE_HAVE___BUILTIN_EXPECT", - #"-DNO_FRAME_POINTER", - "-D_GNU_SOURCE", - #"-fno-sanitize=thread", - #"-fno-sanitize=address", - "-Iexternal/glog/src", - ], - srcs=[ - "src/demangle.cc", - "src/logging.cc", - "src/raw_logging.cc", - "src/signalhandler.cc", - "src/symbolize.cc", - "src/utilities.cc", - "src/vlog_is_on.cc", - ":config_h", - ":logging_h", - ":raw_logging_h", - ":stl_logging_h", - ":vlog_is_on_h", - ], - hdrs=[ - "src/demangle.h", - "src/mock-log.h", - "src/stacktrace.h", - "src/symbolize.h", - "src/utilities.h", - "src/base/commandlineflags.h", - "src/base/googleinit.h", - "src/base/mutex.h", - "src/glog/log_severity.h", - ]) - -genrule( - name="config_h", - srcs=["src/config.h.cmake.in"], - outs=["config.h"], - cmd="awk '{ gsub(/^#cmakedefine/, \"//cmakedefine\"); print; }' $(<) > $(@)", -) - -genrule( - 
name="logging_h", - srcs=["src/glog/logging.h.in"], - outs=["glog/logging.h"], - cmd="$(location :gen_sh) < $(<) > $(@)", - tools=[":gen_sh"]) - -genrule( - name="raw_logging_h", - srcs=["src/glog/raw_logging.h.in"], - outs=["glog/raw_logging.h"], - cmd="$(location :gen_sh) < $(<) > $(@)", - tools=[":gen_sh"]) - -genrule( - name="stl_logging_h", - srcs=["src/glog/stl_logging.h.in"], - outs=["glog/stl_logging.h"], - cmd="$(location :gen_sh) < $(<) > $(@)", - tools=[":gen_sh"]) - -genrule( - name="vlog_is_on_h", - srcs=["src/glog/vlog_is_on.h.in"], - outs=["glog/vlog_is_on.h"], - cmd="$(location :gen_sh) < $(<) > $(@)", - tools=[":gen_sh"]) - -genrule( - name="gen_sh", - outs=["gen.sh"], - cmd=""" -cat > $@ <<"EOF" -#! /bin/sh -sed -e 's/@ac_cv_have_unistd_h@/1/g' \ - -e 's/@ac_cv_have_stdint_h@/1/g' \ - -e 's/@ac_cv_have_systypes_h@/1/g' \ - -e 's/@ac_cv_have_libgflags_h@/1/g' \ - -e 's/@ac_cv_have_uint16_t@/1/g' \ - -e 's/@ac_cv_have___builtin_expect@/1/g' \ - -e 's/@ac_cv_have_.*@/0/g' \ - -e 's/@ac_google_start_namespace@/namespace google {/g' \ - -e 's/@ac_google_end_namespace@/}/g' \ - -e 's/@ac_google_namespace@/google/g' \ - -e 's/@ac_cv___attribute___noinline@/__attribute__((noinline))/g' \ - -e 's/@ac_cv___attribute___noreturn@/__attribute__((noreturn))/g' \ - -e 's/@ac_cv___attribute___printf_4_5@/__attribute__((__format__ (__printf__, 4, 5)))/g' -EOF""") diff --git a/third_party/glog_test/BUILD b/third_party/glog_test/BUILD deleted file mode 100644 index 56d08e95f8e8f063829ae68586fa9ef53306fef6..0000000000000000000000000000000000000000 --- a/third_party/glog_test/BUILD +++ /dev/null @@ -1,10 +0,0 @@ -licenses(["notice"]) # Apache 2.0 - -cc_test( - name="glog_test", - srcs=["glog_test.cc"], - copts=["-Iexternal/gtest/include"], - deps=[ - "@gtest//:gtest", - "@glog//:glog", - ], ) diff --git a/third_party/glog_test/glog_test.cc b/third_party/glog_test/glog_test.cc deleted file mode 100644 index 
f1d737d625d25e8675f636075876903c42881a35..0000000000000000000000000000000000000000 --- a/third_party/glog_test/glog_test.cc +++ /dev/null @@ -1,7 +0,0 @@ -#include <iostream> -#include <string> - -#include "glog/logging.h" -#include "gtest/gtest.h" - -TEST(GlogTest, Logging) { LOG(INFO) << "Hello world"; } diff --git a/third_party/gtest.BUILD b/third_party/gtest.BUILD deleted file mode 100644 index 9255b51d9aaa9c7ee5cbc1b2d537815c7ecbfcba..0000000000000000000000000000000000000000 --- a/third_party/gtest.BUILD +++ /dev/null @@ -1,8 +0,0 @@ -cc_library( - name="gtest", - srcs=glob( - ["src/*.cc"], exclude=["src/gtest-all.cc"]), - hdrs=glob(["include/**/*.h", "src/*.h"]), - copts=["-Iexternal/gtest/include"], - linkopts=["-pthread"], - visibility=["//visibility:public"], ) diff --git a/third_party/protobuf_test/BUILD b/third_party/protobuf_test/BUILD deleted file mode 100644 index 67d4293c70eef081f6bb95de9774613a19ba91dd..0000000000000000000000000000000000000000 --- a/third_party/protobuf_test/BUILD +++ /dev/null @@ -1,24 +0,0 @@ -licenses(["notice"]) # Apache 2.0 - -load("@protobuf//:protobuf.bzl", "cc_proto_library") - -cc_proto_library( - name="example_proto", - srcs=["example.proto"], - protoc="@protobuf//:protoc", - default_runtime="@protobuf//:protobuf", ) - -cc_library( - name="example_lib", - srcs=["example_lib.cc"], - hdrs=["example_lib.h"], - deps=[":example_proto"], ) - -cc_test( - name="example_lib_test", - srcs=["example_lib_test.cc"], - copts=["-Iexternal/gtest/include"], - deps=[ - "@gtest//:gtest", - ":example_lib", - ], ) diff --git a/third_party/protobuf_test/README.md b/third_party/protobuf_test/README.md deleted file mode 100644 index e8bdeee6fee66ef79d0b813b4d8dfa4c180754c6..0000000000000000000000000000000000000000 --- a/third_party/protobuf_test/README.md +++ /dev/null @@ -1 +0,0 @@ -This package tests that Bazel can build protobuf related rules. 
diff --git a/third_party/protobuf_test/example.proto b/third_party/protobuf_test/example.proto deleted file mode 100644 index 6a7eada9c14a9df5d3ef8971b636c14a11da3d11..0000000000000000000000000000000000000000 --- a/third_party/protobuf_test/example.proto +++ /dev/null @@ -1,7 +0,0 @@ -syntax = "proto3"; - -package third_party.protobuf_test; - -message Greeting { - string name = 1; -} diff --git a/third_party/protobuf_test/example_lib.cc b/third_party/protobuf_test/example_lib.cc deleted file mode 100644 index ced377bc0a17dde31c5c853dec1a852fa0be7223..0000000000000000000000000000000000000000 --- a/third_party/protobuf_test/example_lib.cc +++ /dev/null @@ -1,9 +0,0 @@ -#include "third_party/protobuf_test/example_lib.h" - -namespace third_party { -namespace protobuf_test { - -std::string get_greet(const Greeting& who) { return "Hello " + who.name(); } - -} // namespace protobuf_test -} // namespace thrid_party diff --git a/third_party/protobuf_test/example_lib.h b/third_party/protobuf_test/example_lib.h deleted file mode 100644 index 516326e812e19eb162f5392b519904a65c66c660..0000000000000000000000000000000000000000 --- a/third_party/protobuf_test/example_lib.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include "third_party/protobuf_test/example.pb.h" - -#include <string> - -namespace third_party { -namespace protobuf_test { - -std::string get_greet(const Greeting &who); - -} // namespace protobuf_test -} // namespace third_party diff --git a/third_party/protobuf_test/example_lib_test.cc b/third_party/protobuf_test/example_lib_test.cc deleted file mode 100644 index 6229f56e6026908fff991765bd6bdaff6f8236ac..0000000000000000000000000000000000000000 --- a/third_party/protobuf_test/example_lib_test.cc +++ /dev/null @@ -1,15 +0,0 @@ -#include "third_party/protobuf_test/example_lib.h" - -#include "gtest/gtest.h" - -namespace third_party { -namespace protobuf_test { - -TEST(ProtobufTest, GetGreet) { - Greeting g; - g.set_name("Paddle"); - EXPECT_EQ("Hello Paddle", 
get_greet(g)); -} - -} // namespace protobuf_test -} // namespace third_party