diff --git a/benchmark/IntelOptimizedPaddle.md b/benchmark/IntelOptimizedPaddle.md index 040f5ffa41968cbf93a817faa1db86c18956341e..16c2390fd31bf1c79f29735fb98180d3f7302eb2 100644 --- a/benchmark/IntelOptimizedPaddle.md +++ b/benchmark/IntelOptimizedPaddle.md @@ -12,11 +12,11 @@ Machine: System: CentOS release 6.3 (Final), Docker 1.12.1. -PaddlePaddle: paddlepaddle/paddle:latest (TODO: will rerun after 0.11.0) - -- MKL-DNN tag v0.10 -- MKLML 2018.0.20170720 +PaddlePaddle: paddlepaddle/paddle:latest (for MKLML and MKL-DNN), paddlepaddle/paddle:latest-openblas (for OpenBLAS) +- MKL-DNN tag v0.11 +- MKLML 2018.0.1.20171007 - OpenBLAS v0.2.20 +(TODO: will rerun after 0.11.0) On each machine, we will test and compare the performance of training on single node using MKL-DNN / MKLML / OpenBLAS respectively. @@ -31,17 +31,37 @@ Input image size - 3 * 224 * 224, Time: images/second | BatchSize | 64 | 128 | 256 | |--------------|-------| -----| --------| -| OpenBLAS | 7.82 | 8.62 | 10.34 | -| MKLML | 11.02 | 12.86 | 15.33 | -| MKL-DNN | 27.69 | 28.8 | 29.27 | +| OpenBLAS | 7.80 | 9.00 | 10.80 | +| MKLML | 12.12 | 13.70 | 16.18 | +| MKL-DNN | 28.46 | 29.83 | 30.44 | + + +chart on batch size 128 +TBD + + - ResNet-50 + +| BatchSize | 64 | 128 | 256 | +|--------------|-------| ------| -------| +| OpenBLAS | 25.22 | 25.68 | 27.12 | +| MKLML | 32.52 | 31.89 | 33.12 | +| MKL-DNN | 81.69 | 82.35 | 84.08 | chart on batch size 128 TBD - - ResNet - GoogLeNet +| BatchSize | 64 | 128 | 256 | +|--------------|-------| ------| -------| +| OpenBLAS | 89.52 | 96.97 | 108.25 | +| MKLML | 128.46| 137.89| 158.63 | +| MKL-DNN     | 250.46| 264.83| 269.50 | + +chart on batch size 128 +TBD + ### Laptop TBD ### Desktop diff --git a/benchmark/paddle/image/googlenet.py b/benchmark/paddle/image/googlenet.py index bc893bab98c4d2e07c62fbd012d51a0939db4766..a88ecac67d9e677f14f6dc24ba9a337b1245243f 100644 --- a/benchmark/paddle/image/googlenet.py +++ b/benchmark/paddle/image/googlenet.py @@ -5,6 +5,7 @@ height = 224 width = 224 num_class = 1000 batch_size = get_config_arg('batch_size', int, 128) +use_gpu = get_config_arg('use_gpu', bool, True) args = {'height': height, 'width': width, 'color': True, 'num_class': num_class} define_py_data_sources2( @@ -16,6 +17,8 @@ settings( learning_method=MomentumOptimizer(0.9), regularization=L2Regularization(0.0005 * batch_size)) +conv_projection = conv_projection if use_gpu else img_conv_layer + def inception2(name, input, channels, \ filter1, filter3R, filter3, @@ -138,7 +141,7 @@ def inception(name, input, channels, \ cat = concat_layer( name=name, input=[cov1, cov3, cov5, covprj], - bias_attr=True, + bias_attr=True if use_gpu else False, act=ReluActivation()) return cat diff --git a/benchmark/paddle/image/run_mkldnn.sh b/benchmark/paddle/image/run_mkldnn.sh index 3cc779b48d082985f75ab1c053fbe262bc6d58aa..f768f6c29a84b40f917e0ccfde4d8c15f65c818b 100755 --- a/benchmark/paddle/image/run_mkldnn.sh +++ b/benchmark/paddle/image/run_mkldnn.sh @@ -40,6 +40,7 @@ fi for use_mkldnn in True False; do for batchsize in 64 128 256; do train vgg 19 $batchsize $use_mkldnn - train resnet 50 $batchsize $use_mkldnn + train resnet 50 $batchsize $use_mkldnn + train googlenet v1 $batchsize $use_mkldnn done done diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index c819eb4d70898e48eab499c666168d78262d4240..d4f252bb9f64c8db82b841fedf0817f5d8596501 100644 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -28,15 +28,8 @@ INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR}) ExternalProject_Add( extern_gflags ${EXTERNAL_PROJECT_LOG_ARGS} - # TODO(yiwang): The annoying warnings mentioned in - # https://github.com/PaddlePaddle/Paddle/issues/3277 are caused by - # gflags. I fired a PR https://github.com/gflags/gflags/pull/230 - # to fix it. Before it gets accepted by the gflags team, we use - # my personal fork, which contains above fix, temporarily. Let's - # change this back to the official Github repo once my PR is - # merged. - GIT_REPOSITORY "https://github.com/wangkuiyi/gflags.git" - GIT_TAG 986964c07427ecb9cdb5bd73f73ebbd40e54dadb + GIT_REPOSITORY "https://github.com/gflags/gflags.git" + GIT_TAG 77592648e3f3be87d6c7123eb81cbad75f9aef5a PREFIX ${GFLAGS_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/cmake/generic.cmake b/cmake/generic.cmake index b9c1dde97bc444d793d67ff622fd6b13c6435a9a..7b82d409a3b64a5fc8fdfe526a2e82a4e1c9fa8e 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -459,11 +459,11 @@ function(py_test TARGET_NAME) if(WITH_TESTING) set(options STATIC static SHARED shared) set(oneValueArgs "") - set(multiValueArgs SRCS DEPS) - cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(multiValueArgs SRCS DEPS ARGS) + cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python - python2 ${py_test_SRCS} + ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() endfunction() diff --git a/cmake/util.cmake b/cmake/util.cmake index ad905ab55ba3537054fa5b30b5fca4d83c406702..0dc33ce385175d1e2dc454d41db467d4b9d9cf9a 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -168,17 +168,3 @@ function(create_resources res_file output_file) COMMAND python ARGS ${PADDLE_SOURCE_DIR}/cmake/make_resource.py ${res_file} ${output_file} DEPENDS ${res_file} ${PADDLE_SOURCE_DIR}/cmake/make_resource.py) endfunction() - - -# Create a python unittest using run_python_tests.sh, -# which takes care of making correct running environment -function(add_python_test TEST_NAME) - foreach(arg ${ARGN}) - get_filename_component(py_fn ${arg} NAME_WE) - set(TRG_NAME ${TEST_NAME}_${py_fn}) - add_test(NAME ${TRG_NAME} - COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR} - python2 ${arg} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - endforeach() -endfunction() diff --git a/doc/design/reader/README.md b/doc/design/reader/README.md index 320dccec3ddc7bfe6042f4e65b2518ea7b1ad24a..2cd4b6225b61cf374458e40afabad7745f61ba71 100644 --- a/doc/design/reader/README.md +++ b/doc/design/reader/README.md @@ -1,25 +1,25 @@ # Python Data Reader Design Doc -At training and testing time, PaddlePaddle programs need to read data. To ease the users' work to write data reading code, we define that +During the training and testing phases, PaddlePaddle programs need to read data. To help the users write code that performs reading input data, we define the following: -- A *reader* is a function that reads data (from file, network, random number generator, etc) and yields data items. -- A *reader creator* is a function that returns a reader function. -- A *reader decorator* is a function, which accepts one or more readers, and returns a reader. -- A *batch reader* is a function that reads data (from *reader*, file, network, random number generator, etc) and yields a batch of data items. +- A *reader*: A function that reads data (from file, network, random number generator, etc) and yields the data items. +- A *reader creator*: A function that returns a reader function. +- A *reader decorator*: A function, which takes in one or more readers, and returns a reader. +- A *batch reader*: A function that reads data (from *reader*, file, network, random number generator, etc) and yields a batch of data items. -and provide function which converts reader to batch reader, frequently used reader creators and reader decorators. +and also provide a function which can convert a reader to a batch reader, frequently used reader creators and reader decorators. ## Data Reader Interface -Indeed, *data reader* doesn't have to be a function that reads and yields data items. It can be any function with no parameter that creates a iterable (anything can be used in `for x in iterable`): +*Data reader* doesn't have to be a function that reads and yields data items. It can just be any function without any parameters that creates an iterable (anything can be used in `for x in iterable`) as follows: ``` iterable = data_reader() ``` -Element produced from the iterable should be a **single** entry of data, **not** a mini batch. That entry of data could be a single item, or a tuple of items. Item should be of [supported type](http://www.paddlepaddle.org/doc/ui/data_provider/pydataprovider2.html?highlight=dense_vector#input-types) (e.g., numpy 1d array of float32, int, list of int) +The item produced from the iterable should be a **single** entry of data and **not** a mini batch. The entry of data could be a single item or a tuple of items. Item should be of one of the [supported types](http://www.paddlepaddle.org/doc/ui/data_provider/pydataprovider2.html?highlight=dense_vector#input-types) (e.g., numpy 1d array of float32, int, list of int etc.) -An example implementation for single item data reader creator: +An example implementation for single item data reader creator is as follows: ```python def reader_creator_random_image(width, height): @@ -29,7 +29,7 @@ def reader_creator_random_image(width, height): return reader ``` -An example implementation for multiple item data reader creator: +An example implementation for multiple item data reader creator is as follows: ```python def reader_creator_random_image_and_label(width, height, label): def reader(): @@ -40,9 +40,10 @@ def reader_creator_random_image_and_label(width, height, label): ## Batch Reader Interface -*batch reader* can be any function with no parameter that creates a iterable (anything can be used in `for x in iterable`). The output of the iterable should be a batch (list) of data items. Each item inside the list must be a tuple. +*Batch reader* can be any function without any parameters that creates an iterable (anything can be used in `for x in iterable`). The output of the iterable should be a batch (list) of data items. Each item inside the list should be a tuple. + +Here are some valid outputs: -Here are valid outputs: ```python # a mini batch of three data items. Each data item consist three columns of data, each of which is 1. [(1, 1, 1), @@ -58,20 +59,22 @@ Here are valid outputs: Please note that each item inside the list must be a tuple, below is an invalid output: ```python # wrong, [1,1,1] needs to be inside a tuple: ([1,1,1],). - # Otherwise it's ambiguous whether [1,1,1] means a single column of data [1, 1, 1], - # or three column of datas, each of which is 1. + # Otherwise it is ambiguous whether [1,1,1] means a single column of data [1, 1, 1], + # or three columns of data, each of which is 1. [[1,1,1], [2,2,2], [3,3,3]] ``` -It's easy to convert from reader to batch reader: +It is easy to convert from a reader to a batch reader: + ```python mnist_train = paddle.dataset.mnist.train() mnist_train_batch_reader = paddle.batch(mnist_train, 128) ``` -Also easy to create custom batch reader: +It is also straight forward to create a custom batch reader: + ```python def custom_batch_reader(): while True: @@ -85,7 +88,8 @@ mnist_random_image_batch_reader = custom_batch_reader ## Usage -batch reader, mapping from item(s) read to data layer, batch size and number of total pass will be passed into `paddle.train`: +Following is how we can use the reader with PaddlePaddle: +The batch reader, a mapping from item(s) to data layer, the batch size and the number of total passes will be passed into `paddle.train` as follows: ```python # two data layer is created: @@ -99,13 +103,13 @@ paddle.train(batch_reader, {"image":0, "label":1}, 128, 10, ...) ## Data Reader Decorator -*Data reader decorator* takes a single or multiple data reader, returns a new data reader. It is similar to a [python decorator](https://wiki.python.org/moin/PythonDecorators), but it does not use `@` syntax. +The *Data reader decorator* takes in a single reader or multiple data readers and returns a new data reader. It is similar to a [python decorator](https://wiki.python.org/moin/PythonDecorators), but it does not use `@` in the syntax. -Since we have a strict interface for data readers (no parameter, return a single data item). Data reader can be used flexiable via data reader decorators. Following are a few examples: +Since we have a strict interface for data readers (no parameters and return a single data item), a data reader can be used in a flexible way using data reader decorators. Following are a few examples: ### Prefetch Data -Since reading data may take time and training can not proceed without data. It is generally a good idea to prefetch data. +Since reading data may take some time and training can not proceed without data, it is generally a good idea to prefetch the data. Use `paddle.reader.buffered` to prefetch data: @@ -117,9 +121,9 @@ buffered_reader = paddle.reader.buffered(paddle.dataset.mnist.train(), 100) ### Compose Multiple Data Readers -For example, we want to use a source of real images (reusing mnist dataset), and a source of random images as input for [Generative Adversarial Networks](https://arxiv.org/abs/1406.2661). +For example, if we want to use a source of real images (say reusing mnist dataset), and a source of random images as input for [Generative Adversarial Networks](https://arxiv.org/abs/1406.2661). -We can do: +We can do the following : ```python def reader_creator_random_image(width, height): @@ -139,13 +143,13 @@ false_reader = reader_creator_bool(False) reader = paddle.reader.compose(paddle.dataset.mnist.train(), data_reader_creator_random_image(20, 20), true_reader, false_reader) # Skipped 1 because paddle.dataset.mnist.train() produces two items per data entry. -# And we don't care second item at this time. +# And we don't care about the second item at this time. paddle.train(paddle.batch(reader, 128), {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...) ``` ### Shuffle -Given shuffle buffer size `n`, `paddle.reader.shuffle` will return a data reader that buffers `n` data entries and shuffle them before a data entry is read. +Given the shuffle buffer size `n`, `paddle.reader.shuffle` returns a data reader that buffers `n` data entries and shuffles them before a data entry is read. Example: ```python @@ -154,21 +158,21 @@ reader = paddle.reader.shuffle(paddle.dataset.mnist.train(), 512) ## Q & A -### Why reader return only a single entry, but not a mini batch? +### Why does a reader return only a single entry, and not a mini batch? -Always returning a single entry make reusing existing data readers much easier (e.g., if existing reader return not a single entry but 3 entries, training code will be more complex because it need to handle cases like batch size 2). +Returning a single entry makes reusing existing data readers much easier (for example, if an existing reader returns 3 entries instead if a single entry, the training code will be more complicated because it need to handle cases like a batch size 2). -We provide function `paddle.batch` to turn (single entry) reader into batch reader. +We provide a function: `paddle.batch` to turn (a single entry) reader into a batch reader. -### Why do we need batch reader, isn't train take reader and batch_size as arguments sufficient? +### Why do we need a batch reader, isn't is sufficient to give the reader and batch_size as arguments during training ? -In most of the case, train taking reader and batch_size as arguments would be sufficent. However sometimes user want to customize order of data entries inside a mini batch. Or even change batch size dynamically. +In most of the cases, it would be sufficient to give the reader and batch_size as arguments to the train method. However sometimes the user wants to customize the order of data entries inside a mini batch, or even change the batch size dynamically. For these cases using a batch reader is very efficient and helpful. -### Why use a dictionary but not a list to provide mapping? +### Why use a dictionary instead of a list to provide mapping? -We decided to use dictionary (`{"image":0, "label":1}`) instead of list (`["image", "label"]`) is because that user can easily resue item (e.g., using `{"image_a":0, "image_b":0, "label":1}`) or skip item (e.g., using `{"image_a":0, "label":2}`). +Using a dictionary (`{"image":0, "label":1}`) instead of a list (`["image", "label"]`) gives the advantage that the user can easily reuse the items (e.g., using `{"image_a":0, "image_b":0, "label":1}`) or even skip an item (e.g., using `{"image_a":0, "label":2}`). -### How to create custom data reader creator +### How to create a custom data reader creator ? ```python def image_reader_creator(image_path, label_path, n): @@ -192,7 +196,7 @@ paddle.train(paddle.batch(reader, 128), {"image":0, "label":1}, ...) ### How is `paddle.train` implemented -An example implementation of paddle.train could be: +An example implementation of paddle.train is: ```python def train(batch_reader, mapping, batch_size, total_pass): diff --git a/doc/getstarted/basic_usage/index_cn.rst b/doc/getstarted/basic_usage/index_cn.rst deleted file mode 100644 index b473944fc7fb89d3e0a0b330933f2226734bb5bd..0000000000000000000000000000000000000000 --- a/doc/getstarted/basic_usage/index_cn.rst +++ /dev/null @@ -1,108 +0,0 @@ -经典的线性回归任务 -================== - -PaddlePaddle是源于百度的一个深度学习平台。这份简短的介绍将向你展示如何利用PaddlePaddle来解决一个经典的线性回归问题。 - -任务简介 --------- - -我们展示如何用PaddlePaddle解决 `单变量的线性回归 `_ 问题。线性回归的输入是一批点 `(x, y)` ,其中 `y = wx + b + ε`, 而 ε 是一个符合高斯分布的随机变量。线性回归的输出是从这批点估计出来的参数 `w` 和 `b` 。 - -一个例子是房产估值。我们假设房产的价格(y)是其大小(x)的一个线性函数,那么我们可以通过收集市场上房子的大小和价格,用来估计线性函数的参数w 和 b。 - -准备数据 ------------ - -假设变量 `x` 和 `y` 的真实关系为: `y = 2x + 0.3 + ε`,这里展示如何使用观测数据来拟合这一线性关系。首先,Python代码将随机产生2000个观测点,作为线性回归的输入。下面脚本符合PaddlePaddle期待的读取数据的Python程序的模式。 - -.. code-block:: python - - # dataprovider.py - from paddle.trainer.PyDataProvider2 import * - import random - - # 定义输入数据的类型: 2个浮点数 - @provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False) - def process(settings, input_file): - for i in xrange(2000): - x = random.random() - yield [x], [2*x+0.3] - -训练模型 ------------ - -为了还原 `y = 2x + 0.3`,我们先从一条随机的直线 `y' = wx + b` 开始,然后利用观测数据调整 `w` 和 `b` 使得 `y'` 和 `y` 的差距不断减小,最终趋于接近。这个过程就是模型的训练过程,而 `w` 和 `b` 就是模型的参数,即我们的训练目标。 - -在PaddlePaddle里,该模型的网络配置如下。 - -.. code-block:: python - - # trainer_config.py - from paddle.trainer_config_helpers import * - - # 1. 定义数据来源,调用上面的process函数获得观测数据 - data_file = 'empty.list' - with open(data_file, 'w') as f: f.writelines(' ') - define_py_data_sources2(train_list=data_file, test_list=None, - module='dataprovider', obj='process',args={}) - - # 2. 学习算法。控制如何改变模型参数 w 和 b - settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer()) - - # 3. 神经网络配置 - x = data_layer(name='x', size=1) - y = data_layer(name='y', size=1) - # 线性计算网络层: ȳ = wx + b - ȳ = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b')) - # 计算误差函数,即 ȳ 和真实 y 之间的距离 - cost = square_error_cost(input= ȳ, label=y) - outputs(cost) - - -这段简短的配置展示了PaddlePaddle的基本用法: - -- 第一部分定义了数据输入。一般情况下,PaddlePaddle先从一个文件列表里获得数据文件地址,然后交给用户自定义的函数(例如上面的 `process`函数)进行读入和预处理从而得到真实输入。本文中由于输入数据是随机生成的不需要读输入文件,所以放一个空列表(`empty.list`)即可。 - -- 第二部分主要是选择学习算法,它定义了模型参数改变的规则。PaddlePaddle提供了很多优秀的学习算法,这里使用一个基于momentum的随机梯度下降(SGD)算法,该算法每批量(batch)读取12个采样数据进行随机梯度计算来更新更新。 - -- 最后一部分是神经网络的配置。由于PaddlePaddle已经实现了丰富的网络层,所以很多时候你需要做的只是定义正确的网络层并把它们连接起来。这里使用了三种网络单元: - - - **数据层**:数据层 `data_layer` 是神经网络的入口,它读入数据并将它们传输到接下来的网络层。这里数据层有两个,分别对应于变量 `x` 和 `y`。 - - **全连接层**:全连接层 `fc_layer` 是基础的计算单元,这里利用它建模变量之间的线性关系。计算单元是神经网络的核心,PaddlePaddle支持大量的计算单元和任意深度的网络连接,从而可以拟合任意的函数来学习复杂的数据关系。 - - **回归误差代价层**:回归误差代价层 `square_error_cost` 是众多误差代价函数层的一种,它们在训练过程作为网络的出口,用来计算模型的误差,是模型参数优化的目标函数。 - -定义了网络结构并保存为 `trainer_config.py` 之后,运行以下训练命令: - -.. code-block:: bash - - paddle train --config=trainer_config.py --save_dir=./output --num_passes=30 - -PaddlePaddle将在观测数据集上迭代训练30轮,并将每轮的模型结果存放在 `./output` 路径下。从输出日志可以看到,随着轮数增加误差代价函数的输出在不断的减小,这意味着模型在训练数据上不断的改进,直到逼近真实解:` y = 2x + 0.3 ` - -模型检验 ------------ - -训练完成后,我们希望能够检验模型的好坏。一种常用的做法是用学习的模型对另外一组测试数据进行预测,评价预测的效果。在这个例子中,由于已经知道了真实答案,我们可以直接观察模型的参数是否符合预期来进行检验。 - -PaddlePaddle将每个模型参数作为一个numpy数组单独存为一个文件,所以可以利用如下方法读取模型的参数。 - -.. code-block:: python - - import numpy as np - import os - - def load(file_name): - with open(file_name, 'rb') as f: - f.read(16) # skip header for float type. - return np.fromfile(f, dtype=np.float32) - - print 'w=%.6f, b=%.6f' % (load('output/pass-00029/w'), load('output/pass-00029/b')) - # w=1.999743, b=0.300137 - -.. image:: ./parameters.png - :align: center - :scale: 80 % - -从图中可以看到,虽然 `w` 和 `b` 都使用随机值初始化,但在起初的几轮训练中它们都在快速逼近真实值,并且后续仍在不断改进,使得最终得到的模型几乎与真实模型一致。 - -这样,我们用PaddlePaddle解决了单变量线性回归问题, 包括数据输入、模型训练和最后的结果验证。 diff --git a/doc/getstarted/basic_usage/index_en.rst b/doc/getstarted/basic_usage/index_en.rst deleted file mode 100644 index 2cc438ebbe0f97345d25354b93b4ebbd43502415..0000000000000000000000000000000000000000 --- a/doc/getstarted/basic_usage/index_en.rst +++ /dev/null @@ -1,101 +0,0 @@ -Simple Linear Regression -======================== - -PaddlePaddle is a deep learning platform open-sourced by Baidu. With PaddlePaddle, you can easily train a classic neural network within a couple lines of configuration, or you can build sophisticated models that provide state-of-the-art performance on difficult learning tasks like sentiment analysis, machine translation, image caption and so on. - -Problem Background ------------------- - -Now, to give you a hint of what using PaddlePaddle looks like, let's start with a fundamental learning problem - `simple linear regression `_: you have observed a set of two-dimensional data points of ``X`` and ``Y``, where ``X`` is an explanatory variable and ``Y`` is corresponding dependent variable, and you want to recover the underlying correlation between ``X`` and ``Y``. Linear regression can be used in many practical scenarios. For example, ``X`` can be a variable about house size, and ``Y`` a variable about house price. You can build a model that captures relationship between them by observing real estate markets. - -Prepare the Data ------------------ - -Suppose the true relationship can be characterized as ``Y = 2X + 0.3``, let's see how to recover this pattern only from observed data. Here is a piece of python code that feeds synthetic data to PaddlePaddle. The code is pretty self-explanatory, the only extra thing you need to add for PaddlePaddle is a definition of input data types. - - .. code-block:: python - - # dataprovider.py - from paddle.trainer.PyDataProvider2 import * - import random - - # define data types of input: 2 real numbers - @provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False) - def process(settings, input_file): - for i in xrange(2000): - x = random.random() - yield [x], [2*x+0.3] - -Train a NeuralNetwork ----------------------- - -To recover this relationship between ``X`` and ``Y``, we use a neural network with one layer of linear activation units and a square error cost layer. Don't worry if you are not familiar with these terminologies, it's just saying that we are starting from a random line ``Y' = wX + b`` , then we gradually adapt ``w`` and ``b`` to minimize the difference between ``Y'`` and ``Y``. Here is what it looks like in PaddlePaddle: - - .. code-block:: python - - # trainer_config.py - from paddle.trainer_config_helpers import * - - # 1. read data. Suppose you saved above python code as dataprovider.py - data_file = 'empty.list' - with open(data_file, 'w') as f: f.writelines(' ') - define_py_data_sources2(train_list=data_file, test_list=None, - module='dataprovider', obj='process',args={}) - - # 2. learning algorithm - settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer()) - - # 3. Network configuration - x = data_layer(name='x', size=1) - y = data_layer(name='y', size=1) - y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b')) - cost = square_error_cost(input=y_predict, label=y) - outputs(cost) - -Some of the most fundamental usages of PaddlePaddle are demonstrated: - -- The first part shows how to feed data into PaddlePaddle. In general cases, PaddlePaddle reads raw data from a list of files, and then do some user-defined process to get real input. In this case, we only need to create a placeholder file since we are generating synthetic data on the fly. - -- The second part describes learning algorithm. It defines in what ways adjustments are made to model parameters. PaddlePaddle provides a rich set of optimizers, but a simple momentum based optimizer will suffice here, and it processes 12 data points each time. - -- Finally, the network configuration. It usually is as simple as "stacking" layers. Three kinds of layers are used in this configuration: - - **Data Layer**: a network always starts with one or more data layers. They provide input data to the rest of the network. In this problem, two data layers are used respectively for ``X`` and ``Y``. - - **FC Layer**: FC layer is short for Fully Connected Layer, which connects all the input units to current layer and does the actual computation specified as activation function. Computation layers like this are the fundamental building blocks of a deeper model. - - **Cost Layer**: in training phase, cost layers are usually the last layers of the network. They measure the performance of current model, and provide guidence to adjust parameters. - -Now that everything is ready, you can train the network with a simple command line call: - - .. code-block:: bash - - paddle train --config=trainer_config.py --save_dir=./output --num_passes=30 - - -This means that PaddlePaddle will train this network on the synthectic dataset for 30 passes, and save all the models under path ``./output``. You will see from the messages printed out during training phase that the model cost is decreasing as time goes by, which indicates we are getting a closer guess. - - -Evaluate the Model -------------------- - -Usually, a different dataset that left out during training phase should be used to evalute the models. However, we are lucky enough to know the real answer: ``w=2, b=0.3``, thus a better option is to check out model parameters directly. - -In PaddlePaddle, training is just to get a collection of model parameters, which are ``w`` and ``b`` in this case. Each parameter is saved in an individual file in the popular ``numpy`` array format. Here is the code that reads parameters from last pass. - - .. code-block:: python - - import numpy as np - import os - - def load(file_name): - with open(file_name, 'rb') as f: - f.read(16) # skip header for float type. - return np.fromfile(f, dtype=np.float32) - - print 'w=%.6f, b=%.6f' % (load('output/pass-00029/w'), load('output/pass-00029/b')) - # w=1.999743, b=0.300137 - - .. image:: parameters.png - :align: center - -Although starts from a random guess, you can see that value of ``w`` changes quickly towards 2 and ``b`` changes quickly towards 0.3. In the end, the predicted line is almost identical with real answer. - -There, you have recovered the underlying pattern between ``X`` and ``Y`` only from observed data. diff --git a/doc/getstarted/basic_usage/parameters.png b/doc/getstarted/basic_usage/parameters.png deleted file mode 100644 index 2ec67480951e21f0400bce1c34b3108dcd65c18c..0000000000000000000000000000000000000000 Binary files a/doc/getstarted/basic_usage/parameters.png and /dev/null differ diff --git a/doc/getstarted/build_and_install/build_from_source_cn.rst b/doc/getstarted/build_and_install/build_from_source_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..55665ac8edfcf20290936fba4c3e410b33e1f3d4 --- /dev/null +++ b/doc/getstarted/build_and_install/build_from_source_cn.rst @@ -0,0 +1,113 @@ +从源码编译PaddlePaddle +====================== + +.. _build_step: + +编译方法 +---------------- + +PaddlePaddle主要使用 `CMake `_ 以及GCC, G++作为编译工具。 +我们推荐您使用PaddlePaddle编译环境镜像完成编译,这样可以免去单独安装编译依赖的步骤,可选的不同编译环境 +可以在 `这里 `_ 找到。 +编译PaddlePaddle,需要执行: + +.. code-block:: bash + + git clone https://github.com/PaddlePaddle/Paddle.git + cd Paddle + # 如果使用Docker编译环境,执行下面的命令编译CPU-Only的二进制 + docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x paddle/scripts/docker/build.sh + # 如果不使用Docker编译环境,执行下面的命令 + mkdir build + cd build + cmake -DWITH_GPU=OFF -DWITH_TESTING=OFF .. + make + + +编译完成后会在build/python/dist目录下生成输出的whl包,可以选在在当前机器安装也可以拷贝到目标机器安装: + +.. code-block:: bash + + pip install python/dist/*.whl + + +.. _build_step: + +编译依赖 +---------------- + +PaddlePaddle编译需要使用到下面的依赖(包含但不限于),其他的依赖软件,会自动在编译时下载。 + +.. csv-table:: PaddlePaddle编译依赖 + :header: "依赖", "版本", "说明" + :widths: 10, 15, 30 + + "CMake", ">=3.5", "" + "GCC", "4.8.2", "推荐使用CentOS的devtools2" + "Python", "2.7.x", "依赖libpython2.7.so" + "pip", ">=9.0", "" + "numpy", "", "" + "SWIG", ">=2.0", "" + "Go", ">=1.8", "可选" + + +.. _build_options: + +编译选项 +---------------- + +PaddlePaddle的编译选项,包括生成CPU/GPU二进制文件、链接何种BLAS库等。 +用户可在调用cmake的时候设置它们,详细的cmake使用方法可以参考 +`官方文档 `_ 。 + +在cmake的命令行中,通过使用 ``-D`` 命令设置该类编译选项,例如: + +.. code-block:: bash + + cmake .. -DWITH_GPU=OFF + +.. csv-table:: 编译选项说明 + :header: "选项", "说明", "默认值" + :widths: 1, 7, 2 + + "WITH_GPU", "是否支持GPU", "ON" + "WITH_C_API", "是否仅编译CAPI", "OFF" + "WITH_DOUBLE", "是否使用双精度浮点数", "OFF" + "WITH_DSO", "是否运行时动态加载CUDA动态库,而非静态加载CUDA动态库。", "ON" + "WITH_AVX", "是否编译含有AVX指令集的PaddlePaddle二进制文件", "ON" + "WITH_PYTHON", "是否内嵌PYTHON解释器", "ON" + "WITH_STYLE_CHECK", "是否编译时进行代码风格检查", "ON" + "WITH_TESTING", "是否开启单元测试", "ON" + "WITH_DOC", "是否编译中英文文档", "OFF" + "WITH_SWIG_PY", "是否编译PYTHON的SWIG接口,该接口可用于预测和定制化训练", "Auto" + "WITH_GOLANG", "是否编译go语言的可容错parameter server", "ON" + "WITH_MKL", "是否使用MKL数学库,如果为否则是用OpenBLAS", "ON" + +BLAS ++++++ + +PaddlePaddle支持 `MKL `_ 和 +`OpenBlAS `_ 两种BLAS库。默认使用MKL。如果使用MKL并且机器含有AVX2指令集, +还会下载MKL-DNN数学库,详细参考 `这里 `_ 。 + +如果关闭MKL,则会使用OpenBLAS作为BLAS库。 + +CUDA/cuDNN ++++++++++++ + +PaddlePaddle在编译时/运行时会自动找到系统中安装的CUDA和cuDNN库进行编译和执行。 +使用参数 :code:`-DCUDA_ARCH_NAME=Auto` 可以指定开启自动检测SM架构,加速编译。 + +PaddlePaddle可以使用cuDNN v5.1之后的任何一个版本来编译运行,但尽量请保持编译和运行使用的cuDNN是同一个版本。 +我们推荐使用最新版本的cuDNN。 + +编译选项的设置 +++++++++++++++ + +PaddePaddle通过编译时指定路径来实现引用各种BLAS/CUDA/cuDNN库。cmake编译时,首先在系统路径( :code:`/usr/lib:/usr/local/lib` )中搜索这几个库,同时也会读取相关路径变量来进行搜索。 通过使用 ``-D`` 命令可以设置,例如 + +.. code-block:: bash + + cmake .. -DWITH_GPU=ON -DWITH_TESTING=OFF -DCUDNN_ROOT=/opt/cudnnv5 + +**注意:这几个编译选项的设置,只在第一次cmake的时候有效。如果之后想要重新设置,推荐清理整个编译目录(** :code:`rm -rf` )**后,再指定。** diff --git a/doc/getstarted/build_and_install/build_from_source_en.md b/doc/getstarted/build_and_install/build_from_source_en.md deleted file mode 100644 index 2f1461489495618718d5abaeab9cbeda9b93700f..0000000000000000000000000000000000000000 --- a/doc/getstarted/build_and_install/build_from_source_en.md +++ /dev/null @@ -1,236 +0,0 @@ -Installing from Sources -========================== - -* [1. Download and Setup](#download) -* [2. Requirements](#requirements) -* [3. Build on Ubuntu](#ubuntu) -* [4. Build on Centos](#centos) - - -## Download and Setup -You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle). - -```bash -git clone https://github.com/PaddlePaddle/Paddle paddle -cd paddle -``` -## Requirements - -To compile the source code, your computer must be equipped with the following dependencies. - -- **Compiler**: GCC >= 4.8 or Clang >= 3.3 (AppleClang >= 5.1) and gfortran compiler -- **CMake**: CMake >= 3.0 (at least CMake 3.4 on Mac OS X) -- **BLAS**: MKL, OpenBlas or ATLAS -- **Python**: only support Python 2.7 -- **Go** - -**Note:** For CUDA 7.0 and CUDA 7.5, GCC 5.0 and up are not supported! -For CUDA 8.0, GCC versions later than 5.3 are not supported! - -### Options - -PaddlePaddle supports some build options. - - - - - - - - - - - - - - - - - - - - - - - - - - -
OptionalDescription
WITH_GPUCompile PaddlePaddle with NVIDIA GPU
WITH_AVXCompile PaddlePaddle with AVX intrinsics
WITH_DSOCompile PaddlePaddle with dynamic linked CUDA
WITH_TESTINGCompile PaddlePaddle with unit testing
WITH_SWIG_PYCompile PaddlePaddle with inference api
WITH_STYLE_CHECKCompile PaddlePaddle with style check
WITH_PYTHONCompile PaddlePaddle with python interpreter
WITH_DOUBLECompile PaddlePaddle with double precision
WITH_RDMACompile PaddlePaddle with RDMA support
WITH_TIMERCompile PaddlePaddle with stats timer
WITH_PROFILERCompile PaddlePaddle with GPU profiler
WITH_DOCCompile PaddlePaddle with documentation
WITH_COVERAGECompile PaddlePaddle with code coverage
COVERALLS_UPLOADPackage code coverage data to coveralls
ON_TRAVISExclude special unit test on Travis CI
- - -**Note:** - - The GPU version works best with Cuda Toolkit 8.0 and cuDNN v5. - - Other versions like Cuda Toolkit 7.0, 7.5 and cuDNN v3, v4 are also supported. - - **To utilize cuDNN v5, Cuda Toolkit 7.5 is prerequisite and vice versa.** - -As a simple example, consider the following: - -1. **BLAS Dependencies(optional)** - - CMake will search BLAS libraries from the system. If not found, OpenBLAS will be downloaded, built and installed automatically. - To utilize preinstalled BLAS, you can simply specify MKL, OpenBLAS or ATLAS via `MKL_ROOT`, `OPENBLAS_ROOT` or `ATLAS_ROOT`. - - ```bash - # specify MKL - cmake .. -DMKL_ROOT= - # or specify OpenBLAS - cmake .. -DOPENBLAS_ROOT= - ``` - -2. **Doc Dependencies(optional)** - - To generate PaddlePaddle's documentation, install dependencies and set `-DWITH_DOC=ON` as follows: - - ```bash - pip install 'sphinx>=1.4.0' - pip install sphinx_rtd_theme recommonmark - - # install doxygen on Ubuntu - sudo apt-get install doxygen - # install doxygen on Mac OS X - brew install doxygen - - # active docs in cmake - cmake .. -DWITH_DOC=ON` - ``` - -## Build on Ubuntu 14.04 - -### Install Dependencies - -- **Paddle Dependencies** - - ```bash - # necessary - sudo apt-get update - sudo apt-get install -y git curl gcc g++ gfortran make build-essential automake - sudo apt-get install -y python python-pip python-numpy libpython-dev bison - sudo pip install 'protobuf==3.1.0.post1' - - # Install Go - # You can follow https://golang.org/doc/install for a detailed explanation. - wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ - tar -C $HOME -xzf go.tgz && \ - mkdir $HOME/gopath && \ - rm go.tgz - - # Setup environment variables - export GOROOT=$HOME/go - export GOPATH=$HOME/gopath - export PATH=$PATH:$GOROOT/bin - - # install cmake 3.4 - curl -sSL https://cmake.org/files/v3.4/cmake-3.4.1.tar.gz | tar -xz && \ - cd cmake-3.4.1 && ./bootstrap && make -j4 && sudo make install && \ - cd .. && rm -rf cmake-3.4.1 - ``` - -- **GPU Dependencies (optional)** - - To build GPU version, you will need the following installed: - - 1. a CUDA-capable GPU - 2. A supported version of Linux with a GCC compiler and toolchain - 3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads) - 4. NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn) - - The CUDA development environment relies on tight integration with the host development environment, - including the host compiler and C runtime libraries, and is therefore only supported on - distribution versions that have been qualified for this CUDA Toolkit release. - - After downloading cuDNN library, issue the following commands: - - ```bash - sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local - sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn* - ``` - Then you need to set LD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc. - - ```bash - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH - export PATH=/usr/local/cuda/bin:$PATH - ``` - -### Build and Install - -As usual, the best option is to create build folder under paddle project directory. - -```bash -mkdir build && cd build -``` - -Finally, you can build and install PaddlePaddle: - -```bash -# you can add build option here, such as: -cmake .. -DCMAKE_INSTALL_PREFIX= -# please use sudo make install, if you want to install PaddlePaddle into the system -make -j `nproc` && make install -# set PaddlePaddle installation path in ~/.bashrc -export PATH=/bin:$PATH -# install PaddlePaddle Python modules. -sudo pip install /opt/paddle/share/wheels/*.whl -``` - -## Build on Centos 7 - -### Install Dependencies - -- **CPU Dependencies** - - ```bash - # necessary - sudo yum update - sudo yum install -y epel-release - sudo yum install -y make cmake3 python-devel python-pip gcc-gfortran swig git - sudo pip install wheel numpy - sudo pip install 'protobuf>=3.0.0' - ``` - -- **GPU Dependencies (optional)** - - To build GPU version, you will need the following installed: - - 1. a CUDA-capable GPU - 2. A supported version of Linux with a GCC compiler and toolchain - 3. NVIDIA CUDA Toolkit (available at http://developer.nvidia.com/cuda-downloads) - 4. NVIDIA cuDNN Library (available at https://developer.nvidia.com/cudnn) - - The CUDA development environment relies on tight integration with the host development environment, - including the host compiler and C runtime libraries, and is therefore only supported on - distribution versions that have been qualified for this CUDA Toolkit release. - - After downloading cuDNN library, issue the following commands: - - ```bash - sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local - sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn* - ``` - Then you need to set LD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc. - - ```bash - export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH - export PATH=/usr/local/cuda/bin:$PATH - ``` - -### Build and Install - -As usual, the best option is to create build folder under paddle project directory. - -```bash -mkdir build && cd build -``` - -Finally, you can build and install PaddlePaddle: - -```bash -# you can add build option here, such as: -cmake3 .. -DCMAKE_INSTALL_PREFIX= -# please use sudo make install, if you want to install PaddlePaddle into the system -make -j `nproc` && make install -# set PaddlePaddle installation path in ~/.bashrc -export PATH=/bin:$PATH -# install PaddlePaddle Python modules. -sudo pip install /opt/paddle/share/wheels/*.whl -``` diff --git a/doc/getstarted/build_and_install/build_from_source_en.rst b/doc/getstarted/build_and_install/build_from_source_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..9a3ed7dd57137ddf3d6213222c17433822b01dbb --- /dev/null +++ b/doc/getstarted/build_and_install/build_from_source_en.rst @@ -0,0 +1,127 @@ +Build PaddlePaddle from Sources +========================== + +.. _build_step: + +How To Build +---------------- + +PaddlePaddle mainly uses `CMake `_ and GCC, G++ as compile +tools. We recommend you to use our pre-built Docker image to run the build +to avoid installing dependencies by yourself. We have several build environment +Docker images `here `_. +Then run: + +.. code-block:: bash + + git clone https://github.com/PaddlePaddle/Paddle.git + cd Paddle + # run the following command to build CPU-Only binaries if you are using docker + docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddlepaddle/paddle_manylinux_devel:cuda8.0_cudnn5 bash -x paddle/scripts/docker/build.sh + # else run these commands + mkdir build + cd build + cmake -DWITH_GPU=OFF -DWITH_TESTING=OFF .. + make + +When the compile finishes, you can get the output whl package under +build/python/dist, then you can choose to install the whl on local +machine or copy it to the target machine. + +.. code-block:: bash + + pip install python/dist/*.whl + +.. _build_step: + +Compile Dependencies +---------------- + +PaddlePaddle need the following dependencies when compiling, other dependencies +will be downloaded automatically. + +.. csv-table:: PaddlePaddle Compile Dependencies + :header: "Dependency", "Version", "Description" + :widths: 10, 15, 30 + + "CMake", ">=3.5", "" + "GCC", "4.8.2", "Recommend devtools2 for CentOS" + "Python", "2.7.x", "Need libpython2.7.so" + "pip", ">=9.0", "" + "numpy", "", "" + "SWIG", ">=2.0", "" + "Go", ">=1.8", "Optional" + + +.. _build_options: + +Build Options +---------------- + +Build options include whether build binaries for CPU or GPU, which BLAS +library to use etc. You may pass these settings when running cmake. +For detailed cmake tutorial please refer to `here `_ 。 + +.. _build_options_bool: + +Bool Type Options +---------------- + +You can add :code:`-D` argument to pass such options, like: + +.. code-block:: bash + + cmake .. -DWITH_GPU=OFF + +.. csv-table:: Bool Type Options + :header: "Option", "Description", "Default" + :widths: 1, 7, 2 + + "WITH_GPU", "Build with GPU support", "ON" + "WITH_C_API", "Build only CAPI", "OFF" + "WITH_DOUBLE", "Build with double precision", "OFF" + "WITH_DSO", "Dynamically load CUDA libraries", "ON" + "WITH_AVX", "Build with AVX support", "ON" + "WITH_PYTHON", "Build with integrated Python interpreter", "ON" + "WITH_STYLE_CHECK", "Check code style when building", "ON" + "WITH_TESTING", "Build unit tests", "ON" + "WITH_DOC", "Build documentaions", "OFF" + "WITH_SWIG_PY", "Build Python SWIG interface for V2 API", "Auto" + "WITH_GOLANG", "Build fault-tolerant parameter server written in go", "ON" + "WITH_MKL", "Use MKL as BLAS library, else use OpenBLAS", "ON" + + +BLAS ++++++ + +PaddlePaddle supports `MKL `_ and +`OpenBlAS `_ as BLAS library。By default it uses MKL. +If you are using MKL and your machine supports AVX2, MKL-DNN will also be downloaded +and used, for more `details `_ . + +If you choose not to use MKL, then OpenBlAS will be used. + +CUDA/cuDNN ++++++++++++ + +PaddlePaddle will automatically find CUDA and cuDNN when compiling and running. +parameter :code:`-DCUDA_ARCH_NAME=Auto` can be used to detect SM architecture +automatically in order to speed up the build. + +PaddlePaddle can build with any version later than cuDNN v5.1, and we intend to +keep on with latest cuDNN versions. Be sure to run with the same version of cuDNN +you built. + +Pass Compile Options +++++++++++++++ + +You can pass compile options to use intended BLAS/CUDA/Cudnn libraries. +When running cmake command, it will search system paths like +:code:`/usr/lib:/usr/local/lib` and then search paths that you +passed to cmake, i.e. + +.. code-block:: bash + + cmake .. -DWITH_GPU=ON -DWITH_TESTING=OFF -DCUDNN_ROOT=/opt/cudnnv5 + +**NOTE: These options only take effect when running cmake for the first time, you need to clean the cmake cache or clean the build directory (** :code:`rm -rf` **) if you want to change it.** diff --git a/doc/getstarted/build_and_install/cmake.png b/doc/getstarted/build_and_install/cmake.png deleted file mode 100644 index a58cd09ad99cf27cc1ca5785fe54d726b83a82f6..0000000000000000000000000000000000000000 Binary files a/doc/getstarted/build_and_install/cmake.png and /dev/null differ diff --git a/doc/getstarted/build_and_install/cmake/build_from_source_cn.rst b/doc/getstarted/build_and_install/cmake/build_from_source_cn.rst deleted file mode 100644 index be0c1ffa451b2901ec06621dd4d886f800b4562e..0000000000000000000000000000000000000000 --- a/doc/getstarted/build_and_install/cmake/build_from_source_cn.rst +++ /dev/null @@ -1,43 +0,0 @@ -PaddlePaddle的编译选项 -====================== - -PaddlePaddle的编译选项,包括生成CPU/GPU二进制文件、链接何种BLAS库等。用户可在调用cmake的时候设置它们,详细的cmake使用方法可以参考 `官方文档 `_ 。 - -Bool型的编译选项 ----------------- -用户可在cmake的命令行中,通过使用 ``-D`` 命令设置该类编译选项,例如 - -.. code-block:: bash - - cmake .. -DWITH_GPU=OFF - -.. csv-table:: Bool型的编译选项 - :widths: 1, 7, 2 - :file: compile_options.csv - -BLAS/CUDA/Cudnn的编译选项 --------------------------- -BLAS -+++++ - -PaddlePaddle支持以下任意一种BLAS库:`MKL `_ ,`ATLAS `_ ,`OpenBlAS `_ 和 `REFERENCE BLAS `_ 。 - -.. csv-table:: BLAS路径相关的编译选项 - :widths: 1, 2, 7 - :file: cblas_settings.csv - -CUDA/Cudnn -+++++++++++ - -PaddlePaddle可以使用cudnn v2之后的任何一个版本来编译运行,但尽量请保持编译和运行使用的cudnn是同一个版本。 我们推荐使用最新版本的cudnn v5.1。 - -编译选项的设置 -++++++++++++++ - -PaddePaddle通过编译时指定路径来实现引用各种BLAS/CUDA/Cudnn库。cmake编译时,首先在系统路径(/usr/lib\:/usr/local/lib)中搜索这几个库,同时也会读取相关路径变量来进行搜索。 通过使用 ``-D`` 命令可以设置,例如 - -.. code-block:: bash - - cmake .. -DMKL_ROOT=/opt/mkl/ -DCUDNN_ROOT=/opt/cudnnv5 - -注意:这几个编译选项的设置,只在第一次cmake的时候有效。如果之后想要重新设置,推荐清理整个编译目录(``rm -rf``)后,再指定。 diff --git a/doc/getstarted/build_and_install/cmake/cblas_settings.csv b/doc/getstarted/build_and_install/cmake/cblas_settings.csv deleted file mode 100644 index a6356baf16a0d3d2499e39d2055d8ee878dcaef2..0000000000000000000000000000000000000000 --- a/doc/getstarted/build_and_install/cmake/cblas_settings.csv +++ /dev/null @@ -1,5 +0,0 @@ -编译选项,描述,注意 -MKL_ROOT,MKL的路径,${MKL_ROOT}/include下需要包含mkl.h,${MKL_ROOT}/lib目录下需要包含mkl_core,mkl_sequential和mkl_intel_lp64三个库。 -ATLAS_ROOT,ATLAS的路径,${ATLAS_ROOT}/include下需要包含cblas.h,${ATLAS_ROOT}/lib下需要包含cblas和atlas两个库。 -OPENBLAS_ROOT,OpenBLAS的路径,${OPENBLAS_ROOT}/include下需要包含cblas.h,${OPENBLAS_ROOT}/lib下需要包含openblas库。 -REFERENCE_CBLAS_ROOT,REFERENCE BLAS的路径,${REFERENCE_CBLAS_ROOT}/include下需要包含cblas.h,${REFERENCE_CBLAS_ROOT}/lib下需要包含cblas库。 \ No newline at end of file diff --git a/doc/getstarted/build_and_install/cmake/compile_options.csv b/doc/getstarted/build_and_install/cmake/compile_options.csv deleted file mode 100644 index 463b825470579d0c3736a408b1e82dd33e6f8d42..0000000000000000000000000000000000000000 --- a/doc/getstarted/build_and_install/cmake/compile_options.csv +++ /dev/null @@ -1,12 +0,0 @@ -选项,说明,默认值 -WITH_GPU,是否支持GPU。,取决于是否寻找到CUDA工具链 -WITH_DOUBLE,是否使用双精度浮点数。,否 -WITH_DSO,是否运行时动态加载CUDA动态库,而非静态加载CUDA动态库。,是 -WITH_AVX,是否编译含有AVX指令集的PaddlePaddle二进制文件,是 -WITH_PYTHON,是否内嵌PYTHON解释器。方便今后的嵌入式移植工作。,是 -WITH_STYLE_CHECK,是否编译时进行代码风格检查,是 -WITH_RDMA,是否开启RDMA,否 -WITH_TIMER,是否开启计时功能。如果开启会导致运行略慢,打印的日志变多,但是方便调试和测Benchmark,否 -WITH_TESTING,是否开启单元测试,取决于是否寻找到GTEST -WITH_DOC,是否编译中英文文档,否 -WITH_SWIG_PY,是否编译PYTHON的SWIG接口,该接口可用于预测和定制化训练,取决于是否寻找到SWIG \ No newline at end of file diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst index 0d34dec8e908c5e61001500725187a2233797f46..07933b2e0bbca809f6c4e90e7ff8f71d1b3304b2 100644 --- a/doc/getstarted/build_and_install/docker_install_cn.rst +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -1,222 +1,139 @@ -PaddlePaddle的Docker容器使用方式 +使用Docker安装运行PaddlePaddle ================================ -PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 `_ 才能充分利用Mac OS X和Windows上的硬件资源。 +使用Docker安装和运行PaddlePaddle可以无需考虑依赖环境即可运行。并且也可以在Windows的docker中运行。 +您可以在 `Docker官网 `_ 获得基本的Docker安装和使用方法。 -Docker使用入门 ------------------------------- - -几个基础的概念帮助理解和使用Docker: +如果您在使用Windows,可以参考 +`这篇 `_ +教程,完成在Windows上安装和使用Docker。 -- *镜像*:一个Docker镜像是一个打包好的软件。它包含了这个软件本身和它所依赖的运行环境。PaddlePaddle的Docker镜像就包含了PaddlePaddle的Python库以及其依赖的多个Python库。这样我们可以直接在Docker中运行需要的程序而不需要安装后在执行。可以执行: +在了解Docker的基本使用方法之后,即可开始下面的步骤: - .. code-block:: bash +.. _docker_pull: - docker images +获取PaddlePaddle的Docker镜像 +------------------------------ - 来列出当前系统中的所有镜像,同样可以执行: +执行下面的命令获取最新的PaddlePaddle Docker镜像 .. code-block:: bash - - docker pull paddlepaddle/paddle:0.10.0 - 来下载Docker镜像,paddlepaddle/paddle是从官方镜像源Dockerhub.com下载的,推荐国内用户使用docker.paddlepaddle.org/paddle下载。 + docker pull paddlepaddle/paddle -- *容器*: 如果说一个Docker镜像就是一个程序,那容器就是这个程序运行时产生的“进程”。 - 实际上,一个容器就是一个操作系统的进程,但是是运行在独立的进程空间,文件系统以及网络之上。 - 可以执行: +对于国内用户,我们提供了加速访问的镜像源: .. code-block:: bash - docker run paddlepaddle/paddle:0.10.0 + docker pull docker.paddlepaddle.org/paddle - 来使用一个镜像启动一个容器。 - -- 默认情况下,Docker容器会运行在独立的文件系统空间之上,我们无法在Docker容器中 - 访问到主机上的文件。可以通过*挂载Volume*的方式,将主机上的文件或目录挂载到 - Docker容器中。下面的命令把当前目录挂载到了容器中的 /data 目录下,容器使用 - debian镜像,并且启动后执行 :code:`ls /data`。 +下载GPU版本的Docker镜像: .. code-block:: bash - docker run --rm -v $(pwd):/data debian ls /data - -PaddlePaddle发布的Docker镜像使用说明 ------------------------------- - -我们把PaddlePaddle的编译环境打包成一个镜像,称为开发镜像,里面涵盖了 -PaddlePaddle需要的所有编译工具。把编译出来的PaddlePaddle也打包成一个镜 -像,称为生产镜像,里面涵盖了PaddlePaddle运行所需的所有环境。每次 -PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以及开发镜像。运 -行镜像包括纯CPU版本和GPU版本以及其对应的非AVX版本。我们会在 -`dockerhub.com `_ -和国内镜像`docker.paddlepaddle.org` 提供最新 -的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。 - -**注意:为了方便在国内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您在国内,请把文档里命令中的paddlepaddle/paddle替换成docker.paddlepaddle.org/paddle。** - -1. 开发镜像::code:`paddlepaddle/paddle:0.10.0-dev` - - 这个镜像包含了Paddle相关的开发工具以及编译和运行环境。用户可以使用开发镜像代替配置本地环境,完成开发,编译,发布, - 文档编写等工作。由于不同的Paddle的版本可能需要不同的依赖和工具,所以如果需要自行配置开发环境需要考虑版本的因素。 - 开发镜像包含了以下工具: - - - gcc/clang - - nvcc - - Python - - sphinx - - woboq - - sshd - 很多开发者会使用远程的安装有GPU的服务器工作,用户可以使用ssh登录到这台服务器上并执行 :code:`docker exec`进入开发镜像并开始工作, - 也可以在开发镜像中启动一个SSHD服务,方便开发者直接登录到镜像中进行开发: - - 以交互容器方式运行开发镜像: - - .. code-block:: bash - - docker run -it --rm -v $(pwd):/paddle paddlepaddle/paddle:0.10.0-dev /bin/bash - - 或者,可以以后台进程方式运行容器: - - .. code-block:: bash - - docker run -d -p 2202:22 -p 8888:8888 -v $(pwd):/paddle paddlepaddle/paddle:0.10.0-dev /usr/sbin/sshd -D - - 然后用密码 :code:`root` SSH进入容器: - - .. code-block:: bash - - ssh -p 2202 root@localhost - - SSH方式的一个优点是我们可以从多个终端进入容器。比如,一个终端运行vi,另一个终端运行Python。另一个好处是我们可以把PaddlePaddle容器运行在远程服务器上,并在笔记本上通过SSH与其连接。 - -2. 生产镜像:根据CPU、GPU和非AVX区分了如下4个镜像: - - - GPU/AVX::code:`paddlepaddle/paddle:-gpu` - - GPU/no-AVX::code:`paddlepaddle/paddle:-gpu-noavx` - - CPU/AVX::code:`paddlepaddle/paddle:` - - CPU/no-AVX::code:`paddlepaddle/paddle:-noavx` - - 纯CPU镜像以及GPU镜像都会用到AVX指令集,但是2008年之前生产的旧电脑不支持AVX。以下指令能检查Linux电脑是否支持AVX: - - .. code-block:: bash - - if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi - - 如果输出是No,就需要选择使用no-AVX的镜像 - - **注:在0.10.0之后的版本,PaddlePaddle都可以自动判断硬件是否支持AVX,所以无需判断AVX即可使用** + docker pull paddlepaddle/paddle:latest-gpu + docker pull docker.paddlepaddle.org/paddle:latest-gpu - 以上方法在GPU镜像里也能用,只是请不要忘记提前在物理机上安装GPU最新驱动。 - 为了保证GPU驱动能够在镜像里面正常运行,我们推荐使用[nvidia-docker](https://github.com/NVIDIA/nvidia-docker)来运行镜像。 +选择下载使用不同的BLAS库的Docker镜像: - .. code-block:: bash - - nvidia-docker run -it --rm paddledev/paddle:0.10.0-gpu /bin/bash + .. code-block:: bash - 注意: 如果使用nvidia-docker存在问题,你也许可以尝试更老的方法,具体如下,但是我们并不推荐这种方法。: + # 默认是使用MKL的镜像 + docker pull paddlepaddle/paddle + # 使用OpenBLAS的镜像 + docker pull paddlepaddle/paddle:latest-openblas - .. code-block:: bash +下载指定版本的Docker镜像,可以从 `DockerHub网站 `_ 获取可选的tag,并执行下面的命令: - export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" - export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') - docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:0.10.0-gpu + .. code-block:: bash -3. 运行以及发布您的AI程序 + docker pull paddlepaddle/paddle:[tag] + # 比如: + docker pull docker.paddlepaddle.org/paddle:0.10.0-gpu - 假设您已经完成了一个AI训练的python程序 :code:`a.py`,这个程序是您在开发机上使用开发镜像完成开发。此时您可以运行这个命令在开发机上进行测试运行: +.. _docker_run: - .. code-block:: bash +在Docker中执行PaddlePaddle训练程序 +------------------------------ - docker run -it -v $PWD:/work paddle /work/a.py +假设您已经在当前目录(比如在/home/work)编写了一个PaddlePaddle的程序 :code:`train.py` (可以参考 +`PaddlePaddleBook `_ +编写),就可以使用下面的命令开始执行训练: - 如果要使用GPU,请运行: + .. code-block:: bash - .. code-block:: bash + cd /home/work + docker run -it -v $PWD:/work paddlepaddle/paddle /work/train.py + +上述命令中, :code:`-it` 参数说明容器已交互式运行; :code:`-v $PWD:/work` +指定将当前路径(Linux中$PWD变量会展开为当前路径的绝对路径)挂载到容器内部的 :code:`/work` +目录; :code:`paddlepaddle/paddle` 指定需要使用的容器; 最后 :code:`/work/train.py` +为容器内执行的命令,即运行训练程序。 - nvidia-docker run -it -v $PWD:/work paddle /work/a.py +当然,您也可以进入到Docker容器中,以交互式的方式执行或调试您的代码: + .. code-block:: bash + docker run -it -v $PWD:/work paddlepaddle/paddle /bin/bash + cd /work + python train.py - 这里`a.py`包含的所有依赖假设都可以在Paddle的运行容器中。如果需要包含更多的依赖、或者需要发布您的应用的镜像,可以编写`Dockerfile`使用`FROM paddledev/paddle:0.10.0` - 创建和发布自己的AI程序镜像。 +**注:PaddlePaddle Docker镜像为了减小体积,默认没有安装vim,您可以在容器中执行** :code:`apt-get install -y vim` **安装后,在容器中编辑代码。** -运行PaddlePaddle Book ---------------------- +.. _docker_run_book: -Jupyter Notebook是一个开源的web程序,大家可以通过它制作和分享带有代码、公式、图表、文字的交互式文档。用户可以通过网页浏览文档。 +使用Docker启动PaddlePaddle Book教程 +------------------------------ +使用Docker可以快速在本地启动一个包含了PaddlePaddle官方Book教程的Jupyter Notebook,可以通过网页浏览。 PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Notebook。 如果您想要更深入了解deep learning,PaddlePaddle Book一定是您最好的选择。 +大家可以通过它阅读教程,或者制作和分享带有代码、公式、图表、文字的交互式文档。 我们提供可以直接运行PaddlePaddle Book的Docker镜像,直接运行: -.. code-block:: bash + .. code-block:: bash - docker run -p 8888:8888 paddlepaddle/book + docker run -p 8888:8888 paddlepaddle/book 然后在浏览器中输入以下网址: -.. code-block:: text + .. code-block:: text - http://localhost:8888/ + http://localhost:8888/ 就这么简单,享受您的旅程! -通过Docker容器开发PaddlePaddle ------------------------------- - -开发人员可以在Docker开发镜像中开发PaddlePaddle。这样开发人员可以以一致的方式在不同的平台上工作 - Linux,Mac OS X和Windows。 +.. _docker_run_gpu: -1. 制作PaddlePaddle开发镜像 - - PaddlePaddle每次发布新版本都会发布对应的开发镜像供开发者直接使用。这里介绍如生成造这个开发镜像。 - 生成Docker镜像的方式有两个,一个是直接把一个容器转换成镜像,另一个是创建Dockerfile并运行docker build指令按照Dockerfile生成镜像。第一个方法的好处是简单快捷,适合自己实验,可以快速迭代。第二个方法的好处是Dockerfile可以把整个生成流程描述很清楚,其他人很容易看懂镜像生成过程,持续集成系统也可以简单地复现这个过程。我们采用第二个方法。Dockerfile位于PaddlePaddle repo的根目录。生成生产镜像只需要运行: - - .. code-block:: bash - - git clone https://github.com/PaddlePaddle/Paddle.git - cd Paddle - docker build -t paddle:dev . - - docker build这个命令的-t指定了生成的镜像的名字,这里我们用paddle:dev。到此,PaddlePaddle开发镜像就被构建完毕了。 +使用Docker执行GPU训练 +------------------------------ -2. 制作PaddlePaddle生产镜像 +为了保证GPU驱动能够在镜像里面正常运行,我们推荐使用 +`nvidia-docker `_ 来运行镜像。 +请不要忘记提前在物理机上安装GPU最新驱动。 - 生产镜像的生成分为两步,第一步是运行: + .. code-block:: bash - .. code-block:: bash - - docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=OFF" -e "WITH_TEST=ON" paddle:dev + nvidia-docker run -it -v $PWD:/work paddledev/paddle:latest-gpu /bin/bash - 以上命令会编译PaddlePaddle,生成运行程序,以及生成创建生产镜像的Dockerfile。所有生成的的文件都在build目录下。“WITH_GPU”控制生成的生产镜像是否支持GPU,“WITH_AVX”控制生成的生产镜像是否支持AVX,”WITH_TEST“控制是否生成单元测试。 +**注: 如果没有安装nvidia-docker,可以尝试以下的方法,将CUDA库和Linux设备挂载到Docker容器内:** - 第二步是运行: + .. code-block:: bash - .. code-block:: bash - - docker build -t paddle:prod -f build/Dockerfile ./build + export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" + export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') + docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:latest-gpu - 以上命令会按照生成的Dockerfile把生成的程序拷贝到生产镜像中并做相应的配置,最终生成名为paddle:prod的生产镜像。 +**关于AVX:** -3. 运行单元测试 +AVX是一种CPU指令集,可以加速PaddlePaddle的计算。最新的PaddlePaddle Docker镜像默认 +是开启AVX编译的,所以,如果您的电脑不支持AVX,需要单独 +`编译 <./build_from_source_cn.rst>`_ PaddlePaddle为no-avx版本。 - 运行以下指令: +以下指令能检查Linux电脑是否支持AVX: .. code-block:: bash - - docker run -it -v $(pwd):/paddle paddle:dev bash -c "cd /paddle/build && ctest" - -文档 ----- - -Paddle的Docker开发镜像带有一个通过 `woboq code browser -`_ 生成的HTML版本的C++源代码,便于用户浏览C++源码。 -只要在Docker里启动PaddlePaddle的时候给它一个名字,就可以再运行另一个Nginx Docker镜像来服务HTML代码: - -.. code-block:: bash - - docker run -d --name paddle-cpu-doc paddle:0.10.0-dev - docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx + if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi -接着我们就能够打开浏览器在 http://localhost:8088/paddle/ 浏览代码。 +如果输出是No,就需要选择使用no-AVX的镜像 diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst index 94860240f6a4a9bed8a865684a8a79960489280e..9b977c9c72e36b4b47cbf56ae848ab83d9895783 100644 --- a/doc/getstarted/build_and_install/docker_install_en.rst +++ b/doc/getstarted/build_and_install/docker_install_en.rst @@ -1,270 +1,146 @@ PaddlePaddle in Docker Containers ================================= -Docker container is currently the only officially-supported way to -running PaddlePaddle. This is reasonable as Docker now runs on all -major operating systems including Linux, Mac OS X, and Windows. -Please be aware that you will need to change `Dockers settings -`_ to make full use -of your hardware resource on Mac OS X and Windows. +Run PaddlePaddle in Docker container so that you don't need to care about +runtime dependencies, also you can run under Windows system. You can get +tutorials at `here `_ . -Working With Docker -------------------- +If you are using Windows, please refer to +`this `_ +tutorial to start running docker under windows. -Docker is simple as long as we understand a few basic concepts: +After you've read above tutorials you may proceed the following steps. -- *image*: A Docker image is a pack of software. It could contain one or more programs and all their dependencies. For example, the PaddlePaddle's Docker image includes pre-built PaddlePaddle and Python and many Python packages. We can run a Docker image directly, other than installing all these software. We can type +.. _docker_pull: - .. code-block:: bash - - docker images +Pull PaddlePaddle Docker Image +------------------------------ - to list all images in the system. We can also run +Run the following command to download the latest Docker images: .. code-block:: bash - - docker pull paddlepaddle/paddle:0.10.0 - to download a Docker image, paddlepaddle/paddle in this example, - from Dockerhub.com. + docker pull paddlepaddle/paddle -- *container*: considering a Docker image a program, a container is a - "process" that runs the image. Indeed, a container is exactly an - operating system process, but with a virtualized filesystem, network - port space, and other virtualized environment. We can type +For users in China, we provide a faster mirror: .. code-block:: bash - docker run paddlepaddle/paddle:0.10.0 + docker pull docker.paddlepaddle.org/paddle - to start a container to run a Docker image, paddlepaddle/paddle in this example. - -- By default docker container have an isolated file system namespace, - we can not see the files in the host file system. By using *volume*, - mounted files in host will be visible inside docker container. - Following command will mount current dirctory into /data inside - docker container, run docker container from debian image with - command :code:`ls /data`. +Download GPU version images: .. code-block:: bash - docker run --rm -v $(pwd):/data debian ls /data - -Usage of CPU-only and GPU Images ----------------------------------- - -We package PaddlePaddle's compile environment into a Docker image, -called the develop image, it contains all compiling tools that -PaddlePaddle needs. We package compiled PaddlePaddle program into a -Docker image as well, called the production image, it contains all -runtime environment that running PaddlePaddle needs. For each version -of PaddlePaddle, we release both of them. Production image includes -CPU-only version and a CUDA GPU version and their no-AVX versions. - -We put the docker images on `dockerhub.com -`_. You can find the -latest versions under "tags" tab at dockerhub.com. - -** NOTE: If you are in China, you can use our Docker image registry mirror to speed up the download process. To use it, please replace all paddlepaddle/paddle in the commands to docker.paddlepaddle.org/paddle.** - - -1. development image :code:`paddlepaddle/paddle:-dev` - - This image has packed related develop tools and runtime - environment. Users and developers can use this image instead of - their own local computer to accomplish development, build, - releasing, document writing etc. While different version of paddle - may depends on different version of libraries and tools, if you - want to setup a local environment, you must pay attention to the - versions. The development image contains: - - - gcc/clang - - nvcc - - Python - - sphinx - - woboq - - sshd - - Many developers use servers with GPUs, they can use ssh to login to - the server and run :code:`docker exec` to enter the docker - container and start their work. Also they can start a development - docker image with SSHD service, so they can login to the container - and start work. - -2. Production images, this image might have multiple variants: - - - GPU/AVX::code:`paddlepaddle/paddle:-gpu` - - GPU/no-AVX::code:`paddlepaddle/paddle:-gpu-noavx` - - CPU/AVX::code:`paddlepaddle/paddle:` - - CPU/no-AVX::code:`paddlepaddle/paddle:-noavx` - - Please be aware that the CPU-only and the GPU images both use the - AVX instruction set, but old computers produced before 2008 do not - support AVX. The following command checks if your Linux computer - supports AVX: - - .. code-block:: bash - - if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi - - **NOTE:versions after 0.10.0 will automatically detect system AVX support, so manual detect is not needed in this case.** - To run the CPU-only image as an interactive container: - - .. code-block:: bash - - docker run -it --rm paddlepaddle/paddle:0.10.0 /bin/bash - - Above method work with the GPU image too -- the recommended way is - using `nvidia-docker `_. - - Please install nvidia-docker first following this `tutorial - `_. - - Now you can run a GPU image: - - .. code-block:: bash - - nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0-gpu /bin/bash - - -Train Model Using Python API ----------------------------- - -Our official docker image provides a runtime for PaddlePaddle -programs. The typical workflow will be as follows: - -Create a directory as workspace: - -.. code-block:: bash - - mkdir ~/workspace - -Edit a PaddlePaddle python program using your favourite editor - -.. code-block:: bash - - emacs ~/workspace/example.py - -Run the program using docker: - -.. code-block:: bash - - docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0 python /workspace/example.py - -Or if you are using GPU for training: + docker pull paddlepaddle/paddle:latest-gpu + docker pull docker.paddlepaddle.org/paddle:latest-gpu -.. code-block:: bash +Choose between different BLAS version: - nvidia-docker run --rm -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0-gpu python /workspace/example.py - -Above commands will start a docker container by running :code:`python -/workspace/example.py`. It will stop once :code:`python -/workspace/example.py` finishes. - -Another way is to tell docker to start a :code:`/bin/bash` session and -run PaddlePaddle program interactively: - -.. code-block:: bash - - docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0 /bin/bash - # now we are inside docker container - cd /workspace - python example.py - -Running with GPU is identical: - -.. code-block:: bash - - nvidia-docker run -it -v ~/workspace:/workspace paddlepaddle/paddle:0.10.0-gpu /bin/bash - # now we are inside docker container - cd /workspace - python example.py - - -Develop PaddlePaddle or Train Model Using C++ API ---------------------------------------------------- - -We will be using PaddlePaddle development image since it contains all -compiling tools and dependencies. + .. code-block:: bash -1. Build PaddlePaddle develop image + # image using MKL by default + docker pull paddlepaddle/paddle + # image using OpenBLAS + docker pull paddlepaddle/paddle:latest-openblas - Use following command to build PaddlePaddle develop image: - .. code-block:: bash +If you want to use legacy versions, choose a tag from +`DockerHub `_ +and run: - git clone https://github.com/PaddlePaddle/Paddle.git && cd Paddle - docker build -t paddle:dev . - -2. Build PaddlePaddle production image + .. code-block:: bash - There are two steps for building production image, the first step is to run: + docker pull paddlepaddle/paddle:[tag] + # i.e. + docker pull docker.paddlepaddle.org/paddle:0.10.0-gpu - .. code-block:: bash +.. _docker_run: - docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=OFF" -e "WITH_TEST=ON" paddle:dev +Launch your training program in Docker +------------------------------ - The above command will compile PaddlePaddle and create a Dockerfile for building production image. All the generated files are in the build directory. "WITH_GPU" controls if the generated production image supports GPU. "WITH_AVX" controls if the generated production image supports AVX. "WITH_TEST" controls if the unit test will be generated. +Assume that you have already written a PaddlePaddle program +named :code:`train.py` under directory :code:`/home/work` (refer to +`PaddlePaddleBook `_ +for more samples), then run the following command: - The second step is to run: + .. code-block:: bash - .. code-block:: bash + cd /home/work + docker run -it -v $PWD:/work paddlepaddle/paddle /work/train.py - docker build -t paddle:prod -f build/Dockerfile ./build +In the above command, :code:`-it` means run the container interactively; +:code:`-v $PWD:/work` means mount the current directory ($PWD will expand +to current absolute path in Linux) under :code:`/work` in the container. +:code:`paddlepaddle/paddle` to specify image to use; finnally +:code:`/work/train.py` is the command to run inside docker. - The above command will generate the production image by copying the compiled PaddlePaddle program into the image. +Also, you can go into the container shell, run or debug your code +interactively: -3. Run unit test + .. code-block:: bash + docker run -it -v $PWD:/work paddlepaddle/paddle /bin/bash + cd /work + python train.py - Following command will run unit test: +**NOTE: We did not install vim in the default docker image to reduce the image size, you can run** :code:`apt-get install -y vim` **to install it if you need to edit python files.** - .. code-block:: bash - - docker run -it -v $(pwd):/paddle paddle:dev bash -c "cd /paddle/build && ctest" +.. _docker_run_book: PaddlePaddle Book ------------------ -The Jupyter Notebook is an open-source web application that allows -you to create and share documents that contain live code, equations, -visualizations and explanatory text in a single browser. - -PaddlePaddle Book is an interactive Jupyter Notebook for users and developers. -We already exposed port 8888 for this book. If you want to +You can create a container serving PaddlePaddle Book using Jupyter Notebook in +one minute using Docker. PaddlePaddle Book is an interactive Jupyter Notebook +for users and developers.If you want to dig deeper into deep learning, PaddlePaddle Book definitely is your best choice. We provide a packaged book image, simply issue the command: -.. code-block:: bash + .. code-block:: bash - docker run -p 8888:8888 paddlepaddle/book + docker run -p 8888:8888 paddlepaddle/book Then, you would back and paste the address into the local browser: -.. code-block:: text + .. code-block:: text - http://localhost:8888/ + http://localhost:8888/ That's all. Enjoy your journey! +.. _docker_run_gpu: -Documentation -------------- +Train with Docker with GPU +------------------------------ -Paddle Docker images include an HTML version of C++ source code -generated using `woboq code browser -`_. This makes it easy -for users to browse and understand the C++ source code. +We recommend using +`nvidia-docker `_ +to run GPU training jobs. Please ensure you have latest +GPU driver installed before move on. -As long as we give the Paddle Docker container a name, we can run an -additional Nginx Docker container to serve the volume from the Paddle -container: + .. code-block:: bash -.. code-block:: bash + nvidia-docker run -it -v $PWD:/work paddledev/paddle:latest-gpu /bin/bash - docker run -d --name paddle-cpu-doc paddle: - docker run -d --volumes-from paddle-cpu-doc -p 8088:80 nginx +**NOTE: If you don't have nvidia-docker installed, try the following method to mount CUDA libs and devices into the container.** + .. code-block:: bash -Then we can direct our Web browser to the HTML version of source code -at http://localhost:8088/paddle/ + export CUDA_SO="$(\ls /usr/lib64/libcuda* | xargs -I{} echo '-v {}:{}') $(\ls /usr/lib64/libnvidia* | xargs -I{} echo '-v {}:{}')" + export DEVICES=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') + docker run ${CUDA_SO} ${DEVICES} -it paddledev/paddle:latest-gpu + +**About AVX:** + +AVX is a kind of CPU instruction can accelerate PaddlePaddle's calculations. +The latest PaddlePaddle Docker image turns AVX on by default, so, if your +computer doesn't support AVX, you'll probably need to +`build <./build_from_source_en.rst>`_ with :code:`WITH_AVX=OFF`. + +The following command will tell you whether your computer supports AVX. + + .. code-block:: bash + + if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi diff --git a/doc/getstarted/build_and_install/index_cn.rst b/doc/getstarted/build_and_install/index_cn.rst index dd9923697ab85825557aa89a08870bece7c76673..88c5142ddee994ed0c0dc520195311e97f5a549e 100644 --- a/doc/getstarted/build_and_install/index_cn.rst +++ b/doc/getstarted/build_and_install/index_cn.rst @@ -6,12 +6,13 @@ 安装流程 ++++++++ -PaddlePaddle提供Docker镜像来部署环境。 +PaddlePaddle提供pip和Docker的安装方式: .. toctree:: :maxdepth: 1 - - docker_install_cn.rst + + pip_install_cn.rst + docker_install_cn.rst 编译流程 @@ -19,9 +20,14 @@ PaddlePaddle提供Docker镜像来部署环境。 .. warning:: - 编译流程主要推荐高级用户查看,普通用户请走安装流程。 + 建议直接使用上述安装流程,方便快速安装。只有在遇到需要独立定制的二进制时才需要编译。 .. toctree:: :maxdepth: 1 - cmake/build_from_source_cn.rst + build_from_source_cn.rst + +常见问题解答 +++++++++++ + +`常见问题解答 `_ diff --git a/doc/getstarted/build_and_install/index_en.rst b/doc/getstarted/build_and_install/index_en.rst index 8a53588e0439df8f4d5fd529b7a20262c67d4e58..c8b60d03578ba6a9b73134ec53b440d057e36079 100644 --- a/doc/getstarted/build_and_install/index_en.rst +++ b/doc/getstarted/build_and_install/index_en.rst @@ -1,22 +1,33 @@ Install and Build ================= -Install PaddlePaddle ----------------------- +.. _install_steps: -.. toctree:: - :maxdepth: 1 +Install Steps +++++++++ + +You can choose either pip or Docker to complete your install: + +.. toctree:: + :maxdepth: 1 + + pip_install_en.rst + docker_install_en.rst - docker_install_en.rst Build from Source ----------------- .. warning:: - Please use :code:`docker` image to install paddle. The building guide is used for hacking or contributing PaddlePaddle source code. + We recommend to directly install via above installation steps, you'll only need to build PaddlePaddle from source when you need a modifed binary. .. toctree:: :maxdepth: 1 build_from_source_en.md + +FAQ +++++++++++ + +`FAQ `_ diff --git a/doc/getstarted/build_and_install/paddleci.png b/doc/getstarted/build_and_install/paddleci.png new file mode 100644 index 0000000000000000000000000000000000000000..16087ce059aa3c07ce8c927d983eb86351915825 Binary files /dev/null and b/doc/getstarted/build_and_install/paddleci.png differ diff --git a/doc/getstarted/build_and_install/pip_install_cn.rst b/doc/getstarted/build_and_install/pip_install_cn.rst new file mode 100644 index 0000000000000000000000000000000000000000..41312da48c055826186a560ef9653653e45d1047 --- /dev/null +++ b/doc/getstarted/build_and_install/pip_install_cn.rst @@ -0,0 +1,86 @@ +使用pip安装PaddlePaddle +================================ + +PaddlePaddle可以使用常用的Python包管理工具 +`pip `_ +完成安装,并可以在大多数主流的Linux操作系统以及MacOS上执行。 + +.. _pip_install: + +使用pip安装 +------------------------------ + + +执行下面的命令即可在当前机器上安装PaddlePaddle的运行时环境,并自动下载安装依赖软件。 + + .. code-block:: bash + + pip install paddlepaddle + + +如果需要安装支持GPU的版本,需要执行: + + .. code-block:: bash + + pip install paddlepaddle-gpu + +如果需要获取并安装最新的(开发分支)PaddlePaddle,可以从我们的CI系统中下载最新的whl安装包和c-api开发包并安装, +您可以从下面的表格中找到需要的版本: + +如果在点击下面链接时出现如下登陆界面,点击“Log in as guest”即可开始下载: + +.. image:: paddleci.png + :scale: 50 % + :align: center + +.. csv-table:: 各个版本最新的whl包 + :header: "版本说明", "cp27-cp27mu", "cp27-cp27mu", "C-API" + :widths: 1, 3, 3, 3 + + "cpu_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cpu_avx_openblas", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "暂无" + "cuda7.5_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + +.. _pip_dependency: + +运行环境依赖 +------------------------------ + +PaddlePaddle安装包由于不仅仅包含.py程序,而且包含了C++编写的部分,所以我们确保发布的二进制包可以支持主流的Linux操作系统,比如CentOS 6以上,Ubuntu 14.04以上,MacOS 10.12以上。 + +PaddlePaddle发布的安装包会尽量对齐 `manylinux1 `_ 标准,通常使用CentOS 5作为编译环境。但由于CUDA库通常需要CentOS 6以上,而且CentOS 5即将停止维护,所以我们默认使用CentOS 6作为标准编译环境。 + +.. csv-table:: PaddlePaddle环境依赖 + :header: "依赖", "版本", "说明" + :widths: 10, 15, 30 + + "操作系统", "Linux, MacOS", "CentOS 6以上,Ubuntu 14.04以上,MacOS 10.12以上" + "Python", "2.7.x", "暂时不支持Python3" + "libc.so", "GLIBC_2.7", "glibc至少包含GLIBC_2.7以上的符号" + "libstdc++.so", "GLIBCXX_3.4.11, CXXABI_1.3.3", "至少包含GLIBCXX_3.4.11, CXXABI_1.3.3以上的符号" + "libgcc_s.so", "GCC_3.3", "至少包含GCC_3.3以上的符号" + +.. _pip_faq: + +安装常见问题和解决方法 +------------------------------ + +- paddlepaddle*.whl is not a supported wheel on this platform. + + 出现这个问题的主要原因是,没有找到和当前系统匹配的paddlepaddle安装包。请检查Python版本是否为2.7系列。另外最新的pip官方源中的安装包默认是manylinux1标准,需要使用最新的pip (>9.0.0) 才可以安装。可以使用下面的命令更新您的pip: + + .. code-block:: bash + + pip install --upgrade pip + + 如果仍然存在问题,可以执行: + + .. code-block:: bash + + python -c "import pip; print(pip.pep425tags.get_supported())" + + 获取当前系统支持的安装包格式,并检查和需安装的包是否匹配。pypi安装包可以在 `这个 `_ 链接中找到。 + + 如果系统支持的是 linux_x86_64 而安装包是 manylinux1_x86_64 ,需要升级pip版本到最新; 如果系统支持 manylinux1_x86_64 而安装包(本地)是 linux_x86_64 ,可以重命名这个whl包为 manylinux1_x86_64 再安装。 \ No newline at end of file diff --git a/doc/getstarted/build_and_install/pip_install_en.rst b/doc/getstarted/build_and_install/pip_install_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..4f295e14baa1465a93b8eef1b3f3b6b47eeea905 --- /dev/null +++ b/doc/getstarted/build_and_install/pip_install_en.rst @@ -0,0 +1,104 @@ +Install PaddlePaddle Using pip +================================ + +You can use current widely used Python package management +tool `pip `_ +to install PaddlePaddle. This method can be used in +most of current Linux systems or MacOS. + +.. _pip_install: + +Install Using pip +------------------------------ + +Run the following command to install PaddlePaddle on the current +machine, it will also download requirements. + + .. code-block:: bash + + pip install paddlepaddle + + +If you wish to install GPU version, just run: + + .. code-block:: bash + + pip install paddlepaddle-gpu + +If you wish to install the latest develop branch PaddlePaddle, +you can download the latest whl package from our CI system. Access +the below links, log in as guest, then click at the "Artifact" +tab, you'll find the download link of whl packages. + +If the links below shows up the login form, just click "Log in as guest" to start the download: + +.. image:: paddleci.png + :scale: 50 % + :align: center + +.. csv-table:: whl package of each version + :header: "version", "cp27-cp27mu", "cp27-cp27mu", "C-API" + :widths: 1, 3, 3, 3 + + "cpu_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cpu_avx_openblas", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "Not Available" + "cuda7.5_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl `_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl `_", "`paddle.tgz `_" + +.. _pip_dependency: + +Runtime Dependency +------------------------------ + +PaddlePaddle installation packages (whl) does not only contain .py files, +but also binaries built from C++ code. We ensure that PaddlePaddle can +run on current mainline Linux distributions, like CentOS 6, Ubuntu 14.04 +and MacOS 10.12. + +PaddlePaddle whl packages are trying to satisfy +`manylinux1 `_ +standard, which uses CentOS 5 as default build environment. But CUDA libraries +seems only run on CentOS 6 at least, also, CentOS 5 is about to end its lifetime, +so we use CentOS 6 as default build environment. + +.. csv-table:: PaddlePaddle Runtime Deps + :header: "Dependency", "version", "description" + :widths: 10, 15, 30 + + "OS", "Linux, MacOS", "CentOS 6 or later,Ubuntu 14.04 or later,MacOS 10.12 or later" + "Python", "2.7.x", "Currently Python3 is not supported" + "libc.so", "GLIBC_2.7", "glibc at least include GLIBC_2.7 symbols" + "libstdc++.so", "GLIBCXX_3.4.11, CXXABI_1.3.3", "At least include GLIBCXX_3.4.11, CXXABI_1.3.3 symbols" + "libgcc_s.so", "GCC_3.3", "At least include GCC_3.3 symbols" + +.. _pip_faq: + +FAQ +------------------------------ + +- paddlepaddle*.whl is not a supported wheel on this platform. + + The main cause of this issue is that your current platform is + not supported. Please check that you are using Python 2.7 series. + Besides, pypi only supports manylinux1 standard, you'll need to + upgrade your pip to >9.0.0. Then run the below command: + + .. code-block:: bash + + pip install --upgrade pip + + If the problem still exists, run the following command: + + .. code-block:: bash + + python -c "import pip; print(pip.pep425tags.get_supported())" + + Then you'll get supported package suffixes, then check if it matches + the file name of the whl package. You can find default whl package at + `here `_ + + If your system supports linux_x86_64 but the whl package is manylinux1_x86_64, + you'll need to update pip to the latest version; If your system supports + manylinux1_x86_64 but the whl package is linux_x86_64 you can rename the + file to manylinux1_x86_64 suffix and then install. diff --git a/doc/getstarted/index_cn.rst b/doc/getstarted/index_cn.rst index aa418c657a4ba16cce61c030066f4d3e14e891cc..a9087be6f350c5656cabb0c64ba0f200d1c666cc 100644 --- a/doc/getstarted/index_cn.rst +++ b/doc/getstarted/index_cn.rst @@ -1,10 +1,61 @@ 新手入门 ============ +.. _quick_install: + +快速安装 +++++++++ + +PaddlePaddle支持使用pip快速安装,目前支持CentOS 6以上, Ubuntu 14.04以及MacOS 10.12,并安装有Python2.7。 +执行下面的命令完成快速安装: + + .. code-block:: bash + + pip install paddlepaddle + +如果需要安装支持GPU的版本,需要执行: + + .. code-block:: bash + + pip install paddlepaddle-gpu + +更详细的安装和编译方法参考: + .. toctree:: :maxdepth: 1 build_and_install/index_cn.rst - concepts/use_concepts_cn.rst -- `深度学习入门课程 `_ +.. _quick_start: + +快速开始 +++++++++ + +创建一个 housing.py 并粘贴此Python代码: + + .. code-block:: python + + import paddle.v2 as paddle + + # Initialize PaddlePaddle. + paddle.init(use_gpu=False, trainer_count=1) + + # Configure the neural network. + x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) + y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear()) + + # Infer using provided test data. + probs = paddle.infer( + output_layer=y_predict, + parameters=paddle.dataset.uci_housing.model(), + input=[item for item in paddle.dataset.uci_housing.test()()]) + + for i in xrange(len(probs)): + print 'Predicted price: ${:,.2f}'.format(probs[i][0] * 1000) + +执行 :code:`python housing.py` 瞧! 它应该打印出预测住房数据的清单。 + +.. toctree:: + :maxdepth: 1 + + concepts/use_concepts_cn.rst diff --git a/doc/getstarted/index_en.rst b/doc/getstarted/index_en.rst index be3253e3d41b99a2b696e2c5ef6463ed49680d69..d14e3f5c0cc90792fce9cb82e65da482c44dc433 100644 --- a/doc/getstarted/index_en.rst +++ b/doc/getstarted/index_en.rst @@ -1,9 +1,61 @@ GET STARTED ============ +.. _quick_install: + +Quick Install +---------------------- + +You can use pip to install PaddlePaddle with a single command, supports +CentOS 6 above, Ubuntu 14.04 above or MacOS 10.12, with Python 2.7 installed. +Simply run the following command to install: + + .. code-block:: bash + + pip install paddlepaddle + +If you need to install GPU version, run: + + .. code-block:: bash + + pip install paddlepaddle-gpu + +For more details about installation and build: + .. toctree:: :maxdepth: 1 build_and_install/index_en.rst -- `Deep Learning 101 `_ + +.. _quick_start: + +Quick Start +++++++++ + +Create a new file called housing.py, and paste this Python +code: + + + .. code-block:: python + + import paddle.v2 as paddle + + # Initialize PaddlePaddle. + paddle.init(use_gpu=False, trainer_count=1) + + # Configure the neural network. + x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) + y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear()) + + # Infer using provided test data. + probs = paddle.infer( + output_layer=y_predict, + parameters=paddle.dataset.uci_housing.model(), + input=[item for item in paddle.dataset.uci_housing.test()()]) + + for i in xrange(len(probs)): + print 'Predicted price: ${:,.2f}'.format(probs[i][0] * 1000) + +Run :code:`python housing.py` and voila! It should print out a list of predictions +for the test housing data. diff --git a/doc/howto/optimization/cpu_profiling.md b/doc/howto/optimization/cpu_profiling.md new file mode 100644 index 0000000000000000000000000000000000000000..32d89a7c183d57e0e69039dfb2c78703d9866f7c --- /dev/null +++ b/doc/howto/optimization/cpu_profiling.md @@ -0,0 +1,163 @@ +此教程会介绍如何使用Python的cProfile包,与Python库yep,google perftools来运行性能分析(Profiling)与调优。 + +运行性能分析可以让开发人员科学的,有条不紊的对程序进行性能优化。性能分析是性能调优的基础。因为在程序实际运行中,真正的瓶颈可能和程序员开发过程中想象的瓶颈相去甚远。 + +性能优化的步骤,通常是循环重复若干次『性能分析 --> 寻找瓶颈 ---> 调优瓶颈 --> 性能分析确认调优效果』。其中性能分析是性能调优的至关重要的量化指标。 + +Paddle提供了Python语言绑定。用户使用Python进行神经网络编程,训练,测试。Python解释器通过`pybind`和`swig`调用Paddle的动态链接库,进而调用Paddle C++部分的代码。所以Paddle的性能分析与调优分为两个部分: + +* Python代码的性能分析 +* Python与C++混合代码的性能分析 + + +## Python代码的性能分析 + +### 生成性能分析文件 + +Python标准库中提供了性能分析的工具包,[cProfile](https://docs.python.org/2/library/profile.html)。生成Python性能分析的命令如下: + +```bash +python -m cProfile -o profile.out main.py +``` + +其中`-o`标识了一个输出的文件名,用来存储本次性能分析的结果。如果不指定这个文件,`cProfile`会打印一些统计信息到`stdout`。这不方便我们进行后期处理(进行`sort`, `split`, `cut`等等)。 + +### 查看性能分析文件 + +当main.py运行完毕后,性能分析结果文件`profile.out`就生成出来了。我们可以使用[cprofilev](https://github.com/ymichael/cprofilev)来查看性能分析结果。`cprofilev`是一个Python的第三方库。使用它会开启一个HTTP服务,将性能分析结果以网页的形式展示出来。 + +使用`pip install cprofilev`安装`cprofilev`工具。安装完成后,使用如下命令开启HTTP服务 + +```bash +cprofilev -a 0.0.0.0 -p 3214 -f profile.out main.py +``` + +其中`-a`标识HTTP服务绑定的IP。使用`0.0.0.0`允许外网访问这个HTTP服务。`-p`标识HTTP服务的端口。`-f`标识性能分析的结果文件。`main.py`标识被性能分析的源文件。 + +访问对应网址,即可显示性能分析的结果。性能分析结果格式如下: + +```text + ncalls tottime percall cumtime percall filename:lineno(function) + 1 0.284 0.284 29.514 29.514 main.py:1() + 4696 0.128 0.000 15.748 0.003 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/executor.py:20(run) + 4696 12.040 0.003 12.040 0.003 {built-in method run} + 1 0.144 0.144 6.534 6.534 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/__init__.py:14() +``` + +每一列的含义是: + +| 列名 | 含义 | +| --- | --- | +| ncalls | 函数的调用次数 | +| tottime | 函数实际使用的总时间。该时间去除掉本函数调用其他函数的时间 | +| percall | tottime的每次调用平均时间 | +| cumtime | 函数总时间。包含这个函数调用其他函数的时间 | +| percall | cumtime的每次调用平均时间 | +| filename:lineno(function) | 文件名, 行号,函数名 | + + +### 寻找性能瓶颈 + +通常`tottime`和`cumtime`是寻找瓶颈的关键指标。这两个指标代表了某一个函数真实的运行时间。 + +将性能分析结果按照tottime排序,效果如下: + +```text + 4696 12.040 0.003 12.040 0.003 {built-in method run} + 300005 0.874 0.000 1.681 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/dataset/mnist.py:38(reader) + 107991 0.676 0.000 1.519 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:219(__init__) + 4697 0.626 0.000 2.291 0.000 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp) + 1 0.618 0.618 0.618 0.618 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/__init__.py:1() + +``` + +可以看到最耗时的函数是C++端的`run`函数。这需要联合我们第二节`Python与C++混合代码的性能分析`来进行调优。而`sync_with_cpp`函数的总共耗时很长,每次调用的耗时也很长。于是我们可以点击`sync_with_cpp`的详细信息,了解其调用关系。 + +```text +Called By: + + Ordered by: internal time + List reduced from 4497 to 2 due to restriction <'sync_with_cpp'> + +Function was called by... + ncalls tottime cumtime +/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:428(sync_with_cpp) <- 4697 0.626 2.291 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp) +/home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:562(sync_with_cpp) <- 4696 0.019 2.316 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:487(clone) + 1 0.000 0.001 /home/yuyang/perf_test/.env/lib/python2.7/site-packages/paddle/v2/fluid/framework.py:534(append_backward) + + +Called: + + Ordered by: internal time + List reduced from 4497 to 2 due to restriction <'sync_with_cpp'> +``` + +通常观察热点函数间的调用关系,和对应行的代码,就可以了解到问题代码在哪里。当我们做出性能修正后,再次进行性能分析(profiling)即可检查我们调优后的修正是否能够改善程序的性能。 + + + +## Python与C++混合代码的性能分析 + +### 生成性能分析文件 + +C++的性能分析工具非常多。常见的包括`gprof`, `valgrind`, `google-perftools`。但是调试Python中使用的动态链接库与直接调试原始二进制相比增加了很多复杂度。幸而Python的一个第三方库`yep`提供了方便的和`google-perftools`交互的方法。于是这里使用`yep`进行Python与C++混合代码的性能分析 + +使用`yep`前需要安装`google-perftools`与`yep`包。ubuntu下安装命令为 + +```bash +apt install libgoogle-perftools-dev +pip install yep +``` + +安装完毕后,我们可以通过 + +```bash +python -m yep -v main.py +``` + +生成性能分析文件。生成的性能分析文件为`main.py.prof`。 + +命令行中的`-v`指定在生成性能分析文件之后,在命令行显示分析结果。我们可以在命令行中简单的看一下生成效果。因为C++与Python不同,编译时可能会去掉调试信息,运行时也可能因为多线程产生混乱不可读的性能分析结果。为了生成更可读的性能分析结果,可以采取下面几点措施: + +1. 编译时指定`-g`生成调试信息。使用cmake的话,可以将CMAKE_BUILD_TYPE指定为`RelWithDebInfo`。 +2. 编译时一定要开启优化。单纯的`Debug`编译性能会和`-O2`或者`-O3`有非常大的差别。`Debug`模式下的性能测试是没有意义的。 +3. 运行性能分析的时候,先从单线程开始,再开启多线程,进而多机。毕竟如果单线程调试更容易。可以设置`OMP_NUM_THREADS=1`这个环境变量关闭openmp优化。 + +### 查看性能分析文件 + +在运行完性能分析后,会生成性能分析结果文件。我们可以使用[pprof](https://github.com/google/pprof)来显示性能分析结果。注意,这里使用了用`Go`语言重构后的`pprof`,因为这个工具具有web服务界面,且展示效果更好。 + +安装`pprof`的命令和一般的`Go`程序是一样的,其命令如下: + +```bash +go get github.com/google/pprof +``` + +进而我们可以使用如下命令开启一个HTTP服务: + +```bash +pprof -http=0.0.0.0:3213 `which python` ./main.py.prof +``` + +这行命令中,`-http`指开启HTTP服务。`which python`会产生当前Python二进制的完整路径,进而指定了Python可执行文件的路径。`./main.py.prof`输入了性能分析结果。 + +访问对应的网址,我们可以查看性能分析的结果。结果如下图所示: + +![result](./pprof_1.png) + + +### 寻找性能瓶颈 + +与寻找Python代码的性能瓶颈类似,寻找Python与C++混合代码的性能瓶颈也是要看`tottime`和`cumtime`。而`pprof`展示的调用图也可以帮助我们发现性能中的问题。 + +例如下图中, + +![kernel_perf](./pprof_2.png) + +在一次训练中,乘法和乘法梯度的计算占用2%-4%左右的计算时间。而`MomentumOp`占用了17%左右的计算时间。显然,`MomentumOp`的性能有问题。 + +在`pprof`中,对于性能的关键路径都做出了红色标记。先检查关键路径的性能问题,再检查其他部分的性能问题,可以更有次序的完成性能的优化。 + +## 总结 + +至此,两种性能分析的方式都介绍完毕了。希望通过这两种性能分析的方式,Paddle的开发人员和使用人员可以有次序的,科学的发现和解决性能问题。 diff --git a/doc/howto/optimization/pprof_1.png b/doc/howto/optimization/pprof_1.png new file mode 100644 index 0000000000000000000000000000000000000000..8e9edbf377672d0ef40f2fc7bd39e746923550cb Binary files /dev/null and b/doc/howto/optimization/pprof_1.png differ diff --git a/doc/howto/optimization/pprof_2.png b/doc/howto/optimization/pprof_2.png new file mode 100644 index 0000000000000000000000000000000000000000..172ba20399ba974d27f4c072425277b69b02520b Binary files /dev/null and b/doc/howto/optimization/pprof_2.png differ diff --git a/paddle/capi/Matrix.cpp b/paddle/capi/Matrix.cpp index d5b55e1c95f248f551e6a0a3b39123169dd7784f..30f3a766f0c65187c8f2dd4603e3d26c9b9a6a3d 100644 --- a/paddle/capi/Matrix.cpp +++ b/paddle/capi/Matrix.cpp @@ -55,7 +55,7 @@ paddle_error paddle_matrix_set_row(paddle_matrix mat, } PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat, - paddle_real* value) { + paddle_real* value) { if (mat == nullptr || value == nullptr) return kPD_NULLPTR; auto ptr = cast(mat); if (ptr->mat == nullptr) return kPD_NULLPTR; @@ -75,7 +75,7 @@ PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat, } PD_API paddle_error paddle_matrix_get_value(paddle_matrix mat, - paddle_real* result) { + paddle_real* result) { if (mat == nullptr || result == nullptr) return kPD_NULLPTR; auto ptr = cast(mat); if (ptr->mat == nullptr) return kPD_NULLPTR; diff --git a/paddle/capi/examples/model_inference/dense/main.c b/paddle/capi/examples/model_inference/dense/main.c index 876af2aa7615c098d225b56ce2ea0b1529a6e3c6..5eeaf7e31fac7c9ed0b9269e74a7e467bde155ef 100644 --- a/paddle/capi/examples/model_inference/dense/main.c +++ b/paddle/capi/examples/model_inference/dense/main.c @@ -1,5 +1,6 @@ #include #include + #include "../common/common.h" #define CONFIG_BIN "./trainer_config.bin" @@ -27,20 +28,19 @@ int main() { CHECK(paddle_arguments_resize(in_args, 1)); // Create input matrix. - paddle_matrix mat = paddle_matrix_create(/* sample_num */ 10, + paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1, /* size */ 784, /* useGPU */ false); srand(time(0)); - std::vector input; - input.resize(784 * 10); + paddle_real* array; + + // Get First row. + CHECK(paddle_matrix_get_row(mat, 0, &array)); - for (int i = 0; i < input.size(); ++i) { - input[i] = rand() / ((float)RAND_MAX); + for (int i = 0; i < 784; ++i) { + array[i] = rand() / ((float)RAND_MAX); } - - // Set value for the input matrix - CHECK(paddle_matrix_set_value(mat, input.data())); CHECK(paddle_arguments_set_value(in_args, 0, mat)); @@ -53,17 +53,18 @@ int main() { CHECK(paddle_arguments_get_value(out_args, 0, prob)); - std::std::vector result; - int height; - int width; + uint64_t height; + uint64_t width; - CHECK(paddle_matrix_get_shape(prob, &height, &width); - result.resize(height * width); - CHECK(paddle_matrix_get_value(prob, result.data())); + CHECK(paddle_matrix_get_shape(prob, &height, &width)); + CHECK(paddle_matrix_get_row(prob, 0, &array)); - printf("Prob: "); + printf("Prob: \n"); for (int i = 0; i < height * width; ++i) { - printf("%.2f ", result[i]); + printf("%.4f ", array[i]); + if ((i + 1) % width == 0) { + printf("\n"); + } } printf("\n"); diff --git a/paddle/capi/matrix.h b/paddle/capi/matrix.h index 01b8bad2ee9f528f8622346f43b9ff82225a7e73..8cc3e0034e058daefc63c69efe0b1f575c586897 100644 --- a/paddle/capi/matrix.h +++ b/paddle/capi/matrix.h @@ -79,7 +79,7 @@ PD_API paddle_error paddle_matrix_set_row(paddle_matrix mat, * @note value should contain enough element of data to init the mat */ PD_API paddle_error paddle_matrix_set_value(paddle_matrix mat, - paddle_real* value); + paddle_real* value); /** * @brief PDMatGetRow Get raw row buffer from matrix @@ -93,14 +93,14 @@ PD_API paddle_error paddle_matrix_get_row(paddle_matrix mat, paddle_real** rawRowBuffer); /** - * @brief copy data from the matrix + * @brief copy data from the matrix * @param [in] mat Target matrix - * @param [out] result pointer to store the matrix data + * @param [out] result pointer to store the matrix data * @return paddle_error * @note the space of the result should allocated before invoke this API */ PD_API paddle_error paddle_matrix_get_value(paddle_matrix mat, - paddle_real* result); + paddle_real* result); /** * @brief PDMatCreateNone Create None Matrix * @return diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index c08e844847737b1172f6453767cc7f5e7b1a2bda..4b0eff3adb6fff0c9599b8613c5f19daea840674 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -6,7 +6,10 @@ cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context) + cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) +cc_test(tensor_util_test SRCS tensor_util_test.cc DEPS tensor) + cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto) @@ -51,10 +54,6 @@ cc_library(executor SRCS executor.cc DEPS op_registry device_context scope frame cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) - -cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor) -cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place) - cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry proto_desc) cc_library(selected_rows SRCS selected_rows.cc DEPS tensor) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index b9018ecdba8303fd6b37c87edd99e192aa604228..8fd2906107c490eee129fc10262df28bfa67800b 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -22,7 +22,6 @@ #include "paddle/framework/block_desc.h" #include "paddle/framework/op_registry.h" -#include "paddle/operators/dynamic_recurrent_op.h" #include "paddle/operators/net_op.h" namespace paddle { @@ -218,21 +217,6 @@ static std::unique_ptr BackwardRecursive( return false; }); - // process recurrent gradient op as a special operator. - if (forwardOp.Type() == "dynamic_recurrent") { - // NOTE clean up cycle call somewhere (RNN's stepnet constains itself), - // or this will result in infinite loop. - const auto& rnnop = - *static_cast(&forwardOp); - auto rnn_grad_op = - static_cast(grad_op.get()); - const auto& stepnet_op = - *static_cast(&rnnop.rnn.GetStepUnit()); - // create stepnet's gradient op - rnn_grad_op->rnn.SetStepUnit( - BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id)); - } - if (net->ops_.empty()) { // Current no aux op is added to network return grad_op; } @@ -522,7 +506,7 @@ ParamGradInfoMap AppendBackward( new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}}, {{"shape", std::vector{1}}, {"value", static_cast(1.0)}, - {"data_type", target.GetDataType()}})); + {"dtype", target.GetDataType()}})); // infer var type of fill_one_op fill_one_op->InferVarType(root_block); diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index adedd8cb0e8504fd6fc924e62a2ede3c1c7ce698..2ffb5b7dbb27b561092856eac0de23d0c3788f75 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -120,7 +120,7 @@ void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id, for (auto& op_desc : block.AllOps()) { auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); - VLOG(10) << op->DebugString(); + VLOG(3) << op->DebugString(); op->Run(*local_scope, *device); } if (create_local_scope) { diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 7f8a51cc581e759bc707e506ac7cdeb3680f40ac..21bdfca1111f16d5b8ea71be004ddb8da12fd03c 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -24,6 +24,7 @@ #include #include "paddle/framework/ddim.h" #include "paddle/framework/tensor.h" +#include "paddle/framework/tensor_util.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" @@ -175,9 +176,9 @@ LoDTensor LodExpand(const LoDTensor& source, const LoD& lod, size_t level, PADDLE_ENFORCE_EQ(num_instances, lod_level.size() - 1); for (size_t ins = 0; ins < num_instances; ins++) { for (size_t elem = lod_level[ins]; elem < lod_level[ins + 1]; elem++) { - tensor.Slice(elem, elem + 1) - .CopyFrom(source.Slice(ins, ins + 1), platform::CPUPlace(), - platform::CPUDeviceContext()); + auto slice = tensor.Slice(elem, elem + 1); + CopyFrom(source.Slice(ins, ins + 1), platform::CPUPlace(), + platform::CPUDeviceContext(), &slice); } } return tensor; diff --git a/paddle/framework/prune.cc b/paddle/framework/prune.cc index bf3066983cdcf44ae84f236ac72486e5d4fd5b92..da76052eb4d3067214841af72a35cebb26477e7f 100644 --- a/paddle/framework/prune.cc +++ b/paddle/framework/prune.cc @@ -26,6 +26,8 @@ namespace framework { const std::string kFeedOpType = "feed"; const std::string kFetchOpType = "fetch"; +const std::string kDropOutOpType = "dropout"; +const std::string kBatchNormOpType = "batch_norm"; bool HasDependentVar(const OpDesc& op_desc, const std::set& dependent_vars) { @@ -106,5 +108,26 @@ void Prune(const ProgramDesc& input, ProgramDesc* output) { prune_impl(input, output, 0); } +void inference_optimize_impl(const ProgramDesc& input, ProgramDesc* output, + int block_id) { + *output = input; + auto* op_field = output->mutable_blocks(block_id)->mutable_ops(); + for (auto& op_desc : *op_field) { + if (op_desc.type() == kDropOutOpType || + op_desc.type() == kBatchNormOpType) { + for (auto& attr : *op_desc.mutable_attrs()) { + if (attr.name() == "is_test") { + attr.set_b(true); + break; + } + } + } + } +} + +void InferenceOptimize(const ProgramDesc& input, ProgramDesc* output) { + inference_optimize_impl(input, output, 0); +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/prune.h b/paddle/framework/prune.h index 8cfb16343aa44dcc8a3349b01adecce33f1c2b5b..23db014894348094a98e043aa744c6f0d27b2640 100644 --- a/paddle/framework/prune.h +++ b/paddle/framework/prune.h @@ -22,5 +22,7 @@ namespace framework { void Prune(const ProgramDesc& input, ProgramDesc* output); +void InferenceOptimize(const ProgramDesc& input, ProgramDesc* output); + } // namespace framework } // namespace paddle diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 28d0fcf94ec31c82476e093f93ccee222a0c9d9a..6a0c5133c9a6bb326ca51755242e75b6eb9e5474 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -89,34 +89,6 @@ class Tensor { /*! The internal of two tensors share the same memory block. */ inline Tensor& ShareDataWith(const Tensor& src); - /** - * @brief Copy the content of external tensor to a new place. - * - * @param[in] src The external tensor. - * @param[in] dst_place The dst place. - * @param[in] ctx The device context contains device resources. - * - * @note CopyFrom supports CPU <-> GPU, GPU <-> GPU. - */ - // TODO(qijun): https://github.com/PaddlePaddle/Paddle/issues/4647 - // Remove `CopyFrom` and `CopyFromVector` from Tensor interface - // and make them global functions - inline void CopyFrom(const Tensor& src, const platform::Place& dst_place, - const platform::DeviceContext& ctx); - - /** - * @brief Copy the content of an external vector to a tensor. - * - * @param[in] src The external tensor. - * @param[in] ctx The device context contains device resources. - * - * * @note CopyFromVector assumes that the tensor has been resized - * before invoking. - */ - template - inline void CopyFromVector(const std::vector& src, - const platform::DeviceContext& ctx); - /** * @brief Return a sub-tensor of the given tensor. * @@ -141,7 +113,6 @@ class Tensor { size_t memory_size() const; - private: inline void check_memory_size() const; private: diff --git a/paddle/framework/tensor_array.cc b/paddle/framework/tensor_array.cc deleted file mode 100644 index 0947e33548130a923e998f8bad68db00097af909..0000000000000000000000000000000000000000 --- a/paddle/framework/tensor_array.cc +++ /dev/null @@ -1,444 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - - - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/framework/tensor_array.h" - -#include -#include -#include - -#include "paddle/framework/eigen.h" - -namespace paddle { -namespace framework { - -namespace detail { - -/* - * Offer an iterator over the length-sorted lod-tensor's top level. The top - * level of a lod-tensor stores batch-size of sequences, each top-level sequence - * may contains several lower-level sequences, sort top-level lod by the numbers - * of lower-level sequences in descending order, so that during RNN's running, - * the batch-size will keep decreasing, the short sentences will end at the tail - * of each batch. - * - * Let's take a simple lod-tensor for example - * - * |(0) |(1) top-level has two instances - * ||| ||||| lower-level - * - * sort by lower-level's length - * - * |(1) |(0) - * ||||| ||| - * - * when RNN runs, it get 5 batches (equals the number of elements the longest - * sequence has) - * - * ||||| - * ||| - * - * the first three batches has two elements, the last two elements just has 1 - * element each. - */ -struct DynamicBatchUnpacker { - using value_type = float; - - DynamicBatchUnpacker(const LoDTensor& source, size_t level, - bool descend = true) - : source(&source), level(level) { - BuildLengthSortedMeta(descend); - } - - LoDTensor GetBatch(size_t index); - - std::vector meta; - - LoDTensor const* source; - size_t level; - - protected: - void BuildLengthSortedMeta(bool descend); -}; - -LoDTensor PackDynamicBatch(const std::vector& source, - const std::vector& meta, const LoD& lod, - size_t level); - -std::vector GenDyBatchIndice(const DySeqMetaBatch& meta, int batch_id) { - // collect indice need to copy to the batch - std::vector indice; - for (const auto& seq : meta) { - size_t id = seq.begin + batch_id; - if (id >= seq.end) break; - indice.push_back(id); - } - return indice; -} - -} // namespace detail - -const LoDTensor& TensorArray::Read(size_t index) const { - PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index); - if (index >= size()) { - values_.resize(index + 1); - } - return values_[index]; -} - -void TensorArray::Write(size_t index, const LoDTensor& value) { - PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index); - - if (index >= size()) { - values_.resize(index + 1); - } - - values_[index].set_lod(value.lod()); - values_[index].Resize(value.dims()); - values_[index].mutable_data(value.place()); - values_[index].CopyFrom(value, value.place(), platform::CPUDeviceContext()); -} - -void TensorArray::WriteShared(size_t index, const LoDTensor& value) { - PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index); - if (index >= size()) { - values_.resize(index + 1); - } - - values_[index].set_lod(value.lod()); - values_[index].ShareDataWith(value); -} - -LoDTensor TensorArray::Pack(size_t level, const std::vector& meta, - const LoD& lod) const { - return detail::PackDynamicBatch(values_, meta, lod, level); -} - -DySeqMetaBatch TensorArray::Unpack(const LoDTensor& source, int level, - bool length_desend) { - detail::DynamicBatchUnpacker unpacker(source, level, - length_desend /*descend*/); - - // find max length of all the sequences - size_t max_length = 0; - for (const auto& seq : unpacker.meta) { - max_length = std::max(max_length, seq.end - seq.begin); - } - - // write batches to values - for (size_t batch_id = 0; batch_id < max_length; batch_id++) { - Write(batch_id, unpacker.GetBatch(batch_id)); - } - - PADDLE_ENFORCE(!unpacker.meta.empty()); - return unpacker.meta; -} - -LoDTensor TensorArray::LodPack(size_t level) const { - PADDLE_ENFORCE_GT(size(), 0UL, "no time step exists"); - // the levels should be no less than 2 - LoDTensor merged; - const LoDTensor *pre, *cur; - pre = &Read(0); - - for (size_t step = 1; step < size(); step++) { - cur = &Read(step); - PADDLE_ENFORCE_GT(cur->NumLevels(), 0); - PADDLE_ENFORCE_GT(pre->NumLevels(), 0); - PADDLE_ENFORCE_EQ(pre->NumLevels(), cur->NumLevels()); - PADDLE_ENFORCE_EQ(pre->NumElements(level), cur->NumElements(level)); - - merged = LodPackTwo(*pre, *cur, level); - pre = &merged; - } - return merged; -} - -/* - * NOTE currently, only the lowest level supports packing. - * The lowest LoD will be changed, while the relative offsets in levels above - * stay unchanged. - * - * previous step : [0] [1] [3] - * current step: [0 1 2] [2 3] [] - * packed to - * [0 0] [0 1] [0 2] [1 2] [1 3] [3] - */ -LoDTensor TensorArray::LodPackTwo(const LoDTensor& pre, const LoDTensor& cur, - size_t level) const { - PADDLE_ENFORCE_EQ(pre.NumLevels(), cur.NumLevels()); - PADDLE_ENFORCE_EQ(pre.NumLevels(), level + 1, - "Only the lowest LoD level supports pack temporarily."); - // calculate the result tensor's shape first - size_t num_instances = 0; - for (size_t elem = 0; elem < pre.NumElements(level); elem++) { - size_t prefix_size = pre.NumElements(level, elem); - size_t num_candidates = cur.NumElements(level, elem); - if (num_candidates > 0) { - num_instances += num_candidates * (prefix_size + 1); - } else { - num_instances += prefix_size; - } - } - - auto res_dims = pre.dims(); - res_dims[0] = num_instances; - LoDTensor result; - result.Resize(res_dims); - result.mutable_data(cur.place()); - - Vector last_lod_level; - // copy data - size_t index = 0; - last_lod_level.push_back(index); - for (size_t elem = 0; elem < pre.NumElements(level); elem++) { - size_t prefix_size = pre.NumElements(level, elem); - size_t num_candidates = cur.NumElements(level, elem); - - // slice the prefix Tensor - LoDTensor prefix = pre; - prefix.ShrinkInLevel(level, elem, elem + 1); - LoDTensor candidate = cur; - if (num_candidates > 0) { - candidate.ShrinkInLevel(level, elem, elem + 1); - } else { // just push prefix - result.Slice(index, index + prefix_size) - .CopyFrom(prefix, result.place(), platform::CPUDeviceContext()); - index += prefix_size; - last_lod_level.push_back(index); - } - for (size_t candi = 0; candi < num_candidates; candi++) { - // TODO(superjom) support GPU - result.Slice(index, index + prefix_size) - .CopyFrom(prefix, result.place(), platform::CPUDeviceContext()); - index += prefix_size; - // copy candidate record - result.Slice(index, index + 1) - .CopyFrom(candidate.Slice(candi, candi + 1), result.place(), - platform::CPUDeviceContext()); - index++; - last_lod_level.push_back(index); - } - } - - // update lod - auto lod = cur.lod(); - lod.back() = last_lod_level; - result.set_lod(lod); - return result; -} - -/* - * source [0 1 2] [3 4] [5 6 7] will be transformd to a list of LoDTensors such - * as - * [0 3 5] [1 4 6] [2 7] with 1-level LoDs: - * - [0 1 2 3] - * - [0 1 2 3] - * - [0 1 1 2], the [1,1) here means the second sequence is empty - * - * NOTE Unpack a LoDTensor in this approach may result in a big LoD. - */ -void TensorArray::LodUnpack(const LoDTensor& source, size_t level) { - PADDLE_ENFORCE_EQ(level, source.NumLevels() - 1, - "only the lowest LoD level supports unpack."); - const size_t non_empty_instances = source.dims()[0]; - size_t index = 0; - Vector lowest_lod_level; - lowest_lod_level.push_back(index); - - for (size_t step = 0; step < non_empty_instances; step++) { - size_t num_instances = 0; - for (size_t id = 0; id < source.NumElements(level); id++) { - auto instance = source; - instance.ShrinkInLevel(level, id, id + 1); - if (static_cast(instance.dims()[0]) > step) { - num_instances++; - index++; - } - lowest_lod_level.push_back(index); - } - - // create tensor for this time step - LoDTensor tensor; - auto dims = source.dims(); - dims[0] = num_instances; - // set lod - auto lod = source.lod(); - lod.back() = lowest_lod_level; - tensor.set_lod(lod); - - index = 0; - for (size_t id = 0; id < source.NumElements(level); id++) { - auto instance = source; - instance.ShrinkInLevel(level, id, id + 1); - if (static_cast(instance.dims()[0]) > step) { - // copy this instance - tensor.Slice(index, index + 1) - .CopyFrom(instance.Slice(step, step + 1), tensor.place(), - platform::CPUDeviceContext()); - index++; - } - } - Write(step, tensor); - } -} - -LoDTensor TensorArray::Stack() const { - LoDTensor result; - if (size() == 0) return result; - - const auto& first_dims = values_.front().dims(); - // check all the values have the same shape - // TODO(superjom) check the same dtypes - for (size_t idx = 1; idx < size(); idx++) { - const auto& value_dims = values_[idx].dims(); - PADDLE_ENFORCE_EQ(first_dims, value_dims); - } - - // copy - auto result_dims = vectorize(first_dims); - result_dims.insert(result_dims.begin(), size()); - result.Resize(make_ddim(result_dims)); - result.mutable_data(platform::CPUPlace()); - - for (size_t idx = 0; idx < size(); idx++) { - result.Slice(idx, idx + 1) - .CopyFrom(Read(idx), platform::CPUPlace(), - platform::CPUDeviceContext()); - } - return result; -} - -void TensorArray::Unstack(const LoDTensor& source) const { - Unstack(source, false /*data_shared*/); -} - -void TensorArray::UnstackShared(const LoDTensor& source) const { - Unstack(source, true /*data_shared*/); -} - -void TensorArray::Unstack(const LoDTensor& source, bool data_shared) const { - size_t first_dim = source.dims()[0]; - DDim value_dims = slice_ddim(source.dims(), 1, source.dims().size()); - PADDLE_ENFORCE_GT(first_dim, 0, - "source should have some data to be unstacked"); - - values_.resize(first_dim); - - for (size_t elem = 0; elem < first_dim; elem++) { - // create a new value - auto& value = values_[elem]; - if (data_shared) { - // share memory - value.ShareDataWith(source.Slice(elem, elem + 1)); - } else { - // copy - value.Resize(value_dims); - value.CopyFrom(source.Slice(elem, elem + 1), platform::CPUPlace(), - platform::CPUDeviceContext()); - } - } -} - -size_t TensorArray::size() const { return values_.size(); } - -namespace detail { - -void DynamicBatchUnpacker::BuildLengthSortedMeta(bool descend) { - PADDLE_ENFORCE(meta.empty(), "duplicate build meta"); - // collect meta for each sequence in some level - auto lod = SliceLevels(source->lod(), level, level + 1)[0]; - - for (size_t seq_id = 0; seq_id < lod.size() - 1; seq_id++) { - DySeqMeta seq_meta({lod[seq_id], lod[seq_id + 1], seq_id}); - meta.push_back(seq_meta); - } - - PADDLE_ENFORCE_GT(meta.size(), 0, "meta is empty"); - - // sort by length - sort(meta.begin(), meta.end(), - [descend](const DySeqMeta& a, const DySeqMeta& b) { - bool a_ge_b = (a.end - a.begin) > (b.end - b.begin); - return descend ? a_ge_b : !a_ge_b; - }); -} - -LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { - PADDLE_ENFORCE(!meta.empty(), "should build meta first"); - LoDTensor result; - - auto indice = detail::GenDyBatchIndice(meta, index); - PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index); - - // copy the indice of records in LoDTensor - auto record_dims = slice_ddim(source->dims(), 1, source->dims().size()); - auto record_dims_vec = vectorize(record_dims); - record_dims_vec.insert(record_dims_vec.begin(), indice.size()); - result.Resize(make_ddim(record_dims_vec)); - result.mutable_data(platform::CPUPlace()); - - for (size_t i = 0; i < indice.size(); i++) { - auto index = indice[i]; - auto target = result.Slice(i, i + 1); - auto slice = source->Slice(index, index + 1); - - target.CopyFrom(slice, platform::CPUPlace(), platform::CPUDeviceContext()); - } - - return result; -} - -// TODO(supejom) to cache lod if reasonable -LoDTensor PackDynamicBatch(const std::vector& source, - const std::vector& meta, const LoD& lod, - size_t level) { - PADDLE_ENFORCE(!source.empty()); - PADDLE_ENFORCE(!meta.empty()); - PADDLE_ENFORCE(!lod.empty()); - - LoDTensor result; - - // init result space - auto record_dims = slice_ddim(source[0].dims(), 1, source[0].dims().size()); - auto record_dims_vec = vectorize(record_dims); - auto height = lod[level].back(); - record_dims_vec.insert(record_dims_vec.begin(), height); - result.Resize(make_ddim(record_dims_vec)); - result.mutable_data(platform::CPUPlace()); - - for (size_t batch_id = 0; batch_id < source.size(); batch_id++) { - for (size_t seq_id = 0; seq_id < meta.size(); seq_id++) { - const auto& seq_meta = meta[seq_id]; - // source is source[batch_id][seq_id] - // target is result[index] - auto index = seq_meta.begin + batch_id; - if (index >= seq_meta.end) break; - auto source_ = source[batch_id].Slice(seq_id, seq_id + 1); - auto target = result.Slice(index, index + 1); - target.CopyFrom(source_, platform::CPUPlace(), - platform::CPUDeviceContext()); - } - } - - result.set_lod(lod); - return result; -} - -} // namespace detail - -} // namespace framework -} // namespace paddle diff --git a/paddle/framework/tensor_array.h b/paddle/framework/tensor_array.h deleted file mode 100644 index 78fad8cab7e27a7f07ca542c2a083460ee9e2b79..0000000000000000000000000000000000000000 --- a/paddle/framework/tensor_array.h +++ /dev/null @@ -1,132 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once -#include - -#include "paddle/framework/lod_tensor.h" - -namespace paddle { -namespace framework { - -/* - * DyBatchSeqPosition stores indices of the basic element in tensor. It is used - * after lod-tensor's re-assembling, its info can be used to recover the order - * in original lod-tensor. - */ -struct DySeqMeta { - DySeqMeta(size_t begin, size_t end, size_t ori_idx) - : begin(begin), end(end), ori_idx(ori_idx) {} - - size_t begin; - size_t end; // not included - size_t ori_idx; -}; - -using DySeqMetaBatch = std::vector; - -/* - * Extract the indices of instances. - */ -std::vector GenDyBatchIndice(const DySeqMetaBatch &metas, int batch_id); - -/* - * TensorArray is a C-array-like array of tensors, it is meant to be used with - * dynamic iteration primitives such as while_loop. It is used to segment inputs - * and store states in all time steps. - * - * By providing some methods similar to a C++ array, the difinition of some - * state-based dynamic models such as RNN cound be more natural and highly - * flexible. - */ -class TensorArray { - public: - using value_type = float; - - // max number of values allowed to store. - const size_t MAX_SIZE{100000}; - - /* - * Read the value at location `index` in the `TensorArray`. - */ - const LoDTensor &Read(size_t index) const; - - /* - * Write value into the index of the TensorArray. - */ - void Write(size_t index, const LoDTensor &value); - - /* - * Write value into the index of the TensorArray, with memory shared. - */ - void WriteShared(size_t index, const LoDTensor &value); - - /* - * Recover the original LoD-arranged LoDTensor with the `values`, `level` and - * `indice_map`. - */ - LoDTensor Pack(size_t level, const DySeqMetaBatch &meta, - const LoD &lod) const; - - /* - * Split LoDTensor in some `level` and write the generated batches to - * `values`, if set `desend`, will sort by length in descending order else in - * ascending order. - */ - DySeqMetaBatch Unpack(const LoDTensor &source, int level, bool length_desend); - - /* - * Pack an array of LoDTensors to a LoDTensor. - */ - LoDTensor LodPack(size_t level) const; - - /* - * Unpack a LoDTensor to an array of LoDTensors. - */ - void LodUnpack(const LoDTensor &source, size_t level); - - /* - * Pack the values into a tensor with rank one higher than each tensor in - * values. - */ - LoDTensor Stack() const; - - /* - * Unstacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors. - */ - void Unstack(const LoDTensor &source) const; - - /* - * Unstacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors, - * with memory of tensors shared. - */ - void UnstackShared(const LoDTensor &source) const; - - /* - * Return the number of values. - */ - size_t size() const; - - protected: - void Unstack(const LoDTensor &source, bool data_shared) const; - - LoDTensor LodPackTwo(const LoDTensor &pre, const LoDTensor &cur, - size_t level) const; - - private: - mutable std::vector values_; -}; // class TensorArray - -} // namespace framework -} // namespace paddle diff --git a/paddle/framework/tensor_array_test.cc b/paddle/framework/tensor_array_test.cc deleted file mode 100644 index 83b52b442daf9b2f1fc40f23e458fcb67c5040e8..0000000000000000000000000000000000000000 --- a/paddle/framework/tensor_array_test.cc +++ /dev/null @@ -1,182 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/framework/tensor_array.h" - -#include - -namespace paddle { -namespace framework { - -class TensorArrayTester : public ::testing::Test { - protected: - void SetUp() override { - LoDTensor source; - source.Resize(make_ddim({batch_size, dim})); - int* data = source.mutable_data(platform::CPUPlace()); - for (int i = 0; i < 16 * 32; i++) { - data[i] = i; - } - ta.Unstack(source); - } - - TensorArray ta; - const int batch_size = 16; - const int dim = 32; -}; - -TEST_F(TensorArrayTester, Read) { - for (int i = 0; i < batch_size; i++) { - const auto& tensor = ta.Read(i); - ASSERT_EQ(tensor.dims()[0], 1); - ASSERT_EQ(tensor.dims()[1], dim); - } -} - -TEST_F(TensorArrayTester, Write) { - LoDTensor source; - source.Resize(make_ddim({1, dim})); - for (int i = 0; i < dim; i++) { - *(source.mutable_data(platform::CPUPlace()) + i) = i; - } - - ta.Write(2, source); - - const auto& tensor = ta.Read(2); - for (int i = 0; i < dim; i++) { - EXPECT_EQ(*(tensor.data() + i), *(source.data() + i)); - } -} - -TEST_F(TensorArrayTester, WriteShared) { - LoDTensor source; - source.Resize(make_ddim({1, dim})); - for (int i = 0; i < dim; i++) { - *(source.mutable_data(platform::CPUPlace()) + i) = i; - } - - ta.WriteShared(2, source); - - const auto& tensor = ta.Read(2); - for (int i = 0; i < dim; i++) { - EXPECT_EQ(*(tensor.data() + i), *(source.data() + i)); - } - - EXPECT_EQ(source.data(), tensor.data()); -} - -class TensorArrayPackTester : public ::testing::Test { - protected: - virtual void SetUp() override { - lod.push_back(std::vector{0, 2, 9, 13}); - - source.set_lod(lod); - source.Resize(make_ddim({13, 128})); - source.mutable_data(platform::CPUPlace()); - - // content of each setence: 0 1 2 3 4 - const auto& level = lod.front(); - for (size_t i = 0; i < level.size() - 1; i++) { - size_t begin = level[i]; - size_t end = level[i + 1]; - for (size_t j = begin; j < end; j++) { - auto record = source.Slice(j, j + 1); - for (int dim = 0; dim < 128; dim++) { - record.mutable_data(platform::CPUPlace())[dim] = j - begin; - } - } - } - - // unpack - meta = ta.Unpack(source, 0, true); - } - - LoD lod; - TensorArray ta; - LoDTensor source; - std::vector meta; -}; - -TEST_F(TensorArrayPackTester, Unpack) { - ASSERT_EQ(ta.size(), 7UL); - - const auto& t0 = ta.Read(0); - const auto& t1 = ta.Read(1); - - ASSERT_EQ(t0.data()[0], int(0)); - ASSERT_EQ(t1.data()[0], int(1)); -} - -TEST_F(TensorArrayPackTester, Pack) { - LoDTensor packed = ta.Pack(0, meta, lod); -} - -TEST_F(TensorArrayTester, size) { - ASSERT_EQ(ta.size(), static_cast(batch_size)); -} - -TEST(TensorArray, LodPack) { - // three time steps, each step stores a LoDTensors - // - [0] [1] - // - [2 3], [4 5] - // - [6 7] [] [8], [9, 10] - // try to get a LoDTensor with content: - // - [0 2 6] - // - [0 2 7] - // - [0 3] - // - [1 4 8] - // - [1 5 9] - // - [1 5 10] - std::array tensors; - tensors[0].Resize(make_ddim({2, 1})); - tensors[1].Resize(make_ddim({4, 1})); - tensors[2].Resize(make_ddim({5, 1})); - int index = 0; - for (auto& t : tensors) { - t.mutable_data(platform::CPUPlace()); - for (int i = 0; i < t.dims()[0]; i++) { - t.data()[i] = index; - index++; - } - } - - std::array lods; - std::vector> levels{ - {0, 1, 2}, {0, 2, 4}, {0, 2, 2, 3, 5}}; - for (int i = 0; i < 3; i++) { - lods[i].emplace_back(levels[i].begin(), levels[i].end()); - } - - TensorArray ta; - for (int i = 0; i < 3; i++) { - tensors[i].set_lod(lods[i]); - ta.Write(i, tensors[i]); - } - - auto merged = ta.LodPack(0); - - std::vector target_tensor_data{{0, 2, 6, // 0 - 0, 2, 7, // 1 - 0, 3, // 2 - 1, 4, 8, // 3 - 1, 5, 9, // 5 - 1, 5, 10}}; - EXPECT_EQ(merged.dims()[0], (int)target_tensor_data.size()); - for (size_t i = 0; i < target_tensor_data.size(); i++) { - EXPECT_EQ(target_tensor_data[i], merged.data()[i]); - } -} - -} // namespace framework -} // namespace paddle diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 7e88e039611007d17156d10f852eb46f3ee8e7a3..aba1f9f09329f890ef190f8820b958c56f017e89 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -150,84 +150,6 @@ inline Tensor& Tensor::ShareDataWith(const Tensor& src) { return *this; } -inline void Tensor::CopyFrom(const Tensor& src, - const platform::Place& dst_place, - const platform::DeviceContext& ctx) { - src.check_memory_size(); - Resize(src.dims()); - - auto src_place = src.holder_->place(); - auto src_ptr = src.data(); - - auto dst_ptr = mutable_data(dst_place, src.type()); - - auto size = src.numel() * SizeOfType(src.type()); - - if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { - memory::Copy(boost::get(dst_place), dst_ptr, - boost::get(src_place), src_ptr, size); - } -#ifdef PADDLE_WITH_CUDA - else if (platform::is_gpu_place(src_place) && - platform::is_cpu_place(dst_place)) { - auto src_gpu_place = boost::get(src_place); - auto dst_cpu_place = boost::get(dst_place); - auto ctx_place = ctx.GetPlace(); - PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); - auto ctx_gpu_place = boost::get(ctx_place); - PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); - memory::Copy( - dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, - reinterpret_cast(ctx).stream()); - } else if (platform::is_cpu_place(src_place) && - platform::is_gpu_place(dst_place)) { - auto src_cpu_place = boost::get(src_place); - auto dst_gpu_place = boost::get(dst_place); - auto ctx_place = ctx.GetPlace(); - PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); - auto ctx_gpu_place = boost::get(ctx_place); - PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place); - memory::Copy( - dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, - reinterpret_cast(ctx).stream()); - } else if (platform::is_gpu_place(src_place) && - platform::is_gpu_place(dst_place)) { - auto src_gpu_place = boost::get(src_place); - auto dst_gpu_place = boost::get(dst_place); - auto ctx_place = ctx.GetPlace(); - PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); - auto ctx_gpu_place = boost::get(ctx_place); - PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); - memory::Copy( - dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, - reinterpret_cast(ctx).stream()); - } -#endif -} - -template -inline void Tensor::CopyFromVector(const std::vector& src, - const platform::DeviceContext& ctx) { - auto dst_place = ctx.GetPlace(); - auto src_ptr = static_cast(src.data()); - platform::CPUPlace src_place; - auto dst_ptr = static_cast(mutable_data(dst_place)); - auto size = src.size() * sizeof(T); - - if (platform::is_cpu_place(dst_place)) { - memory::Copy(boost::get(dst_place), dst_ptr, src_place, - src_ptr, size); - } -#ifdef PADDLE_WITH_CUDA - else if (platform::is_gpu_place(dst_place)) { - memory::Copy( - boost::get(dst_place), dst_ptr, src_place, src_ptr, - size, - reinterpret_cast(ctx).stream()); - } -#endif -} - inline Tensor Tensor::Slice(int begin_idx, int end_idx) const { check_memory_size(); PADDLE_ENFORCE_GE(begin_idx, 0, diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 1bb0fb71b079940d35a995b78e04a531c074a8b2..ceca64365a1a628642eb374a3e3bbdff490c955a 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -188,178 +188,6 @@ TEST(Tensor, Slice) { #endif } -TEST(Tensor, CopyFrom) { - using namespace paddle::framework; - using namespace paddle::platform; - { - Tensor src_tensor; - Tensor dst_tensor; - CPUDeviceContext cpu_ctx((CPUPlace())); - - int* src_ptr = src_tensor.mutable_data(make_ddim({3, 3}), CPUPlace()); - - int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - memcpy(src_ptr, arr, 9 * sizeof(int)); - - auto cpu_place = new paddle::platform::CPUPlace(); - dst_tensor.CopyFrom(src_tensor, *cpu_place, cpu_ctx); - - const int* dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, dst_ptr); - for (size_t i = 0; i < 9; ++i) { - EXPECT_EQ(src_ptr[i], dst_ptr[i]); - } - - Tensor slice_tensor = src_tensor.Slice(1, 2); - dst_tensor.CopyFrom(slice_tensor, *cpu_place, cpu_ctx); - const int* slice_ptr = slice_tensor.data(); - dst_ptr = dst_tensor.data(); - ASSERT_NE(dst_ptr, slice_ptr); - for (size_t i = 0; i < 3; ++i) { - EXPECT_EQ(dst_ptr[i], slice_ptr[i]); - } - } -#ifdef PADDLE_WITH_CUDA - { - Tensor src_tensor; - Tensor gpu_tensor; - Tensor dst_tensor; - - int* src_ptr = src_tensor.mutable_data(make_ddim({3, 3}), CPUPlace()); - - int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - memcpy(src_ptr, arr, 9 * sizeof(int)); - - // CPU Tensor to GPU Tensor - auto gpu_place = new paddle::platform::GPUPlace(0); - CUDADeviceContext gpu_ctx(*gpu_place); - gpu_tensor.CopyFrom(src_tensor, *gpu_place, gpu_ctx); - - // GPU Tensor to CPU Tensor - auto cpu_place = new paddle::platform::CPUPlace(); - dst_tensor.CopyFrom(gpu_tensor, *cpu_place, gpu_ctx); - - // Sync before Compare Tensors - gpu_ctx.Wait(); - const int* dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, dst_ptr); - for (size_t i = 0; i < 9; ++i) { - EXPECT_EQ(src_ptr[i], dst_ptr[i]); - } - - Tensor slice_tensor = src_tensor.Slice(1, 2); - - // CPU Slice Tensor to GPU Tensor - gpu_tensor.CopyFrom(slice_tensor, *gpu_place, gpu_ctx); - - // GPU Tensor to CPU Tensor - dst_tensor.CopyFrom(gpu_tensor, *cpu_place, gpu_ctx); - - // Sync before Compare Slice Tensors - gpu_ctx.Wait(); - const int* slice_ptr = slice_tensor.data(); - dst_ptr = dst_tensor.data(); - ASSERT_NE(dst_ptr, slice_ptr); - for (size_t i = 0; i < 3; ++i) { - EXPECT_EQ(dst_ptr[i], slice_ptr[i]); - } - } -#endif -} - -TEST(Tensor, CopyFromVector) { - using namespace paddle::framework; - using namespace paddle::platform; - { - std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - Tensor cpu_tensor; - - // Copy to CPU Tensor - cpu_tensor.Resize(make_ddim({3, 3})); - auto cpu_place = new paddle::platform::CPUPlace(); - CPUDeviceContext cpu_ctx(*cpu_place); - cpu_tensor.CopyFromVector(src_vec, cpu_ctx); - - // Compare Tensors - const int* cpu_ptr = cpu_tensor.data(); - const int* src_ptr = src_vec.data(); - ASSERT_NE(src_ptr, cpu_ptr); - for (size_t i = 0; i < 9; ++i) { - EXPECT_EQ(src_ptr[i], cpu_ptr[i]); - } - - src_vec.erase(src_vec.begin(), src_vec.begin() + 5); - cpu_tensor.Resize(make_ddim({2, 2})); - cpu_tensor.CopyFromVector(src_vec, cpu_ctx); - cpu_ptr = cpu_tensor.data(); - src_ptr = src_vec.data(); - ASSERT_NE(src_ptr, cpu_ptr); - for (size_t i = 0; i < 5; ++i) { - EXPECT_EQ(src_ptr[i], cpu_ptr[i]); - } - - delete cpu_place; - } - -#ifdef PADDLE_WITH_CUDA - { - std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - Tensor cpu_tensor; - Tensor gpu_tensor; - Tensor dst_tensor; - - // Copy to CPU Tensor - cpu_tensor.Resize(make_ddim({3, 3})); - auto cpu_place = new paddle::platform::CPUPlace(); - CPUDeviceContext cpu_ctx(*cpu_place); - cpu_tensor.CopyFromVector(src_vec, cpu_ctx); - - // Copy to GPUTensor - gpu_tensor.Resize(make_ddim({3, 3})); - auto gpu_place = new paddle::platform::GPUPlace(); - CUDADeviceContext gpu_ctx(*gpu_place); - gpu_tensor.CopyFromVector(src_vec, gpu_ctx); - // Copy from GPU to CPU tensor for comparison - dst_tensor.CopyFrom(gpu_tensor, *cpu_place, gpu_ctx); - - // Sync before Compare Tensors - gpu_ctx.Wait(); - const int* src_ptr = src_vec.data(); - const int* cpu_ptr = cpu_tensor.data(); - const int* dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, cpu_ptr); - ASSERT_NE(src_ptr, dst_ptr); - for (size_t i = 0; i < 9; ++i) { - EXPECT_EQ(src_ptr[i], cpu_ptr[i]); - EXPECT_EQ(src_ptr[i], dst_ptr[i]); - } - - src_vec.erase(src_vec.begin(), src_vec.begin() + 5); - - cpu_tensor.Resize(make_ddim({2, 2})); - cpu_tensor.CopyFromVector(src_vec, cpu_ctx); - gpu_tensor.Resize(make_ddim({2, 2})); - gpu_tensor.CopyFromVector(src_vec, gpu_ctx); - dst_tensor.CopyFrom(gpu_tensor, *cpu_place, gpu_ctx); - - // Sync before Compare Tensors - gpu_ctx.Wait(); - src_ptr = src_vec.data(); - cpu_ptr = cpu_tensor.data(); - dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, cpu_ptr); - ASSERT_NE(src_ptr, dst_ptr); - for (size_t i = 0; i < 5; ++i) { - EXPECT_EQ(src_ptr[i], cpu_ptr[i]); - EXPECT_EQ(src_ptr[i], dst_ptr[i]); - } - - delete cpu_place; - delete gpu_place; - } -#endif -} - TEST(Tensor, ReshapeToMatrix) { using namespace paddle::framework; using namespace paddle::platform; diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h new file mode 100644 index 0000000000000000000000000000000000000000..4e34b90d57eed8fea84b83045df61a98483c8849 --- /dev/null +++ b/paddle/framework/tensor_util.h @@ -0,0 +1,152 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include "paddle/framework/tensor.h" + +namespace paddle { +namespace framework { + +/** + * @brief Copy the content of external tensor to a new place. + * + * @param[in] src The external tensor. + * @param[in] dst_place The dst place. + * @param[in] ctx The device context contains device resources. + * + * @note CopyFrom supports CPU <-> GPU, GPU <-> GPU. + */ + +inline void CopyFrom(const Tensor& src, const platform::Place& dst_place, + const platform::DeviceContext& ctx, Tensor* dst) { + src.check_memory_size(); + + dst->Resize(src.dims()); + auto src_place = src.place(); + auto src_ptr = src.data(); + + auto dst_ptr = dst->mutable_data(dst_place, src.type()); + + auto size = src.numel() * SizeOfType(src.type()); + + if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { + memory::Copy(boost::get(dst_place), dst_ptr, + boost::get(src_place), src_ptr, size); + } +#ifdef PADDLE_WITH_CUDA + else if (platform::is_gpu_place(src_place) && // NOLINT + platform::is_cpu_place(dst_place)) { + auto src_gpu_place = boost::get(src_place); + auto dst_cpu_place = boost::get(dst_place); + auto ctx_place = ctx.GetPlace(); + PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); + auto ctx_gpu_place = boost::get(ctx_place); + PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); + memory::Copy( + dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, + reinterpret_cast(ctx).stream()); + } else if (platform::is_cpu_place(src_place) && + platform::is_gpu_place(dst_place)) { + auto src_cpu_place = boost::get(src_place); + auto dst_gpu_place = boost::get(dst_place); + auto ctx_place = ctx.GetPlace(); + PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); + auto ctx_gpu_place = boost::get(ctx_place); + PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place); + memory::Copy( + dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, + reinterpret_cast(ctx).stream()); + } else if (platform::is_gpu_place(src_place) && + platform::is_gpu_place(dst_place)) { + auto src_gpu_place = boost::get(src_place); + auto dst_gpu_place = boost::get(dst_place); + auto ctx_place = ctx.GetPlace(); + PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); + auto ctx_gpu_place = boost::get(ctx_place); + PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place); + memory::Copy( + dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, + reinterpret_cast(ctx).stream()); + } +#endif +} + +/** + * @brief Copy the content of an external vector to a tensor. + * + * @param[in] src The external tensor. + * @param[in] ctx The device context contains device resources. + * + * * @note CopyFromVector assumes that the tensor has been resized + * before invoking. + */ +template +inline void CopyFromVector(const std::vector& src, + const platform::DeviceContext& ctx, Tensor* dst) { + auto dst_place = ctx.GetPlace(); + auto src_ptr = static_cast(src.data()); + platform::CPUPlace src_place; + dst->Resize({static_cast(src.size())}); + auto dst_ptr = static_cast(dst->mutable_data(dst_place)); + auto size = src.size() * sizeof(T); + + if (platform::is_cpu_place(dst_place)) { + memory::Copy(boost::get(dst_place), dst_ptr, src_place, + src_ptr, size); + } +#ifdef PADDLE_WITH_CUDA + else if (platform::is_gpu_place(dst_place)) { // NOLINT + memory::Copy( + boost::get(dst_place), dst_ptr, src_place, src_ptr, + size, + reinterpret_cast(ctx).stream()); + } +#endif +} + +/** + * @brief Copy the content of a tensor to a vector + * + * @param[in] src The external tensor. + * @param[in] ctx The device context contains device resources. + * + * * @note CopyFromVector assumes that the tensor has been resized + * before invoking. + */ +template +inline void CopyToVector(const Tensor& src, const platform::DeviceContext& ctx, + std::vector* dst) { + auto src_ptr = static_cast(src.data()); + auto size = src.numel() * sizeof(T); + + platform::CPUPlace dst_place; + dst->resize(src.numel()); + auto dst_ptr = static_cast(dst->data()); + + if (platform::is_cpu_place(src.place())) { + memory::Copy(dst_place, dst_ptr, + boost::get(src.place()), src_ptr, size); + } +#ifdef PADDLE_WITH_CUDA + else if (platform::is_gpu_place(src.place())) { // NOLINT + memory::Copy( + dst_place, dst_ptr, boost::get(src.place()), + src_ptr, size, + reinterpret_cast(ctx).stream()); + } +#endif +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..03a70de182d0eb499a81413d38229c81c4378b91 --- /dev/null +++ b/paddle/framework/tensor_util_test.cc @@ -0,0 +1,228 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "paddle/framework/tensor_util.h" +#include +#include + +namespace paddle { +namespace framework { +TEST(CopyFrom, Tensor) { + Tensor src_tensor; + Tensor dst_tensor; + platform::CPUDeviceContext cpu_ctx((platform::CPUPlace())); + + int* src_ptr = + src_tensor.mutable_data(make_ddim({3, 3}), platform::CPUPlace()); + + int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + memcpy(src_ptr, arr, 9 * sizeof(int)); + + auto cpu_place = new platform::CPUPlace(); + CopyFrom(src_tensor, *cpu_place, cpu_ctx, &dst_tensor); + + const int* dst_ptr = dst_tensor.data(); + ASSERT_NE(src_ptr, dst_ptr); + for (size_t i = 0; i < 9; ++i) { + EXPECT_EQ(src_ptr[i], dst_ptr[i]); + } + + Tensor slice_tensor = src_tensor.Slice(1, 2); + CopyFrom(slice_tensor, *cpu_place, cpu_ctx, &dst_tensor); + const int* slice_ptr = slice_tensor.data(); + dst_ptr = dst_tensor.data(); + ASSERT_NE(dst_ptr, slice_ptr); + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQ(dst_ptr[i], slice_ptr[i]); + } +#ifdef PADDLE_WITH_CUDA + { + Tensor src_tensor; + Tensor gpu_tensor; + Tensor dst_tensor; + + int* src_ptr = + src_tensor.mutable_data(make_ddim({3, 3}), platform::CPUPlace()); + + int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + memcpy(src_ptr, arr, 9 * sizeof(int)); + + // CPU Tensor to GPU Tensor + auto gpu_place = new platform::GPUPlace(0); + platform::CUDADeviceContext gpu_ctx(*gpu_place); + CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor); + + // GPU Tensor to CPU Tensor + auto cpu_place = new platform::CPUPlace(); + CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); + + // Sync before Compare Tensors + gpu_ctx.Wait(); + const int* dst_ptr = dst_tensor.data(); + ASSERT_NE(src_ptr, dst_ptr); + for (size_t i = 0; i < 9; ++i) { + EXPECT_EQ(src_ptr[i], dst_ptr[i]); + } + + Tensor slice_tensor = src_tensor.Slice(1, 2); + + // CPU Slice Tensor to GPU Tensor + CopyFrom(slice_tensor, *gpu_place, gpu_ctx, &gpu_tensor); + + // GPU Tensor to CPU Tensor + CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); + + // Sync before Compare Slice Tensors + gpu_ctx.Wait(); + const int* slice_ptr = slice_tensor.data(); + dst_ptr = dst_tensor.data(); + ASSERT_NE(dst_ptr, slice_ptr); + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQ(dst_ptr[i], slice_ptr[i]); + } + } +#endif +} + +TEST(CopyFromVector, Tensor) { + using namespace paddle::framework; + using namespace paddle::platform; + { + std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + Tensor cpu_tensor; + + // Copy to CPU Tensor + cpu_tensor.Resize(make_ddim({3, 3})); + auto cpu_place = new paddle::platform::CPUPlace(); + CPUDeviceContext cpu_ctx(*cpu_place); + CopyFromVector(src_vec, cpu_ctx, &cpu_tensor); + + // Compare Tensors + const int* cpu_ptr = cpu_tensor.data(); + const int* src_ptr = src_vec.data(); + ASSERT_NE(src_ptr, cpu_ptr); + for (size_t i = 0; i < 9; ++i) { + EXPECT_EQ(src_ptr[i], cpu_ptr[i]); + } + + src_vec.erase(src_vec.begin(), src_vec.begin() + 5); + cpu_tensor.Resize(make_ddim({2, 2})); + CopyFromVector(src_vec, cpu_ctx, &cpu_tensor); + cpu_ptr = cpu_tensor.data(); + src_ptr = src_vec.data(); + ASSERT_NE(src_ptr, cpu_ptr); + for (size_t i = 0; i < 5; ++i) { + EXPECT_EQ(src_ptr[i], cpu_ptr[i]); + } + + delete cpu_place; + } + +#ifdef PADDLE_WITH_CUDA + { + std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + Tensor cpu_tensor; + Tensor gpu_tensor; + Tensor dst_tensor; + + // Copy to CPU Tensor + cpu_tensor.Resize(make_ddim({3, 3})); + auto cpu_place = new paddle::platform::CPUPlace(); + CPUDeviceContext cpu_ctx(*cpu_place); + CopyFromVector(src_vec, cpu_ctx, &cpu_tensor); + + // Copy to GPUTensor + gpu_tensor.Resize(make_ddim({3, 3})); + auto gpu_place = new paddle::platform::GPUPlace(); + CUDADeviceContext gpu_ctx(*gpu_place); + CopyFromVector(src_vec, gpu_ctx, &gpu_tensor); + // Copy from GPU to CPU tensor for comparison + CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); + + // Sync before Compare Tensors + gpu_ctx.Wait(); + const int* src_ptr = src_vec.data(); + const int* cpu_ptr = cpu_tensor.data(); + const int* dst_ptr = dst_tensor.data(); + ASSERT_NE(src_ptr, cpu_ptr); + ASSERT_NE(src_ptr, dst_ptr); + for (size_t i = 0; i < 9; ++i) { + EXPECT_EQ(src_ptr[i], cpu_ptr[i]); + EXPECT_EQ(src_ptr[i], dst_ptr[i]); + } + + src_vec.erase(src_vec.begin(), src_vec.begin() + 5); + + cpu_tensor.Resize(make_ddim({2, 2})); + CopyFromVector(src_vec, cpu_ctx, &cpu_tensor); + gpu_tensor.Resize(make_ddim({2, 2})); + CopyFromVector(src_vec, gpu_ctx, &gpu_tensor); + CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); + + // Sync before Compare Tensors + gpu_ctx.Wait(); + src_ptr = src_vec.data(); + cpu_ptr = cpu_tensor.data(); + dst_ptr = dst_tensor.data(); + ASSERT_NE(src_ptr, cpu_ptr); + ASSERT_NE(src_ptr, dst_ptr); + for (size_t i = 0; i < 5; ++i) { + EXPECT_EQ(src_ptr[i], cpu_ptr[i]); + EXPECT_EQ(src_ptr[i], dst_ptr[i]); + } + + delete cpu_place; + delete gpu_place; + } +#endif +} + +TEST(CopyToVector, Tensor) { + using namespace paddle::framework; + using namespace paddle::platform; + { + Tensor src; + int* src_ptr = src.mutable_data({3, 3}, CPUPlace()); + for (int i = 0; i < 3 * 3; ++i) { + src_ptr[i] = i; + } + + CPUPlace place; + CPUDeviceContext cpu_ctx(place); + std::vector dst; + CopyToVector(src, cpu_ctx, &dst); + + for (int i = 0; i < 3 * 3; ++i) { + EXPECT_EQ(src_ptr[i], dst[i]); + } + } +#ifdef PADDLE_WITH_CUDA + { + std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + Tensor gpu_tensor; + GPUPlace place; + CUDADeviceContext gpu_ctx(place); + CopyFromVector(src_vec, gpu_ctx, &gpu_tensor); + + std::vector dst; + CopyToVector(gpu_tensor, gpu_ctx, &dst); + + for (int i = 0; i < 3 * 3; ++i) { + EXPECT_EQ(src_vec[i], dst[i]); + } + } +#endif +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp index 8b7b2e9b65898950e036ebc023cd28990cef303f..f5a41b66bf09a4abc5ae7b64f227ca52461408f5 100644 --- a/paddle/gserver/activations/ActivationFunction.cpp +++ b/paddle/gserver/activations/ActivationFunction.cpp @@ -212,6 +212,37 @@ Error __must_check backward(Argument& act) { } END_DEFINE_ACTIVATION(sequence_softmax) +/* + * @brief SoftSign Activation. + * \f[ + * f(z) = \frac{z}{1 + |z|} + * \f] + */ +BEGIN_DEFINE_ACTIVATION(softsign) +private: +MatrixPtr denominator_; + +Error __must_check forward(Argument& act) { + size_t height = act.value->getHeight(); + size_t width = act.value->getWidth(); + Matrix::resizeOrCreate( + denominator_, height, width, false, useGpu(act.deviceId)); + denominator_->assign(*act.value); + denominator_->abs2(); + denominator_->add(1.); + + act.value->dotDiv(*act.value, *denominator_); + return Error(); +} + +Error __must_check backward(Argument& act) { + denominator_->square2(); + denominator_->scalarDiv(*denominator_, 1.); + act.grad->dotMul(*act.grad, *denominator_); + return Error(); +} +END_DEFINE_ACTIVATION(softsign) + /** * @brief Relu Activation. * forward. y = max(0, z) diff --git a/paddle/gserver/layers/BatchNormBaseLayer.cpp b/paddle/gserver/layers/BatchNormBaseLayer.cpp index bc7d1c83a48aefeb4bc6d3baa32b78aba712e58d..925af31289d0c8ca534a30a16b14bfd2df90b013 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.cpp +++ b/paddle/gserver/layers/BatchNormBaseLayer.cpp @@ -41,6 +41,7 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap, useGlobalStats_ = config_.use_global_stats(); } movingAvgFraction_ = config_.moving_average_fraction(); + epsilon_ = config_.epsilon(); weight_.reset(new Weight(1, channels_, parameters_[0])); movingMean_.reset(new Weight(1, channels_, parameters_[1])); diff --git a/paddle/gserver/layers/BatchNormBaseLayer.h b/paddle/gserver/layers/BatchNormBaseLayer.h index e721d2d267a31cae46407673b8b1281e87055608..2ac3cd9d670d0fcf9c40ad2f117d5a72479663a3 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.h +++ b/paddle/gserver/layers/BatchNormBaseLayer.h @@ -94,6 +94,8 @@ protected: bool useGlobalStats_; // use to compute moving mean and variance. real movingAvgFraction_; + // Epsilon is a small random noise used in batch normalization for stability. + real epsilon_; }; } // namespace paddle diff --git a/paddle/gserver/layers/BatchNormalizationLayer.cpp b/paddle/gserver/layers/BatchNormalizationLayer.cpp index dacff25e5927daf9c991577a71be86b160228317..25ab5cd927792d18f78bc1fa33eee4029b427cc7 100644 --- a/paddle/gserver/layers/BatchNormalizationLayer.cpp +++ b/paddle/gserver/layers/BatchNormalizationLayer.cpp @@ -22,8 +22,6 @@ namespace paddle { REGISTER_LAYER(batch_norm, BatchNormalizationLayer); -const real BatchNormalizationLayer::EPS = 1E-5; - bool BatchNormalizationLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ @@ -53,7 +51,7 @@ void BatchNormalizationLayer::calMeanAndStd(const MatrixPtr& mat) { calMovingMeanAndVar(); - savedInvVar_->subScalar(-EPS); + savedInvVar_->subScalar(-epsilon_); savedInvVar_->sqrt2(*savedInvVar_); } @@ -74,7 +72,7 @@ void BatchNormalizationLayer::setMeanAndStd() { savedInvVar_->copyFrom(*(movingVar_->getW())); savedInvVar_->downClip(real(0.0)); - savedInvVar_->subScalar(-EPS); + savedInvVar_->subScalar(-epsilon_); savedInvVar_->sqrt2(*savedInvVar_); } diff --git a/paddle/gserver/layers/BatchNormalizationLayer.h b/paddle/gserver/layers/BatchNormalizationLayer.h index f6115801fc6b341c0718f8851617de43bdeeec09..1fdb5e2070259a14ab6f70957c9cf03f0699f734 100644 --- a/paddle/gserver/layers/BatchNormalizationLayer.h +++ b/paddle/gserver/layers/BatchNormalizationLayer.h @@ -39,9 +39,6 @@ public: void backward(const UpdateCallback& callback = nullptr) override; protected: - /// Epsilon value used in the batch normalization formula. - static const real EPS; - /// Load pre-calculated mean and std. void setMeanAndStd(); diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp index 49a9540c0b6e36b59ed786287ff5c4569b69a6a5..8390b55026c895b661cb514714ba92c05a7bf02e 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp @@ -21,8 +21,6 @@ namespace paddle { REGISTER_LAYER(cudnn_batch_norm, CudnnBatchNormLayer); -const double CudnnBatchNormLayer::EPS = 1E-5; - bool CudnnBatchNormLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { /* Initialize the basic parent class */ @@ -61,6 +59,9 @@ void CudnnBatchNormLayer::forward(PassType passType) { real* movingMean = movingMean_->getW()->getData(); real* movingVar = movingVar_->getW()->getData(); + // cuDNN does not allow an epsilon value less than CUDNN_BN_MIN_EPSILON. + eps_ = std::max(CUDNN_BN_MIN_EPSILON, static_cast(epsilon_)); + if (!useGlobalStats_) { REGISTER_TIMER_INFO("CudnnBatchFwTimer", getName().c_str()); real* savedMean = savedMean_->getData(); @@ -75,7 +76,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { 1.0 - movingAvgFraction_, movingMean, movingVar, - EPS, + eps_, savedMean, savedInvVar); } else { @@ -90,7 +91,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { beta, movingMean, movingVar, - EPS); + eps_); } else { // There is a limitation in cudnn library. // When the batch size is larger than 1024 in cuDNN v5.1, @@ -101,7 +102,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { beta, movingMean, movingVar, - EPS, + eps_, batchSize, channels_, imageH_ * imageD_, @@ -128,6 +129,9 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { real* savedMean = savedMean_->getData(); real* savedInvVar = savedInvVar_->getData(); + // cuDNN does not allow an epsilon value less than CUDNN_BN_MIN_EPSILON. + eps_ = std::max(CUDNN_BN_MIN_EPSILON, static_cast(epsilon_)); + auto create = [](MatrixPtr& m, size_t h, size_t w, real** p) { Matrix::resizeOrCreate(m, h, w, false, true); m->zeroMem(); @@ -157,7 +161,7 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { gamma, gammaGrad, betaGrad, - EPS, + eps_, savedMean, savedInvVar); diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.h b/paddle/gserver/layers/CudnnBatchNormLayer.h index 413efd4d3ecd734b343efbcf8328ac0592daddda..1a3f0c0cbf8a1540e77cef70c753c91298728484 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.h +++ b/paddle/gserver/layers/CudnnBatchNormLayer.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include "BatchNormBaseLayer.h" #include "Layer.h" #include "paddle/utils/Stat.h" @@ -46,12 +47,9 @@ public: void backward(const UpdateCallback& callback = nullptr) override; protected: - /** - * Epsilon value used in the batch normalization formula. - * Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h. - * Same epsilon value should be used in forward and backward functions. - */ - static const double EPS; + /// Epsilon value used in the batch normalization formula. + /// Same epsilon value should be used in forward and backward functions. + double eps_; /// Input/output tensor descriptor desc hl_tensor_descriptor ioDesc_; diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp index 0f2b67fd758ec1513f42c4cb1a36f2f3915f4740..39bffc26f7ddcd159130c492115b41080e32ce7f 100644 --- a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp +++ b/paddle/gserver/layers/MKLDNNAddtoLayer.cpp @@ -38,12 +38,13 @@ bool MKLDNNAddtoLayer::init(const LayerMap& layerMap, } void MKLDNNAddtoLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { CHECK_EQ(layerSize_, getSize()) << "this layer size can not be changed"; reshapeInput(bs, ih, iw); ic = inputLayers_[0]->getSize() / ih / iw; CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize()); - CHECK_EQ(inputElemenCnt_, (size_t)bs * ic * ih * iw); + CHECK_EQ(inputLayers_[0]->getOutputValue()->getElementCnt(), + (size_t)bs * ic * ih * iw); for (size_t i = 0; i < inputLayers_.size(); i++) { CHECK_EQ(int64_t(bs), inputLayers_[i]->getOutput().getBatchSize()); CHECK_EQ(layerSize_, inputLayers_[i]->getSize()); @@ -57,47 +58,43 @@ void MKLDNNAddtoLayer::reshape( } void MKLDNNAddtoLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetFwdBuffers(inVals_, bias, out); - in = inVals_[0]; + resetFwdBuffers(inputs, biasVal_, out); std::shared_ptr fwdPD; std::shared_ptr biasPD; - resetFwdPD(fwdPD, biasPD, inVals_, bias, out); + resetFwdPD(fwdPD, biasPD, inputs, biasVal_, out); - resetFwdPipeline(pipeline, fwdPD, biasPD, inVals_, bias, out); + resetFwdPipeline(pipeline, fwdPD, biasPD, inputs, biasVal_, out); } void MKLDNNAddtoLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetBwdBuffers(inGrads_, bias, out); - in = inGrads_[0]; + resetBwdBuffers(inputs, biasGrad_, out); // backward only need share output grad to input grad - for (size_t i = 0; i < inGrads_.size(); i++) { - if (inGrads_[i] != nullptr) { - inGrads_[i] = out; - inputLayers_[i]->getOutputGrad()->setData(inGrads_[i]->getData()); + for (size_t i = 0; i < inputs.size(); i++) { + if (inputs[i] != nullptr) { + inputs[i] = out; + inputLayers_[i]->getOutputGrad()->setData(inputs[i]->getData()); } } // backward bias bwdBias_ = nullptr; - if (bias) { + if (biasGrad_) { std::vector scales(bs_, 1.0); - std::vector srcPDs(bs_, bias->getPrimitiveDesc()); - auto biasPD = sum::primitive_desc(bias->getMemoryDesc(), scales, srcPDs); + std::vector srcPDs(bs_, + biasGrad_->getPrimitiveDesc()); + auto biasPD = + sum::primitive_desc(biasGrad_->getMemoryDesc(), scales, srcPDs); std::vector srcs; for (size_t i = 0; i < grads_.size(); ++i) { srcs.push_back(*(grads_[i])); } - bwdBias_.reset(new sum(biasPD, srcs, *bias)); + bwdBias_.reset(new sum(biasPD, srcs, *biasGrad_)); pipeline.push_back(*bwdBias_); } } @@ -208,7 +205,7 @@ void MKLDNNAddtoLayer::resetBwdBuffers(std::vector& inputs, inputs.resize(inputLayers_.size()); for (size_t i = 0; i < inputs.size(); i++) { - resetInGrad(inputs[i], inVal_->getPrimitiveDesc(), i); + resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i); CHECK_PRIMITIVE_DESC_EQ(inputs[i], out->getPrimitiveDesc()); } diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.h b/paddle/gserver/layers/MKLDNNAddtoLayer.h index 24504b7b4f50726e2b2757ca3029461cdc27b411..0ea3e208e5fab8cbed8b53390a9381e6f2bb5733 100644 --- a/paddle/gserver/layers/MKLDNNAddtoLayer.h +++ b/paddle/gserver/layers/MKLDNNAddtoLayer.h @@ -26,9 +26,6 @@ namespace paddle { */ class MKLDNNAddtoLayer : public MKLDNNLayer { protected: - std::vector inVals_; - std::vector inGrads_; - // layer size == ic * ih * iw == oc * oh *ow, and can not be changed size_t layerSize_; @@ -50,52 +47,19 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void updateWeights(const UpdateCallback& callback) override; - void printValueFormat() override { - for (size_t i = 0; i < inVals_.size(); ++i) { - VLOG(MKLDNN_FMTS) << i << " input: " << inVals_[i]->getFormat() << " >>>"; - } - if (outVal_) { - VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "; - } - if (extOutVal_) { - VLOG(MKLDNN_FMTS) << extOutVal_->getFormat(); - } - } - - void printGradFormat() override { - if (extOutGrad_) { - VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat(); - } - if (outGrad_) { - VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< "; - } - for (size_t i = 0; i < inGrads_.size(); ++i) { - VLOG(MKLDNN_FMTS) << i << " input: " << inGrads_[i]->getFormat() << "<<<"; - } - } - protected: - /** - * Forward functions: reset buffers(inputs, output, bias), - * reset primitive descriptor, - * reset pipeline. - */ void resetFwdBuffers(std::vector& inputs, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); @@ -110,17 +74,10 @@ protected: std::vector& inputs, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(inputs, output, bias) - */ void resetBwdBuffers(std::vector& inputs, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - /** - * prepare for bias - */ void prepareBias(MKLDNNMatrixPtr& bias, const MatrixPtr& biasMat, const MKLDNNMatrixPtr& out, diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp index 071bdf54d5dc9538d5ced580a73b9c0fbcea41fb..7faca0f8b7f54fa0a09e8fdab11064c8c26df375 100644 --- a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp +++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp @@ -21,8 +21,6 @@ namespace paddle { REGISTER_LAYER(mkldnn_batch_norm, MKLDNNBatchNormLayer); -const real MKLDNNBatchNormLayer::EPS = 1E-5; - bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { if (!MKLDNNLayer::init(layerMap, parameterMap)) { @@ -50,6 +48,8 @@ bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap, useGlobalStats_ = config_.use_global_stats(); } movingAvgFraction_ = config_.moving_average_fraction(); + epsilon_ = config_.epsilon(); + VLOG(MKLDNN_BASE) << "--- " << (useGlobalStats_ ? "use" : "do not use") << " --- global stats"; VLOG(MKLDNN_BASE) << "Moving average fraction: " << movingAvgFraction_; @@ -116,21 +116,20 @@ void MKLDNNBatchNormLayer::calMovingMeanAndVar() { } void MKLDNNBatchNormLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); oh = ih; ow = iw; // ic_ and oc can not be changed - CHECK_EQ(inputElemenCnt_ / bs / ih / iw, (size_t)ic) + CHECK_EQ((size_t)ic, + inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw) << "Input channel can not be changed"; reshapeOutput(oh, ow); resizeOutput(bs, oc * oh * ow); } void MKLDNNBatchNormLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { // In training phase, it will always calculate mean and var, // so useGlobalStats must be false. @@ -140,25 +139,23 @@ void MKLDNNBatchNormLayer::resetFwd(std::vector& pipeline, useGlobalStats_ = false; } - resetFwdBuffers(in, wgt, out); + resetFwdBuffers(inputs[0], wgtVal_, out); - resetFwdPD(fwdPD_, in, wgt, out); + resetFwdPD(fwdPD_, inputs[0], wgtVal_, out); - resetFwdPipeline(pipeline, fwdPD_, in, wgt, out); + resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, out); } void MKLDNNBatchNormLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { std::shared_ptr pd; - resetBwdBuffers(in, wgt, out); + resetBwdBuffers(inputs[0], wgtGrad_, out); - resetBwdPD(pd, in, wgt, out); + resetBwdPD(pd, inputs[0], wgtGrad_, out); - resetBwdPipeline(pipeline, pd, in, wgt, out); + resetBwdPipeline(pipeline, pd, inputs[0], wgtGrad_, out); } void MKLDNNBatchNormLayer::forward(PassType passType) { @@ -213,7 +210,7 @@ void MKLDNNBatchNormLayer::resetFwdPD( if (wgt) { flags_ = (flags_ | batch_normalization_flag::use_scale_shift); } - auto fwdDesc = bn_fwd::desc(pk, in->getMemoryDesc(), EPS, flags_); + auto fwdDesc = bn_fwd::desc(pk, in->getMemoryDesc(), epsilon_, flags_); pd.reset(new bn_fwd::primitive_desc(fwdDesc, engine_)); CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc()); if (wgt) { @@ -260,9 +257,9 @@ void MKLDNNBatchNormLayer::resetFwdPipeline( void MKLDNNBatchNormLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& out) { - CHECK(inVal_ && outVal_); + CHECK(inVals_[0] && outVal_); resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVal_->getPrimitiveDesc()); + resetInGrad(in, inVals_[0]->getPrimitiveDesc()); if (gradScaleShift_) { CHECK(wgtVal_); resetWithMatrix(wgt, gradScaleShift_, wgtVal_->getPrimitiveDesc()); @@ -280,7 +277,7 @@ void MKLDNNBatchNormLayer::resetBwdPD( } CHECK_PRIMITIVE_DESC_EQ(out, in->getPrimitiveDesc()); auto md = in->getMemoryDesc(); - auto bwdDesc = bn_bwd::desc(prop_kind::backward, md, md, EPS, flags_); + auto bwdDesc = bn_bwd::desc(prop_kind::backward, md, md, epsilon_, flags_); pd.reset(new bn_bwd::primitive_desc(bwdDesc, engine_, *fwdPD_)); CHECK(pd->weights_primitive_desc() == fwdPD_->weights_primitive_desc()); CHECK_PRIMITIVE_DESC_EQ(wgt, pd->diff_weights_primitive_desc()); @@ -297,11 +294,12 @@ void MKLDNNBatchNormLayer::resetBwdPipeline( if (pd == nullptr) { return; } - CHECK(inVal_); + CHECK(inVals_[0]); bwdData_.reset( wgt && wgtVal_ - ? new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *wgtVal_, *in, *wgt) - : new bn_bwd(*pd, *inVal_, *mean_, *var_, *out, *in)); + ? new bn_bwd( + *pd, *inVals_[0], *mean_, *var_, *out, *wgtVal_, *in, *wgt) + : new bn_bwd(*pd, *inVals_[0], *mean_, *var_, *out, *in)); pipeline.push_back(*bwdData_); } diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.h b/paddle/gserver/layers/MKLDNNBatchNormLayer.h index 456c0424ecb8dde17f98a900c5d77268cc672e34..1cf33cb34fa9cd7c9b8487a0a4a0011fb129e311 100644 --- a/paddle/gserver/layers/MKLDNNBatchNormLayer.h +++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.h @@ -32,7 +32,8 @@ protected: std::shared_ptr fwdPD_; // Epsilon value used in the batch normalization formula. - static const real EPS; + real epsilon_; + // weight and bias in paddle std::unique_ptr weight_; std::unique_ptr biases_; @@ -73,18 +74,14 @@ public: void forward(PassType passType) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void updateWeights(const UpdateCallback& callback) override; @@ -98,11 +95,7 @@ protected: * moving = moving * AvgFraction + local * (1 - AvgFraction) */ void calMovingMeanAndVar(); - /** - * Forward functions: reset buffers(input, weight, output), - * reset primitive descriptor, - * reset pipeline. - */ + void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& out); @@ -115,12 +108,6 @@ protected: MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(input, weight, output), - * reset primitive descriptor, - * reset pipeline. - */ void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& out); diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.cpp b/paddle/gserver/layers/MKLDNNConcatLayer.cpp index c9099297cc5c741fbae0b42f21b988e6c561ef11..44bb0883b89c712d70e2d4fdfe16bdfde86f81b7 100644 --- a/paddle/gserver/layers/MKLDNNConcatLayer.cpp +++ b/paddle/gserver/layers/MKLDNNConcatLayer.cpp @@ -32,17 +32,16 @@ bool MKLDNNConcatLayer::init(const LayerMap& layerMap, } void MKLDNNConcatLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); ic = inputLayers_[0]->getSize() / ih / iw; CHECK_EQ((size_t)ic * ih * iw, inputLayers_[0]->getSize()); - CHECK_EQ(inputElemenCnt_, (size_t)bs * ic * ih * iw); + CHECK_EQ(inputLayers_[0]->getOutputValue()->getElementCnt(), + (size_t)bs * ic * ih * iw); CHECK_GT(inputLayers_.size(), 1UL); channels_.resize(inputLayers_.size()); channels_[0] = ic; - // need change the output channel, so use oc_ instead - // TODO(TJ): change API, use &oc - oc_ = ic; + oc = ic; for (size_t i = 1; i < inputLayers_.size(); i++) { int batchsize, height, witdh; reshapeInput(batchsize, height, witdh, i); @@ -52,37 +51,31 @@ void MKLDNNConcatLayer::reshape( channels_[i] = inputLayers_[i]->getSize() / height / witdh; CHECK_EQ((size_t)channels_[i] * height * witdh, inputLayers_[i]->getSize()); - oc_ += channels_[i]; + oc += channels_[i]; } oh = ih; ow = iw; reshapeOutput(oh, ow); - resizeOutput(bs, oc_ * oh * ow); + resizeOutput(bs, oc * oh * ow); } void MKLDNNConcatLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetFwdBuffers(inVals_, out); - in = inVals_[0]; + resetFwdBuffers(inputs, out); std::shared_ptr fwdPD; - resetFwdPD(fwdPD, inVals_, out); + resetFwdPD(fwdPD, inputs, out); - resetFwdPipeline(pipeline, fwdPD, inVals_, out); + resetFwdPipeline(pipeline, fwdPD, inputs, out); } void MKLDNNConcatLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetBwdBuffers(inGrads_, out); - in = inGrads_[0]; + resetBwdBuffers(inputs, out); - resetBwdPipeline(pipeline, bwds_, inGrads_, out); + resetBwdPipeline(pipeline, bwds_, inputs, out); } void MKLDNNConcatLayer::resetFwdBuffers(std::vector& inputs, @@ -90,10 +83,7 @@ void MKLDNNConcatLayer::resetFwdBuffers(std::vector& inputs, inputs.resize(inputLayers_.size()); bool has8c = false, has16c = false, hasnc = false; for (size_t i = 0; i < inputs.size(); i++) { - // resetInValue will use ic_ so temporary change as current input's channel - // TODO(TJ): change ic_ as vector then can remove channels_ - ic_ = channels_[i]; - resetInValue(inputs[i], nullptr, i); + resetInValue(inputs[i], nullptr, i, channels_[i]); CHECK(inputs[i]); auto dm = inputs[i]->getDims(); // inputs format can be different, but ndims must equal @@ -114,8 +104,6 @@ void MKLDNNConcatLayer::resetFwdBuffers(std::vector& inputs, has16c = true; } } - // change back, ic_ always save the input 0 size - ic_ = channels_[0]; format outFmt; if (has16c && oc_ % 16 == 0) { @@ -168,14 +156,9 @@ void MKLDNNConcatLayer::resetBwdBuffers(std::vector& inputs, inputs.resize(inputLayers_.size()); for (size_t i = 0; i < inputs.size(); i++) { CHECK(inVals_[i]); - // resetInGrad will use inVal_ - // TODO(TJ): change move inVals_ to MKLDNNLayer ans remove inVal_ - inVal_ = inVals_[i]; resetInGrad(inputs[i], inVals_[i]->getPrimitiveDesc(), i); CHECK_PRIMITIVE_DESC_EQ(inputs[i], inVals_[i]->getPrimitiveDesc()); } - // change back, inVal_ always save the input 0 - inVal_ = inVals_[0]; } void MKLDNNConcatLayer::resetBwdPipeline( diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.h b/paddle/gserver/layers/MKLDNNConcatLayer.h index d5749d327e4259b81541a234f48a4538ab035fe4..37f3a26c5ed5db10cdba507368874c9557fb75ef 100644 --- a/paddle/gserver/layers/MKLDNNConcatLayer.h +++ b/paddle/gserver/layers/MKLDNNConcatLayer.h @@ -26,8 +26,6 @@ namespace paddle { */ class MKLDNNConcatLayer : public MKLDNNLayer { protected: - std::vector inVals_; - std::vector inGrads_; std::vector> bwds_; // input channel numbers std::vector channels_; @@ -47,18 +45,14 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void printSizeInfo() override { @@ -72,38 +66,16 @@ public: << ", " << ow_; } - void printValueFormat() override { - for (size_t i = 0; i < inVals_.size(); ++i) { - VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() - << ": " << inVals_[i]->getFormat() << " >>>"; - } - if (outVal_) { - VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "; - } - if (extOutVal_) { - VLOG(MKLDNN_FMTS) << extOutVal_->getFormat(); - } - } - - void printGradFormat() override { - if (extOutGrad_) { - VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat(); - } - if (outGrad_) { - VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< "; - } - for (size_t i = 0; i < inGrads_.size(); ++i) { - VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() - << ": " << inGrads_[i]->getFormat() << "<<<"; + size_t keepCondition() { + // reset when the total element size of all inputs changed + size_t totalSize = inputLayers_[0]->getOutputValue()->getElementCnt(); + for (size_t i = 1; i < inputLayers_.size(); ++i) { + totalSize += inputLayers_[i]->getOutputValue()->getElementCnt(); } + return totalSize; } protected: - /** - * Forward functions: reset buffers(inputs, output, bias), - * reset primitive descriptor, - * reset pipeline. - */ void resetFwdBuffers(std::vector& inputs, MKLDNNMatrixPtr& out); void resetFwdPD(std::shared_ptr& pd, @@ -113,11 +85,6 @@ protected: std::shared_ptr& pd, std::vector& inputs, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(inputs, output, bias) - * reset primitives and pipeline - */ void resetBwdBuffers(std::vector& inputs, MKLDNNMatrixPtr& out); void resetBwdPipeline(std::vector& pipeline, diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp index 8aa54e0a9efa7adb766cbb6009f6a29410c6ae7d..ab1d0f7b049a349c00c6e23deb37d789382de64f 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.cpp +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -90,7 +90,7 @@ void MKLDNNConvLayer::convertWeightsToPaddle() { } void MKLDNNConvLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); // cal output sizes @@ -105,21 +105,17 @@ void MKLDNNConvLayer::reshape( } void MKLDNNConvLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { resetFwdPD(fwdPD_); - resetFwdBuffers(fwdPD_, in, wgt, bias, out); + resetFwdBuffers(fwdPD_, inputs[0], wgtVal_, biasVal_, out); - resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out); + resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out); } void MKLDNNConvLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { std::shared_ptr bwdWgtPD; std::shared_ptr bwdDataPD; @@ -128,9 +124,10 @@ void MKLDNNConvLayer::resetBwd(std::vector& pipeline, resetBwdDataPD(bwdDataPD); - resetBwdBuffers(bwdWgtPD, bwdDataPD, in, wgt, bias, out); + resetBwdBuffers(bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out); - resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out); + resetBwdPipeline( + pipeline, bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out); } void MKLDNNConvLayer::updateWeights(const UpdateCallback& callback) { @@ -236,14 +233,14 @@ void MKLDNNConvLayer::resetBwdWgtPD( loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); // create backward weight using input, output and weight value memory desc - CHECK(inVal_) << "Should have internal input value"; + CHECK(inVals_[0]) << "Should have internal input value"; CHECK(outVal_) << "Should have internal output value"; CHECK(wgtVal_) << "Should have weight value"; algorithm algo = algorithm::convolution_direct; padding_kind padKind = padding_kind::zero; auto bwdWgtDesc = biasVal_ != nullptr ? conv_bwdWgt::desc(algo, - inVal_->getMemoryDesc(), + inVals_[0]->getMemoryDesc(), wgtVal_->getMemoryDesc(), biasVal_->getMemoryDesc(), outVal_->getMemoryDesc(), @@ -252,7 +249,7 @@ void MKLDNNConvLayer::resetBwdWgtPD( padR, padKind) : conv_bwdWgt::desc(algo, - inVal_->getMemoryDesc(), + inVals_[0]->getMemoryDesc(), wgtVal_->getMemoryDesc(), outVal_->getMemoryDesc(), strides, @@ -260,7 +257,7 @@ void MKLDNNConvLayer::resetBwdWgtPD( padR, padKind); pd.reset(new conv_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); - CHECK_PRIMITIVE_DESC_EQ(inVal_, pd->src_primitive_desc()); + CHECK_PRIMITIVE_DESC_EQ(inVals_[0], pd->src_primitive_desc()); CHECK_PRIMITIVE_DESC_EQ( outVal_, pd->diff_dst_primitive_desc(), @@ -280,12 +277,12 @@ void MKLDNNConvLayer::resetBwdDataPD( memory::dims wgtDims, biasDims, strides, dilations, padL, padR; loadConvSettings(wgtDims, biasDims, strides, dilations, padL, padR); - CHECK(inVal_) << "Should have internal input value"; + CHECK(inVals_[0]) << "Should have internal input value"; CHECK(outVal_) << "Should have internal output value"; // create backward data using input and output value memory desc // but using weight memory desc with any format auto bwdDataDesc = conv_bwdData::desc(algorithm::convolution_direct, - inVal_->getMemoryDesc(), + inVals_[0]->getMemoryDesc(), MKLDNNMatrix::createMemoryDesc(wgtDims), outVal_->getMemoryDesc(), strides, @@ -294,7 +291,7 @@ void MKLDNNConvLayer::resetBwdDataPD( padding_kind::zero); pd.reset(new conv_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_)); CHECK_PRIMITIVE_DESC_EQ( - inVal_, + inVals_[0], pd->diff_src_primitive_desc(), "primitive desc of in value and grad should be equal"); CHECK_PRIMITIVE_DESC_EQ( @@ -346,12 +343,12 @@ void MKLDNNConvLayer::resetBwdPipeline( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - CHECK(inVal_); + CHECK(inVals_[0]); // add bwdWgt handle if (bias) { - bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt, *bias)); + bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt, *bias)); } else { - bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVal_, *out, *wgt)); + bwdWgt_.reset(new conv_bwdWgt(*wgtPD, *inVals_[0], *out, *wgt)); } pipeline.push_back(*bwdWgt_); diff --git a/paddle/gserver/layers/MKLDNNConvLayer.h b/paddle/gserver/layers/MKLDNNConvLayer.h index 9c69136684e5f9005860b476ec6ed1bbc9ceff6c..3e754a0e65771879e836c13d63d5a5c8be3a699a 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.h +++ b/paddle/gserver/layers/MKLDNNConvLayer.h @@ -69,18 +69,14 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void updateWeights(const UpdateCallback& callback) override; @@ -107,48 +103,26 @@ protected: mkldnn::memory::dims& padL, mkldnn::memory::dims& padR); - /** - * reset the forward primitive descriptor. - */ void resetFwdPD(std::shared_ptr& pd); - /** - * reset the MKLDNNMatrix buffers used in forward. - */ void resetFwdBuffers(std::shared_ptr& pd, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - /** - * reset the forward pipeline. - */ void resetFwdPipeline(std::vector& pipeline, std::shared_ptr& pd, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - - /** - * reset the backward weight primitive descriptor. - */ void resetBwdWgtPD(std::shared_ptr& pd); - /** - * reset the backward data primitive descriptor. - */ void resetBwdDataPD(std::shared_ptr& pd); - /** - * reset the MKLDNNMatrix buffers used in backward. - */ void resetBwdBuffers(std::shared_ptr& wgtPD, std::shared_ptr& dataPD, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - /** - * reset the backward pipeline. - */ void resetBwdPipeline(std::vector& pipeline, std::shared_ptr& wgtPD, std::shared_ptr& dataPD, diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index 350ec65fffbc73c3a6e4245f763f4c6aa868f574..c8778bdd077c4b6d170140be92bdcdd7e8e81bb2 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -74,7 +74,7 @@ void MKLDNNFcLayer::convertWeightsToPaddle() { } void MKLDNNFcLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); @@ -87,32 +87,29 @@ void MKLDNNFcLayer::reshape( } void MKLDNNFcLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetFwdBuffers(in, wgt, bias, out); + resetFwdBuffers(inputs[0], wgtVal_, biasVal_, out); - resetFwdPD(fwdPD_, in, wgt, bias, out); + resetFwdPD(fwdPD_, inputs[0], wgtVal_, biasVal_, out); - resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out); + resetFwdPipeline(pipeline, fwdPD_, inputs[0], wgtVal_, biasVal_, out); } void MKLDNNFcLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { std::shared_ptr bwdWgtPD; std::shared_ptr bwdDataPD; - resetBwdBuffers(in, wgt, bias, out); + resetBwdBuffers(inputs[0], wgtGrad_, biasGrad_, out); - resetBwdWgtPD(bwdWgtPD, wgt, bias, out); + resetBwdWgtPD(bwdWgtPD, wgtGrad_, biasGrad_, out); - resetBwdDataPD(bwdDataPD, in, out); + resetBwdDataPD(bwdDataPD, inputs[0], out); - resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out); + resetBwdPipeline( + pipeline, bwdWgtPD, bwdDataPD, inputs[0], wgtGrad_, biasGrad_, out); } void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) { @@ -193,9 +190,9 @@ void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - CHECK(inVal_ && outVal_); + CHECK(inVals_[0] && outVal_); resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVal_->getPrimitiveDesc()); + resetInGrad(in, inVals_[0]->getPrimitiveDesc()); CHECK(wgtVal_); resetWithMatrix(wgt, weight_->getWGrad(), wgtVal_->getPrimitiveDesc()); @@ -212,14 +209,15 @@ void MKLDNNFcLayer::resetBwdWgtPD( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - CHECK(inVal_); - fc_bwdWgt::desc bwdWgtDesc = bias ? fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - bias->getMemoryDesc(), - out->getMemoryDesc()) - : fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); + CHECK(inVals_[0]); + fc_bwdWgt::desc bwdWgtDesc = + bias ? fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(), + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) + : fc_bwdWgt::desc(inVals_[0]->getMemoryDesc(), + wgt->getMemoryDesc(), + out->getMemoryDesc()); pd.reset(new fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); } @@ -245,11 +243,11 @@ void MKLDNNFcLayer::resetBwdPipeline( MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - CHECK(inVal_); + CHECK(inVals_[0]); if (bias) { - bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias)); + bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt, *bias)); } else { - bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt)); + bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVals_[0], *out, *wgt)); } pipeline.push_back(*bwdWgt_); diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index ee861763ff3dc10ddb4c119358b80dbe1614aecb..283dc9b540531f6009ae6e2485b7c12d4e5cf2e3 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -52,18 +52,14 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void updateWeights(const UpdateCallback& callback) override; @@ -73,11 +69,6 @@ public: void convertWeightsToPaddle() override; protected: - /** - * Forward functions: reset buffers(input, output, weight and bias), - * reset primitive descriptor, - * reset pipeline. - */ void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, @@ -93,13 +84,6 @@ protected: MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(input, output, weight and bias), - * reset primitive descriptor for backward weight, - * reset primitive descriptor for backward data, - * reset pipeline. - */ void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, diff --git a/paddle/gserver/layers/MKLDNNLayer.cpp b/paddle/gserver/layers/MKLDNNLayer.cpp index cf42da0735282d667d6b87061c8c59bf2f96e0be..6fbf3c7fdec2f537769adb660c67c5a597beb609 100644 --- a/paddle/gserver/layers/MKLDNNLayer.cpp +++ b/paddle/gserver/layers/MKLDNNLayer.cpp @@ -48,31 +48,20 @@ void MKLDNNLayer::forward(PassType passType) { REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); CHECK(!inputLayers_.empty()); copySeqInfoToOutputs(); - size_t elemenCnt = inputLayers_[0]->getOutputValue()->getElementCnt(); - if (inputElemenCnt_ != elemenCnt) { + if (condition_ != keepCondition()) { VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward"; - // reset when input total sizes changed, not only the batchsize - inputElemenCnt_ = elemenCnt; - pipelineFwd_.clear(); + condition_ = keepCondition(); reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_); - // all cpu device output grad or value share output's + printSizeInfo(); + // the output_.value and output_.grad are shared with CPU device shareCPUDevice(); - resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_); - // MKLDNNLayer output value should be MKLDNNMatrix - // so external output value is necessary. - // Then external input value is not necessary, - // since input may be mkldnn internal buffer. - CHECK(extOutVal_) << "external output value is necessary"; - output_.value = std::dynamic_pointer_cast(extOutVal_); - CHECK(inVal_ && outVal_) << "internal memories are necessary"; - if (cvtInVal_) { - pipelineFwd_.insert(pipelineFwd_.begin(), *cvtInVal_); - } - if (cvtOutVal_) { - pipelineFwd_.push_back(*cvtOutVal_); - } + pipelineFwd_.clear(); + inVals_.resize(inputLayers_.size(), nullptr); + extInVals_.resize(inputLayers_.size(), nullptr); + cvtInVals_.resize(inputLayers_.size(), nullptr); + resetFwd(pipelineFwd_, inVals_, outVal_); + prepareValueConversions(pipelineFwd_); convertWeightsFromPaddle(); - printSizeInfo(); printValueFormat(); needResetBwd_ = true; } @@ -80,8 +69,8 @@ void MKLDNNLayer::forward(PassType passType) { if (inputLayers_[0]->getType() == "data" && inputLayers_.size() == 1) { // Update input value data when input layer is "data" type, // since the input value data address might be changed. - CHECK(extInVal_); - extInVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); + CHECK(extInVals_[0]); + extInVals_[0]->setData(getInputValue(0, CPU_DEVICE)->getData()); } if (!outputOnlyMKLDNN_) { @@ -99,22 +88,13 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) { if (needResetBwd_) { VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward"; pipelineBwd_.clear(); + inGrads_.resize(inputLayers_.size(), nullptr); + extInGrads_.resize(inputLayers_.size(), nullptr); + cvtInGrads_.resize(inputLayers_.size(), nullptr); pipelineMergeGrad_.clear(); mergeGrad_ = nullptr; - resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_); - // external output grad is not necessary - // since output may be mkldnn internal buffer or merge them directly. - CHECK(outGrad_) << "internal output grad is necessary"; - if (extOutGrad_) { - CHECK_EQ(extOutGrad_->getData(), output_.grad->getData()) - << "the external buffer should share the same data with output_.grad"; - } - if (cvtOutGrad_) { - pipelineBwd_.insert(pipelineBwd_.begin(), *cvtOutGrad_); - } - if (cvtInGrad_) { - pipelineBwd_.push_back(*cvtInGrad_); - } + resetBwd(pipelineBwd_, inGrads_, outGrad_); + prepareGradConversions(pipelineBwd_); printGradFormat(); needResetBwd_ = false; } @@ -141,8 +121,8 @@ void MKLDNNLayer::backward(const UpdateCallback& callback) { void MKLDNNLayer::reshapeInput(int& batchsize, int& height, int& width, - size_t inputIdx) { - const Argument& input = inputLayers_[inputIdx]->getOutput(); + size_t idx) { + const Argument& input = inputLayers_[idx]->getOutput(); batchsize = input.getBatchSize(); int h = input.getFrameHeight(); int w = input.getFrameWidth(); @@ -176,27 +156,30 @@ void MKLDNNLayer::resetWithMatrix(MKLDNNMatrixPtr& dnn, void MKLDNNLayer::resetInValue( MKLDNNMatrixPtr& in, const std::shared_ptr& intPD, - size_t inputIdx) { - cvtInVal_ = nullptr; - extInVal_ = nullptr; + size_t idx, + int inputChannel) { + cvtInVals_[idx] = nullptr; + extInVals_[idx] = nullptr; in = nullptr; - CHECK_GT(bs_ * ic_ * ih_ * iw_, 0); + inputChannel = inputChannel == 0 ? ic_ : inputChannel; + CHECK_GT(bs_ * inputChannel * ih_ * iw_, 0); auto extPD = MKLDNNMatrix::createPrimitiveDesc( - {bs_, ic_, ih_, iw_}, format::nchw, engine_); - const MatrixPtr& inMat = inputLayers_[inputIdx]->getOutputValue(); - extInVal_ = std::dynamic_pointer_cast(inMat); - CHECK_EQ(inputIsOnlyMKLDNN(), extInVal_ != nullptr); - if (extInVal_ == nullptr || extInVal_->getFormat() == format::nc) { - extInVal_ = MKLDNNMatrix::create(extPD, inMat); + {bs_, inputChannel, ih_, iw_}, format::nchw, engine_); + const MatrixPtr& inMat = inputLayers_[idx]->getOutputValue(); + extInVals_[idx] = std::dynamic_pointer_cast(inMat); + CHECK_EQ(inputIsOnlyMKLDNN(), extInVals_[idx] != nullptr); + if (extInVals_[idx] == nullptr || + extInVals_[idx]->getFormat() == format::nc) { + extInVals_[idx] = MKLDNNMatrix::create(extPD, inMat); } - in = extInVal_; + in = extInVals_[idx]; if (nullptr == intPD || in->getPrimitiveDesc() == *intPD) { return; } // need create reorder in = MKLDNNMatrix::create(*intPD); - cvtInVal_ = MKLDNNMatrix::createReorder(extInVal_, in); - CHECK(cvtInVal_) << "should not be emptry"; + cvtInVals_[idx] = MKLDNNMatrix::createReorder(extInVals_[idx], in); + CHECK(cvtInVals_[idx]) << "should not be emptry"; } void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out, @@ -218,11 +201,11 @@ void MKLDNNLayer::resetOutValue(MKLDNNMatrixPtr& out, void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in, memory::primitive_desc intPD, - size_t inputIdx) { - cvtInGrad_ = nullptr; - extInGrad_ = nullptr; + size_t idx) { + cvtInGrads_[idx] = nullptr; + extInGrads_[idx] = nullptr; in = nullptr; - LayerPtr& input = inputLayers_[inputIdx]; + LayerPtr& input = inputLayers_[idx]; if (input->getOutputGrad() == nullptr) { // no need input grad return; @@ -237,23 +220,25 @@ void MKLDNNLayer::resetInGrad(MKLDNNMatrixPtr& in, in = MKLDNNMatrix::create(intPD, inMat); Argument& arg = input->getOutput(this->getName()); arg.grad = std::dynamic_pointer_cast(in); - CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD); + CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD); if (inputIsOnlyMKLDNN()) { return; } - extInGrad_ = in; - if (isPaddleFormat(extInGrad_->getFormat())) { + extInGrads_[idx] = in; + if (isPaddleFormat(extInGrads_[idx]->getFormat())) { return; } // need create reorder - CHECK(extInVal_ != nullptr && isPaddleFormat(extInVal_->getFormat())) + CHECK(extInVals_[idx] != nullptr && + isPaddleFormat(extInVals_[idx]->getFormat())) << "should have external input value and the format must be nchw(nc)"; - extInGrad_ = MKLDNNMatrix::create(extInVal_->getPrimitiveDesc(), inMat); - CHECK_PRIMITIVE_DESC_EQ(inVal_, intPD); + extInGrads_[idx] = + MKLDNNMatrix::create(extInVals_[idx]->getPrimitiveDesc(), inMat); + CHECK_PRIMITIVE_DESC_EQ(inVals_[idx], intPD); in = MKLDNNMatrix::create(intPD); - cvtInGrad_ = MKLDNNMatrix::createReorder(in, extInGrad_); - CHECK(cvtInGrad_); + cvtInGrads_[idx] = MKLDNNMatrix::createReorder(in, extInGrads_[idx]); + CHECK(cvtInGrads_[idx]); } void MKLDNNLayer::resetOutGrad(MKLDNNMatrixPtr& out, @@ -309,22 +294,8 @@ void MKLDNNLayer::resetMergeGrad(MKLDNNMatrixPtr& out) { srcs.push_back(*src); } - // TODO(TJ): remove me when mkldnn sum support different formats - for (size_t i = 1; i < srcPDs.size(); ++i) { - CHECK(srcPDs[0] == srcPDs[i]); - } - tmpOutGrad_ = out; - tmpCvt_ = nullptr; - if (out->getPrimitiveDesc() != srcPDs[0]) { - tmpOutGrad_ = MKLDNNMatrix::create(srcPDs[0]); - tmpCvt_ = MKLDNNMatrix::createReorder(tmpOutGrad_, out); - CHECK(tmpCvt_); - pipelineMergeGrad_.push_back(*tmpCvt_); - } - - auto sumPD = - sum::primitive_desc(tmpOutGrad_->getMemoryDesc(), scales, srcPDs); - mergeGrad_.reset(new sum(sumPD, srcs, *tmpOutGrad_)); + auto sumPD = sum::primitive_desc(out->getMemoryDesc(), scales, srcPDs); + mergeGrad_.reset(new sum(sumPD, srcs, *out)); pipelineMergeGrad_.insert(pipelineMergeGrad_.begin(), *mergeGrad_); } diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h index 4c42df1bee75fa7b28c2001c30797cc0df7c5554..e48b9b5a91f7f17cb3f31e9140f1428ba8954a20 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -34,15 +34,16 @@ typedef std::shared_ptr MKLDNNLayerPtr; */ class MKLDNNLayer : public Layer { protected: - // input value element count - size_t inputElemenCnt_; // batch size int bs_; + // their sizes are always from the first input layer // input image channel, height and width int ic_, ih_, iw_; // output image channel, height and width int oc_, oh_, ow_; + // the condition that forward need be reset + size_t condition_; // backward also need reset after reset forward handle bool needResetBwd_; @@ -67,18 +68,18 @@ protected: * When all layers are mkldnn layers, they could save internal data. */ // below MKLDNNMatrix buffers are all internal buffers - MKLDNNMatrixPtr inVal_; - MKLDNNMatrixPtr inGrad_; + std::vector inVals_; + std::vector inGrads_; MKLDNNMatrixPtr outVal_; MKLDNNMatrixPtr outGrad_; // below are external value and grad - MKLDNNMatrixPtr extInVal_; - MKLDNNMatrixPtr extInGrad_; + std::vector extInVals_; + std::vector extInGrads_; MKLDNNMatrixPtr extOutVal_; MKLDNNMatrixPtr extOutGrad_; // convert handle between external and internal buffers - std::shared_ptr cvtInVal_; - std::shared_ptr cvtInGrad_; + std::vector> cvtInVals_; + std::vector> cvtInGrads_; std::shared_ptr cvtOutVal_; std::shared_ptr cvtOutGrad_; @@ -93,23 +94,11 @@ protected: std::vector pipelineMergeGrad_; // tmp input argument to save input grad, only used to merge grad Argument tmpInArg_; - // since mkldnn sum do not support different formats: - // can refer to https://github.com/01org/mkl-dnn/issues/134 - // so need create reorder manually and save tmp MKLDNNMatrix - MKLDNNMatrixPtr tmpOutGrad_; - std::shared_ptr tmpCvt_; public: explicit MKLDNNLayer(const LayerConfig& config) : Layer(config), - inputElemenCnt_(0), - bs_(0), - ic_(0), - ih_(0), - iw_(0), - oc_(0), - oh_(0), - ow_(0), + condition_(0), needResetBwd_(true), outputOnlyMKLDNN_(false), engine_(mkldnn::engine::cpu, 0), @@ -125,31 +114,28 @@ public: virtual void backward(const UpdateCallback& callback); /** - * reshape the input image sizes - * and reset output image and buffer size - * output channel can not be changed + * reshape the input and output channels and image sizes + * and reset output buffer size */ virtual void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) = 0; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) = 0; /** * reset the mkldnn forward primitve and memories * only would be called when input size changes + * weight and bias buffers should be coverd by child class itself */ virtual void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) = 0; /** * reset the mkldnn backward primitve and memories * only would be called when needed + * weight and bias buffers should be coverd by child class itself */ virtual void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) = 0; /** @@ -175,13 +161,19 @@ public: void addOutputArgument(int deviceId) { Layer::addOutputArgument(deviceId); } protected: + /** + * Some layers may have different condition to reset the forward. + * The function returns the condition that do not need reset forward. + */ + inline virtual size_t keepCondition() { + // reset when the first input element size changed, not only the batchsize + return inputLayers_[0]->getOutputValue()->getElementCnt(); + } + /** * reshape the input image sizes and input batchsize */ - void reshapeInput(int& batchsize, - int& height, - int& width, - size_t inputIdx = 0); + void reshapeInput(int& batchsize, int& height, int& width, size_t idx = 0); /** * reshape output image sizes @@ -199,11 +191,13 @@ protected: /** * reset input value from input MKLDNNMatrix and internal primitive desc. * reset both internal and external buffer and create reorder if necessary. + * input channel may be different in concat. */ void resetInValue( MKLDNNMatrixPtr& in, const std::shared_ptr& intPD = nullptr, - size_t inputIdx = 0); + size_t idx = 0, + int inputChannel = 0); /** * reset output value from internal primitive desc. @@ -218,7 +212,7 @@ protected: */ void resetInGrad(MKLDNNMatrixPtr& in, mkldnn::memory::primitive_desc intPD, - size_t inputIdx = 0); + size_t idx = 0); /** * reset output grad from internal primitive desc. @@ -296,17 +290,19 @@ protected: * print the mkldnn memory format of value */ virtual void printValueFormat() { - if (extInVal_) { - VLOG(MKLDNN_FMTS) << extInVal_->getFormat() << " >>> "; - } - if (inVal_) { - VLOG(MKLDNN_FMTS) << inVal_->getFormat() << " >>>"; + for (size_t i = 0; i < inVals_.size(); ++i) { + if (!inVals_[i]) { + continue; + } + VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() + << ": " << (extInVals_[i] ? extInVals_[i]->getFormat() + : inVals_[i]->getFormat()) + << " >>> " << inVals_[i]->getFormat() << " >>>"; } if (outVal_) { - VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> "; - } - if (extOutVal_) { - VLOG(MKLDNN_FMTS) << extOutVal_->getFormat(); + VLOG(MKLDNN_FMTS) << outVal_->getFormat() << " >>> " + << (extOutVal_ ? extOutVal_->getFormat() + : outVal_->getFormat()); } if (wgtVal_) { VLOG(MKLDNN_FMTS) << "Weight value format: " << wgtVal_->getFormat(); @@ -320,17 +316,19 @@ protected: * print the mkldnn memory format of grad */ virtual void printGradFormat() { - if (extOutGrad_) { - VLOG(MKLDNN_FMTS) << extOutGrad_->getFormat(); - } if (outGrad_) { - VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< "; + VLOG(MKLDNN_FMTS) << outGrad_->getFormat() << " <<< " + << (extOutGrad_ ? extOutGrad_->getFormat() + : outGrad_->getFormat()); } - if (inGrad_) { - VLOG(MKLDNN_FMTS) << inGrad_->getFormat() << " <<<"; - } - if (extInGrad_) { - VLOG(MKLDNN_FMTS) << extInGrad_->getFormat() << " <<< "; + for (size_t i = 0; i < inGrads_.size(); ++i) { + if (!inGrads_[i]) { + continue; + } + VLOG(MKLDNN_FMTS) << "Input " << i << ", " << inputLayers_[i]->getName() + << ": " << (extInGrads_[i] ? extInGrads_[i]->getFormat() + : inGrads_[i]->getFormat()) + << " <<< " << inGrads_[i]->getFormat() << " <<<"; } if (wgtGrad_) { VLOG(MKLDNN_FMTS) << "Weight grad format: " << wgtGrad_->getFormat(); @@ -437,6 +435,41 @@ private: outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims; } } + + void prepareValueConversions(std::vector& pipeline) { + // MKLDNNLayer output value should be MKLDNNMatrix + // so external output value is necessary. + // Then external input value is not necessary, + // since input may be mkldnn internal buffer. + CHECK(extOutVal_) << "external output value is necessary"; + output_.value = std::dynamic_pointer_cast(extOutVal_); + CHECK(inVals_[0] && outVal_) << "internal memories are necessary"; + for (size_t i = 0; i < cvtInVals_.size(); ++i) { + if (cvtInVals_[i]) { + pipeline.insert(pipeline.begin(), *cvtInVals_[i]); + } + } + if (cvtOutVal_) { + pipeline.push_back(*cvtOutVal_); + } + } + void prepareGradConversions(std::vector& pipeline) { + // external output grad is not necessary + // since output may be mkldnn internal buffer or merge them directly. + CHECK(outGrad_) << "internal output grad is necessary"; + if (extOutGrad_) { + CHECK_EQ(extOutGrad_->getData(), output_.grad->getData()) + << "the external buffer should share the same data with output_.grad"; + } + if (cvtOutGrad_) { + pipeline.insert(pipeline.begin(), *cvtOutGrad_); + } + for (size_t i = 0; i < cvtInGrads_.size(); ++i) { + if (cvtInGrads_[i]) { + pipeline.push_back(*cvtInGrads_[i]); + } + } + } }; } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/gserver/layers/MKLDNNPoolLayer.cpp index a18c455beab96ef25b5545281bae4d48cec98d9e..a8252593c8fbb8013ab909e74a057850ba54bcaa 100644 --- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp +++ b/paddle/gserver/layers/MKLDNNPoolLayer.cpp @@ -58,10 +58,11 @@ bool MKLDNNPoolLayer::init(const LayerMap& layerMap, } void MKLDNNPoolLayer::reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) { + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) { reshapeInput(bs, ih, iw); // ic_ and oc can not be changed - CHECK_EQ(inputElemenCnt_ / bs / ih / iw, (size_t)ic) + CHECK_EQ((size_t)ic, + inputLayers_[0]->getOutputValue()->getElementCnt() / bs / ih / iw) << "Input channel can not be changed"; // cal output sizes @@ -74,29 +75,25 @@ void MKLDNNPoolLayer::reshape( } void MKLDNNPoolLayer::resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { - resetFwdBuffers(in, out); + resetFwdBuffers(inputs[0], out); - resetFwdPD(fwdPD_, in, out); + resetFwdPD(fwdPD_, inputs[0], out); - resetFwdPipeline(pipeline, fwdPD_, in, out); + resetFwdPipeline(pipeline, fwdPD_, inputs[0], out); } void MKLDNNPoolLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) { std::shared_ptr pd; - resetBwdBuffers(in, out); + resetBwdBuffers(inputs[0], out); - resetBwdPD(pd, in, out); + resetBwdPD(pd, inputs[0], out); - resetBwdPipeline(pipeline, pd, in, out); + resetBwdPipeline(pipeline, pd, inputs[0], out); } void MKLDNNPoolLayer::resetFwdBuffers(MKLDNNMatrixPtr& in, @@ -151,9 +148,9 @@ void MKLDNNPoolLayer::resetFwdPipeline( void MKLDNNPoolLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out) { - CHECK(inVal_ && outVal_); + CHECK(inVals_[0] && outVal_); resetOutGrad(out, outVal_->getPrimitiveDesc()); - resetInGrad(in, inVal_->getPrimitiveDesc()); + resetInGrad(in, inVals_[0]->getPrimitiveDesc()); } void MKLDNNPoolLayer::resetBwdPD(std::shared_ptr& pd, diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.h b/paddle/gserver/layers/MKLDNNPoolLayer.h index c5ec87828bfb28b4502b4ec6b47287089c514204..dad60156f0ef7caa059ff6c70d1040e7e34c938f 100644 --- a/paddle/gserver/layers/MKLDNNPoolLayer.h +++ b/paddle/gserver/layers/MKLDNNPoolLayer.h @@ -53,18 +53,14 @@ public: const ParameterMap& parameterMap) override; void reshape( - int& bs, int& ic, int& ih, int& iw, int oc, int& oh, int& ow) override; + int& bs, int& ic, int& ih, int& iw, int& oc, int& oh, int& ow) override; void resetFwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, + std::vector& inputs, MKLDNNMatrixPtr& out) override; void printSizeInfo() override { @@ -75,11 +71,6 @@ public: } protected: - /** - * Forward functions: reset buffers(input, output), - * reset primitive descriptor, - * reset pipeline. - */ void resetFwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); void resetFwdPD(std::shared_ptr& pd, MKLDNNMatrixPtr in, @@ -88,12 +79,6 @@ protected: std::shared_ptr& pd, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); - - /** - * Backward functions: reset buffers(input, output), - * reset primitive descriptor, - * reset pipeline. - */ void resetBwdBuffers(MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& out); void resetBwdPD(std::shared_ptr& pd, MKLDNNMatrixPtr& in, diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 02402894d3354a6af221948a3360ef830881bf39..2c8256b91c97b513ce7237b8174c522430094926 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ROIPoolLayer.h" +#include namespace paddle { @@ -126,10 +127,8 @@ void ROIPoolLayer::forward(PassType passType) { bool isEmpty = (hend <= hstart) || (wend <= wstart); size_t poolIndex = ph * pooledWidth_ + pw; - if (isEmpty) { - outputData[poolIndex] = 0; - argmaxData[poolIndex] = -1; - } + outputData[poolIndex] = isEmpty ? 0 : -FLT_MAX; + argmaxData[poolIndex] = -1; for (size_t h = hstart; h < hend; ++h) { for (size_t w = wstart; w < wend; ++w) { diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/gserver/tests/test_MKLDNN.cpp index 42644e9601a82ea81c417adc6441edeb036998e2..56b523f220c2a405851b89db5f63e9aa50bfaaf7 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -315,7 +315,7 @@ TEST(MKLDNNLayer, AddtoLayer) { static void getMKLDNNConcatConfig(TestConfig& cfg, const std::vector& inputs) { - CHECK_GE(inputs.size(), 2) << "at least two inputs"; + CHECK_GE(inputs.size(), 2UL) << "at least two inputs"; int oc = inputs[0].ic; for (size_t i = 1; i < inputs.size(); ++i) { CHECK_EQ(inputs[i].bs, inputs[0].bs); diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt index aed5275dbf9be707cc6e19e729133ba8eab58195..8841c14ee083fccfd2271efd0c331805919a09d9 100644 --- a/paddle/memory/CMakeLists.txt +++ b/paddle/memory/CMakeLists.txt @@ -1,6 +1,6 @@ add_subdirectory(detail) -cc_library(memory SRCS memory.cc DEPS place) +cc_library(memory SRCS memory.cc DEPS place enforce) cc_library(memcpy SRCS memcpy.cc) cc_library(paddle_memory diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index d0fe5b4635174fa0f74658509c4e8ca58a1d056a..a4c4374cf2f8b4b034d05e3a4c2221300a944214 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -73,6 +73,13 @@ function(op_library TARGET) file(APPEND ${pybind_file} "USE_OP(conv2d);\n") endif() + # conv_cudnn_op contains several operators + if ("${TARGET}" STREQUAL "conv_cudnn_op") + set(pybind_flag 1) + # It's enough to just adding one operator to pybind + file(APPEND ${pybind_file} "USE_OP(conv2d_cudnn);\n") + endif() + # pool_op contains several operators if ("${TARGET}" STREQUAL "pool_op") set(pybind_flag 1) @@ -178,12 +185,12 @@ set(DEPS_OPS cond_op cross_entropy_op recurrent_op - dynamic_recurrent_op softmax_with_cross_entropy_op softmax_op sequence_softmax_op sum_op pool_op + maxout_op pool_with_index_op conv_op conv_transpose_op @@ -193,6 +200,7 @@ set(DEPS_OPS lod_rank_table_op lod_tensor_to_array_op array_to_lod_tensor_op + max_sequence_len_op lstm_op tensor_array_read_write_op gru_op @@ -210,10 +218,12 @@ op_library(sgd_op DEPS selected_rows_functor) op_library(adagrad_op DEPS selected_rows_functor) op_library(conv_op DEPS vol2col) op_library(pool_op DEPS pooling) +op_library(maxout_op DEPS maxouting) op_library(pool_with_index_op DEPS pooling) op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table) op_library(lod_tensor_to_array_op SRCS lod_tensor_to_array_op.cc DEPS lod_rank_table_op) op_library(array_to_lod_tensor_op SRCS array_to_lod_tensor_op.cc DEPS lod_rank_table_op) +op_library(max_sequence_len_op SRCS max_sequence_len_op.cc DEPS lod_rank_table) op_library(tensor_array_read_write_op SRCS tensor_array_read_write_op.cc) if(WITH_GPU) op_library(nccl_op DEPS nccl_common) @@ -223,13 +233,6 @@ op_library(sequence_pool_op DEPS sequence_pooling) op_library(lstm_op DEPS sequence2batch lstm_compute) op_library(conv_transpose_op DEPS vol2col) op_library(gru_op DEPS sequence2batch gru_compute) -if(WITH_TESTING) - op_library(dynamic_recurrent_op SRCS dynamic_recurrent_op.cc rnn/recurrent_op_utils.cc - DEPS net_op tensor_array gtest) -else() - op_library(dynamic_recurrent_op SRCS dynamic_recurrent_op.cc rnn/recurrent_op_utils.cc - DEPS net_op tensor_array) -endif() op_library(recurrent_op SRCS recurrent_op.cc DEPS executor) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) @@ -244,9 +247,6 @@ cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor) cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory) -cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc - rnn/recurrent_op_utils.cc - DEPS dynamic_recurrent_op) if(WITH_GPU) cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) endif() diff --git a/paddle/operators/activation_op.cc b/paddle/operators/activation_op.cc index 83d35a450d0e8ebf5311cdfd948b066642ccec8c..154c618e8e7c4650b7f22684d3357de9c52a416c 100644 --- a/paddle/operators/activation_op.cc +++ b/paddle/operators/activation_op.cc @@ -98,7 +98,6 @@ $y = \max(x, 0)$ } }; -template class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { public: LeakyReluOpMaker(framework::OpProto *proto, @@ -106,8 +105,7 @@ class LeakyReluOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of LeakyRelu operator"); AddOutput("Y", "Output of LeakyRelu operator"); - AddAttr("alpha", "The small negative slope") - .SetDefault(static_cast(0.02f)); + AddAttr("alpha", "The small negative slope").SetDefault(0.02f); AddComment(R"DOC( LeakyRelu Activation Operator. @@ -117,7 +115,6 @@ $y = \max(x, \alpha * x)$ } }; -template class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { public: SoftShrinkOpMaker(framework::OpProto *proto, @@ -125,8 +122,7 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Softshrink operator"); AddOutput("Y", "Output of Softshrink operator"); - AddAttr("lambda", "non-negative offset") - .SetDefault(static_cast(0.5f)); + AddAttr("lambda", "non-negative offset").SetDefault(0.5f); AddComment(R"DOC( Softshrink Activation Operator. @@ -173,7 +169,6 @@ $$y = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ } }; -template class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { public: HardShrinkOpMaker(framework::OpProto *proto, @@ -181,8 +176,8 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of HardShrink operator"); AddOutput("Y", "Output of HardShrink operator"); - AddAttr("threshold", "The value of threshold for HardShrink") - .SetDefault(static_cast(0.5)); + AddAttr("threshold", "The value of threshold for HardShrink") + .SetDefault(0.5f); AddComment(R"DOC( HardShrink Activation Operator. @@ -228,6 +223,51 @@ $y = |x|$ } }; +class CeilOpMaker : public framework::OpProtoAndCheckerMaker { + public: + CeilOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of Ceil operator"); + AddOutput("Y", "Output of Ceil operator"); + AddComment(R"DOC( +Ceil Activation Operator. + +$y = ceil(x)$ + +)DOC"); + } +}; + +class FloorOpMaker : public framework::OpProtoAndCheckerMaker { + public: + FloorOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of Floor operator"); + AddOutput("Y", "Output of Floor operator"); + AddComment(R"DOC( +Floor Activation Operator. + +$y = floor(x)$ + +)DOC"); + } +}; + +class RoundOpMaker : public framework::OpProtoAndCheckerMaker { + public: + RoundOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of Round operator"); + AddOutput("Y", "Output of Round operator"); + AddComment(R"DOC( +Round Activation Operator. + +$y = [x]$ + +)DOC"); + } +}; + class ReciprocalOpMaker : public framework::OpProtoAndCheckerMaker { public: ReciprocalOpMaker(framework::OpProto *proto, @@ -308,17 +348,16 @@ $$y = \frac{x}{1 + |x|}$$ } }; -template class BReluOpMaker : public framework::OpProtoAndCheckerMaker { public: BReluOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of BRelu operator"); AddOutput("Y", "Output of BRelu operator"); - AddAttr("t_min", "The min marginal value of BRelu") - .SetDefault(static_cast(0)); - AddAttr("t_max", "The max marginal value of BRelu") - .SetDefault(static_cast(24)); + AddAttr("t_min", "The min marginal value of BRelu") + .SetDefault(static_cast(0)); + AddAttr("t_max", "The max marginal value of BRelu") + .SetDefault(static_cast(24)); AddComment(R"DOC( BRelu Activation Operator. @@ -328,7 +367,6 @@ $y = \max(\min(x, t_{min}), t_{max})$ } }; -template class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker { public: SoftReluOpMaker(framework::OpProto *proto, @@ -336,8 +374,8 @@ class SoftReluOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of SoftRelu operator"); AddOutput("Y", "Output of SoftRelu operator"); - AddAttr("threshold", "The threshold value of SoftRelu") - .SetDefault(static_cast(40)); + AddAttr("threshold", "The threshold value of SoftRelu") + .SetDefault(40.0f); AddComment(R"DOC( SoftRelu Activation Operator. @@ -347,15 +385,13 @@ $y = \ln(1 + \exp(\max(\min(x, threshold), threshold))$ } }; -template class ELUOpMaker : public framework::OpProtoAndCheckerMaker { public: ELUOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of ELU operator"); AddOutput("Y", "Output of ELU operator"); - AddAttr("alpha", "The alpha value of ELU") - .SetDefault(static_cast(1.0f)); + AddAttr("alpha", "The alpha value of ELU").SetDefault(1.0f); AddComment(R"DOC( ELU Activation Operator. @@ -368,15 +404,14 @@ $y = \max(0, x) + \min(0, \alpha * (e^x - 1))$ } }; -template class Relu6OpMaker : public framework::OpProtoAndCheckerMaker { public: Relu6OpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Relu6 operator"); AddOutput("Y", "Output of Relu6 operator"); - AddAttr("threshold", "The threshold value of Relu6") - .SetDefault(static_cast(6)); + AddAttr("threshold", "The threshold value of Relu6") + .SetDefault(6.0f); AddComment(R"DOC( Relu6 Activation Operator. @@ -386,15 +421,13 @@ $y = \min(\max(0, x), 6)$ } }; -template class PowOpMaker : public framework::OpProtoAndCheckerMaker { public: PowOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of Pow operator"); AddOutput("Y", "Output of Pow operator"); - AddAttr("factor", "The exponential factor of Pow") - .SetDefault(static_cast(1)); + AddAttr("factor", "The exponential factor of Pow").SetDefault(1.0f); AddComment(R"DOC( Pow Activation Operator. @@ -404,17 +437,16 @@ $y = x^{factor}$ } }; -template class STanhOpMaker : public framework::OpProtoAndCheckerMaker { public: STanhOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of STanh operator"); AddOutput("Y", "Output of STanh operator"); - AddAttr("scale_a", "The scale parameter of a for the input") - .SetDefault(static_cast(2 / 3)); - AddAttr("scale_b", "The scale parameter of b for the input") - .SetDefault(static_cast(1.7159)); + AddAttr("scale_a", "The scale parameter of a for the input") + .SetDefault(2.0f / 3.0f); + AddAttr("scale_b", "The scale parameter of b for the input") + .SetDefault(1.7159f); AddComment(R"DOC( STanh Activation Operator. @@ -424,7 +456,6 @@ $$y = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$ } }; -template class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { public: ThresholdedReluOpMaker(framework::OpProto *proto, @@ -432,8 +463,8 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of ThresholdedRelu operator"); AddOutput("Y", "Output of ThresholdedRelu operator"); - AddAttr("threshold", "The threshold location of activation") - .SetDefault(static_cast(1.0)); + AddAttr("threshold", "The threshold location of activation") + .SetDefault(1.0f); AddComment(R"DOC( ThresholdedRelu Activation Operator. @@ -448,7 +479,6 @@ $$ } }; -template class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { public: HardSigmoidOpMaker(framework::OpProto *proto, @@ -456,10 +486,10 @@ class HardSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input of HardSigmoid operator"); AddOutput("Y", "Output of HardSigmoid operator"); - AddAttr("slope", "Slope for linear approximation of sigmoid") - .SetDefault(static_cast(0.2)); - AddAttr("offset", "Offset for linear approximation of sigmoid") - .SetDefault(static_cast(0.5)); + AddAttr("slope", "Slope for linear approximation of sigmoid") + .SetDefault(0.2f); + AddAttr("offset", "Offset for linear approximation of sigmoid") + .SetDefault(0.5f); AddComment(R"DOC( HardSigmoid Activation Operator. @@ -499,7 +529,7 @@ REGISTER_OP(tanh, ops::ActivationOp, ops::TanhOpMaker, tanh_grad, REGISTER_OP(tanh_shrink, ops::ActivationOp, ops::TanhShrinkOpMaker, tanh_shrink_grad, ops::ActivationOpGrad); -REGISTER_OP(softshrink, ops::ActivationOp, ops::SoftShrinkOpMaker, +REGISTER_OP(softshrink, ops::ActivationOp, ops::SoftShrinkOpMaker, softshrink_grad, ops::ActivationOpGrad); REGISTER_OP(sqrt, ops::ActivationOp, ops::SqrtOpMaker, sqrt_grad, @@ -508,6 +538,15 @@ REGISTER_OP(sqrt, ops::ActivationOp, ops::SqrtOpMaker, sqrt_grad, REGISTER_OP(abs, ops::ActivationOp, ops::AbsOpMaker, abs_grad, ops::ActivationOpGrad); +REGISTER_OP(ceil, ops::ActivationOp, ops::CeilOpMaker, ceil_grad, + ops::ActivationOpGrad); + +REGISTER_OP(floor, ops::ActivationOp, ops::FloorOpMaker, floor_grad, + ops::ActivationOpGrad); + +REGISTER_OP(round, ops::ActivationOp, ops::RoundOpMaker, round_grad, + ops::ActivationOpGrad); + REGISTER_OP(reciprocal, ops::ActivationOp, ops::ReciprocalOpMaker, reciprocal_grad, ops::ActivationOpGrad); @@ -523,35 +562,34 @@ REGISTER_OP(softplus, ops::ActivationOp, ops::SoftplusOpMaker, softplus_grad, REGISTER_OP(softsign, ops::ActivationOp, ops::SoftsignOpMaker, softsign_grad, ops::ActivationOpGrad); -REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker, brelu_grad, +REGISTER_OP(brelu, ops::ActivationOp, ops::BReluOpMaker, brelu_grad, ops::ActivationOpGrad); -REGISTER_OP(leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker, +REGISTER_OP(leaky_relu, ops::ActivationOp, ops::LeakyReluOpMaker, leaky_relu_grad, ops::ActivationOpGrad); -REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker, - soft_relu_grad, ops::ActivationOpGrad); +REGISTER_OP(soft_relu, ops::ActivationOp, ops::SoftReluOpMaker, soft_relu_grad, + ops::ActivationOpGrad); -REGISTER_OP(elu, ops::ActivationOp, ops::ELUOpMaker, elu_grad, +REGISTER_OP(elu, ops::ActivationOp, ops::ELUOpMaker, elu_grad, ops::ActivationOpGrad); -REGISTER_OP(relu6, ops::ActivationOp, ops::Relu6OpMaker, relu6_grad, +REGISTER_OP(relu6, ops::ActivationOp, ops::Relu6OpMaker, relu6_grad, ops::ActivationOpGrad); -REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker, pow_grad, +REGISTER_OP(pow, ops::ActivationOp, ops::PowOpMaker, pow_grad, ops::ActivationOpGrad); -REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker, stanh_grad, +REGISTER_OP(stanh, ops::ActivationOp, ops::STanhOpMaker, stanh_grad, ops::ActivationOpGrad); -REGISTER_OP(hard_shrink, ops::ActivationOp, ops::HardShrinkOpMaker, +REGISTER_OP(hard_shrink, ops::ActivationOp, ops::HardShrinkOpMaker, hard_shrink_grad, ops::ActivationOpGrad); -REGISTER_OP(thresholded_relu, ops::ActivationOp, - ops::ThresholdedReluOpMaker, thresholded_relu_grad, - ops::ActivationOpGrad); +REGISTER_OP(thresholded_relu, ops::ActivationOp, ops::ThresholdedReluOpMaker, + thresholded_relu_grad, ops::ActivationOpGrad); -REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker, +REGISTER_OP(hard_sigmoid, ops::ActivationOp, ops::HardSigmoidOpMaker, hard_sigmoid_grad, ops::ActivationOpGrad); #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \ diff --git a/paddle/operators/activation_op.h b/paddle/operators/activation_op.h index ceb4b4e40b67473f42e67e3f02f8e012e1b1eb50..8cd3bfbbd3f8f3210f94aef3a1586c8295730c1d 100644 --- a/paddle/operators/activation_op.h +++ b/paddle/operators/activation_op.h @@ -283,6 +283,41 @@ struct SqrtGradFunctor : public BaseActivationFunctor { } }; +// ceil(x) = ceiling(x) +template +struct CeilFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Y y) const { + y.device(d) = x.ceil(); + } +}; + +template +struct ZeroGradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Y y, dY dy, dX dx) const { + dx.device(d) = static_cast(0) / x; + } +}; + +// floor(x) = flooring(x) +template +struct FloorFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Y y) const { + y.device(d) = x.ceil(); + } +}; + +// round(x) = [x] +template +struct RoundFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Y y) const { + y.device(d) = x.round(); + } +}; + // abs(x) = |x| template struct AbsFunctor : public BaseActivationFunctor { @@ -677,6 +712,9 @@ struct HardSigmoidGradFunctor : public BaseActivationFunctor { __macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \ __macro(sqrt, SqrtFunctor, SqrtGradFunctor); \ __macro(abs, AbsFunctor, AbsGradFunctor); \ + __macro(ceil, CeilFunctor, ZeroGradFunctor); \ + __macro(floor, FloorFunctor, ZeroGradFunctor); \ + __macro(round, RoundFunctor, ZeroGradFunctor); \ __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ __macro(log, LogFunctor, LogGradFunctor); \ __macro(square, SquareFunctor, SquareGradFunctor); \ diff --git a/paddle/operators/adadelta_op.cc b/paddle/operators/adadelta_op.cc index b717e1647e4b89285b841420650dc69e8a1e0c58..16a7794d5b7bf1d56cd9f5874454c41cab43b41f 100644 --- a/paddle/operators/adadelta_op.cc +++ b/paddle/operators/adadelta_op.cc @@ -109,4 +109,5 @@ paramOut = param + paramUpdate$$ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(adadelta, ops::AdadeltaOp, ops::AdadeltaOpMaker); REGISTER_OP_CPU_KERNEL( - adadelta, ops::AdadeltaOpKernel); + adadelta, ops::AdadeltaOpKernel, + ops::AdadeltaOpKernel); diff --git a/paddle/operators/adadelta_op.cu b/paddle/operators/adadelta_op.cu index 3af1c8c8e9861138a33b3156818f704c3b20363f..9fb61852071f11670b8bc51321bb0881de196777 100644 --- a/paddle/operators/adadelta_op.cu +++ b/paddle/operators/adadelta_op.cu @@ -17,4 +17,5 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( - adadelta, ops::AdadeltaOpKernel); + adadelta, ops::AdadeltaOpKernel, + ops::AdadeltaOpKernel); diff --git a/paddle/operators/adadelta_op.h b/paddle/operators/adadelta_op.h index d29e15c43583bd447fbacb548a326f303f7d1463..a8c5f0c8aa20ce506f5279fa696079ba64034bd5 100644 --- a/paddle/operators/adadelta_op.h +++ b/paddle/operators/adadelta_op.h @@ -33,8 +33,8 @@ class AdadeltaOpKernel : public framework::OpKernel { avg_squared_grad_out_tensor->mutable_data(ctx.GetPlace()); avg_squared_update_out_tensor->mutable_data(ctx.GetPlace()); - float rho = ctx.Attr("rho"); - float epsilon = ctx.Attr("epsilon"); + T rho = static_cast(ctx.Attr("rho")); + T epsilon = static_cast(ctx.Attr("epsilon")); auto param = framework::EigenVector::Flatten( *ctx.Input("Param")); diff --git a/paddle/operators/adagrad_op.cu b/paddle/operators/adagrad_op.cu index 5b869e6bc5f4604ba6055ffd62fa21e4a1f41b93..1c870214b29dbfcabb7414317b1214d6bef369cb 100644 --- a/paddle/operators/adagrad_op.cu +++ b/paddle/operators/adagrad_op.cu @@ -14,8 +14,8 @@ #define EIGEN_USE_GPU #include "paddle/operators/adagrad_op.h" -#include "paddle/operators/math/selected_rows_functor.h" #include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/selected_rows_functor.h" #include "paddle/platform/cuda_helper.h" namespace paddle { @@ -134,8 +134,8 @@ struct SparseAdagradFunctor { T, 256><<(context) .stream()>>>(grad_merge_data, grad_merge->rows().data(), - lr, param_data, - moment_data, grad_width, epsilon); + lr, param_data, moment_data, grad_width, + epsilon); } }; diff --git a/paddle/operators/adam_op.cc b/paddle/operators/adam_op.cc index 97a091ae766abfba5412bbd32c34a6f80701fbf7..03faa2a7c5a486cb0d2b6f2f10d140eeb4c6c04e 100644 --- a/paddle/operators/adam_op.cc +++ b/paddle/operators/adam_op.cc @@ -127,4 +127,5 @@ paramOut = param - learningRate * moment_1/ ($\sqrt{(moment_2)} + \epsilon)$$ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(adam, ops::AdamOp, ops::AdamOpMaker); REGISTER_OP_CPU_KERNEL(adam, - ops::AdamOpKernel); + ops::AdamOpKernel, + ops::AdamOpKernel); diff --git a/paddle/operators/adam_op.cu b/paddle/operators/adam_op.cu index a3def912e540454275350209435eb01ae2151331..6e34f7818ce20c75692fe21776721ce200b7a147 100644 --- a/paddle/operators/adam_op.cu +++ b/paddle/operators/adam_op.cu @@ -17,4 +17,5 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(adam, - ops::AdamOpKernel); + ops::AdamOpKernel, + ops::AdamOpKernel); diff --git a/paddle/operators/adam_op.h b/paddle/operators/adam_op.h index 45938006db1231a7a134964d729df6ca114d4dbe..7f7fa1da1c0d8d81d1bcb18a1bf542838eddccf7 100644 --- a/paddle/operators/adam_op.h +++ b/paddle/operators/adam_op.h @@ -31,9 +31,9 @@ class AdamOpKernel : public framework::OpKernel { moment1_out_tensor->mutable_data(ctx.GetPlace()); moment2_out_tensor->mutable_data(ctx.GetPlace()); - float beta1 = ctx.Attr("beta1"); - float beta2 = ctx.Attr("beta2"); - float epsilon = ctx.Attr("epsilon"); + T beta1 = static_cast(ctx.Attr("beta1")); + T beta2 = static_cast(ctx.Attr("beta2")); + T epsilon = static_cast(ctx.Attr("epsilon")); auto param = framework::EigenVector::Flatten( *ctx.Input("Param")); diff --git a/paddle/operators/adamax_op.cc b/paddle/operators/adamax_op.cc index 14cf3841b33a8153549e4c99ed2b75286e9c64db..d5bbc672e18f392d6a91383b919fefc4b2d8ff0e 100644 --- a/paddle/operators/adamax_op.cc +++ b/paddle/operators/adamax_op.cc @@ -126,4 +126,5 @@ division by 0 error. namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(adamax, ops::AdamaxOp, ops::AdamaxOpMaker); REGISTER_OP_CPU_KERNEL(adamax, - ops::AdamaxOpKernel); + ops::AdamaxOpKernel, + ops::AdamaxOpKernel); diff --git a/paddle/operators/adamax_op.cu b/paddle/operators/adamax_op.cu index fee3b6fc6b656917d79b84f48da8e63be7683890..057ef39025aa23704457ef7bbe54934d06cdc87f 100644 --- a/paddle/operators/adamax_op.cu +++ b/paddle/operators/adamax_op.cu @@ -17,4 +17,5 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(adamax, - ops::AdamaxOpKernel); + ops::AdamaxOpKernel, + ops::AdamaxOpKernel); diff --git a/paddle/operators/adamax_op.h b/paddle/operators/adamax_op.h index 2c99832ec08e9c1d9b5458c467d5238f9b1b3c37..bf36ed78604dd88c537db51fbeb38f43d0c46173 100644 --- a/paddle/operators/adamax_op.h +++ b/paddle/operators/adamax_op.h @@ -31,9 +31,9 @@ class AdamaxOpKernel : public framework::OpKernel { moment_out_tensor->mutable_data(ctx.GetPlace()); inf_norm_out_tensor->mutable_data(ctx.GetPlace()); - float beta1 = ctx.Attr("beta1"); - float beta2 = ctx.Attr("beta2"); - float epsilon = ctx.Attr("epsilon"); + T beta1 = static_cast(ctx.Attr("beta1")); + T beta2 = static_cast(ctx.Attr("beta2")); + T epsilon = static_cast(ctx.Attr("epsilon")); auto param = framework::EigenVector::Flatten( *ctx.Input("Param")); diff --git a/paddle/operators/array_operator.h b/paddle/operators/array_operator.h index 233a81198e336d3190565fb18556f96979cec0ce..1f2b4fdb4b4a99d5baf5de1cc226dc196ab4eb2e 100644 --- a/paddle/operators/array_operator.h +++ b/paddle/operators/array_operator.h @@ -36,7 +36,7 @@ class ArrayOp : public framework::OperatorBase { if (platform::is_gpu_place(i_tensor.place())) { // FIXME: Avoid copy from GPU to CPU framework::Tensor t; - t.CopyFrom(i_tensor, platform::CPUPlace(), dev_ctx); + framework::CopyFrom(i_tensor, platform::CPUPlace(), dev_ctx, &t); dev_ctx.Wait(); offset = static_cast(*t.data()); } else { diff --git a/paddle/operators/array_to_lod_tensor_op.cc b/paddle/operators/array_to_lod_tensor_op.cc index c0903bb4e5ca7f160e19eefab99af7e3e4a8ed76..faeba7f3ed26d05de16775a1de4d42f802111207 100644 --- a/paddle/operators/array_to_lod_tensor_op.cc +++ b/paddle/operators/array_to_lod_tensor_op.cc @@ -102,8 +102,9 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { if (len == 0) { continue; } - out->Slice(out_offset, out_offset + len) - .CopyFrom(x[x_idx].Slice(start_offset, end_offset), place, dev_ctx); + auto slice = out->Slice(out_offset, out_offset + len); + framework::CopyFrom(x[x_idx].Slice(start_offset, end_offset), place, + dev_ctx, &slice); out_offset += len; } } diff --git a/paddle/operators/assign_op.cc b/paddle/operators/assign_op.cc index 609e915b932e2bc4d5abee1e5f868cc07a7619d3..0a37f18729a93b15623c0a17e3689e518c38b844 100644 --- a/paddle/operators/assign_op.cc +++ b/paddle/operators/assign_op.cc @@ -43,7 +43,8 @@ class AssignFunctor { out_rows.set_rows(rows.rows()); out_rows.set_height(rows.height()); auto &t = rows.value(); - out_rows.mutable_value()->CopyFrom(t, t.place(), dev_ctx_); + auto *m = out_rows.mutable_value(); + framework::CopyFrom(t, t.place(), dev_ctx_, m); } template @@ -55,7 +56,7 @@ class AssignFunctor { void copy_tensor(const framework::LoDTensor &lod_tensor, framework::LoDTensor *out) const { auto &out_tensor = *out; - out_tensor.CopyFrom(lod_tensor, lod_tensor.place(), dev_ctx_); + CopyFrom(lod_tensor, lod_tensor.place(), dev_ctx_, &out_tensor); out_tensor.set_lod(lod_tensor.lod()); } diff --git a/paddle/operators/beam_search_decode_op.cc b/paddle/operators/beam_search_decode_op.cc index 3904a97d58166cfeeb2be7d2144700dbd8bc5721..c796a0c5d089499e7858c7a427825fdbeb05cb7f 100644 --- a/paddle/operators/beam_search_decode_op.cc +++ b/paddle/operators/beam_search_decode_op.cc @@ -17,6 +17,36 @@ limitations under the License. */ namespace paddle { namespace operators { +struct BeamSearchDecodeFunctor { + BeamSearchDecodeFunctor(const LoDTensorArray& step_ids, + const LoDTensorArray& step_scores, + LoDTensor* id_tensor, LoDTensor* score_tensor) + : step_ids_(step_ids), + step_scores_(step_scores), + id_tensor_(id_tensor), + score_tensor_(score_tensor) {} + + template + void operator()() const; + + const LoDTensorArray& step_ids_; + const LoDTensorArray& step_scores_; + LoDTensor* id_tensor_; + LoDTensor* score_tensor_; +}; + +template +void BeamSearchDecodeFunctor::operator()() const { + BeamSearchDecoder beam_search_decoder; + beam_search_decoder.PackAllSteps(step_ids_, step_scores_, id_tensor_, + score_tensor_); +} + +template <> +void BeamSearchDecodeFunctor::operator()() const { + PADDLE_THROW("beam search decode op does not support bool!"); +} + class BeamSearchDecodeOp : public framework::OperatorBase { public: BeamSearchDecodeOp(const std::string& type, @@ -45,9 +75,9 @@ class BeamSearchDecodeOp : public framework::OperatorBase { LoDTensor* sentenceIds = ctx.Output("SentenceIds"); LoDTensor* sentenceScores = ctx.Output("SentenceScores"); - BeamSearchDecoder beam_search_decoder; - beam_search_decoder.PackAllSteps(*ids, *scores, sentenceIds, - sentenceScores); + framework::VisitDataType( + framework::ToDataType(scores->at(0).type()), + BeamSearchDecodeFunctor(*ids, *scores, sentenceIds, sentenceScores)); } }; diff --git a/paddle/operators/beam_search_decode_op.h b/paddle/operators/beam_search_decode_op.h index 0f007ec22f9a66572971516a711317f348e1ec5a..3b1c6cd7a1045bfbb896725c79dc1ae2e22f43dc 100644 --- a/paddle/operators/beam_search_decode_op.h +++ b/paddle/operators/beam_search_decode_op.h @@ -232,12 +232,12 @@ void BeamSearchDecoder::ConvertSentenceVectorToLodTensor( id_tensor->set_lod(lod); id_tensor->Resize({static_cast(id_data.size())}); id_tensor->mutable_data(paddle::platform::CPUPlace()); - id_tensor->CopyFromVector(id_data, cpu_ctx); + framework::CopyFromVector(id_data, cpu_ctx, id_tensor); score_tensor->set_lod(lod); score_tensor->Resize({static_cast(score_data.size())}); score_tensor->mutable_data(paddle::platform::CPUPlace()); - score_tensor->CopyFromVector(score_data, cpu_ctx); + framework::CopyFromVector(score_data, cpu_ctx, score_tensor); } template diff --git a/paddle/operators/beam_search_op.cc b/paddle/operators/beam_search_op.cc index 17926a813d5b0b8ace6a1b20066cd0007703c696..8c3e2a303fb8f12a8886c11cf112b859a6db7bcf 100644 --- a/paddle/operators/beam_search_op.cc +++ b/paddle/operators/beam_search_op.cc @@ -139,7 +139,7 @@ bool BeamSearch::NextItemSet(std::vector *items) { items->reserve(framework::product(ids.dims())); for (size_t offset = abs_lod[lod_level_][sent_offset_]; offset < abs_lod[lod_level_][sent_offset_ + 1]; offset++) { - for (int d = 0; d < instance_dim; d++) { + for (size_t d = 0; d < instance_dim; d++) { const size_t dim_offset = offset * instance_dim + d; items->emplace_back(offset, ids_data[dim_offset], scores_data[dim_offset]); diff --git a/paddle/operators/bilinear_tensor_product_op.cc b/paddle/operators/bilinear_tensor_product_op.cc index c65ba7eb262f3aabe2c00837b79806c0b40b60fd..c88b2c9beb4497b617078c8ac5582d2f246f43fd 100644 --- a/paddle/operators/bilinear_tensor_product_op.cc +++ b/paddle/operators/bilinear_tensor_product_op.cc @@ -77,11 +77,19 @@ class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Out", "The output of bilinear_tensor_product operator."); AddComment(R"DOC( Bilinear Tensor Product operator. -Given input X and Y, a 3D tensor weight, and bias. Each column of the -output is computed by one slice i = 1, . . . , k of the tensor: - - M = (X W_i) \cdot Y - Out_i = \sum_i {M_i} + Bias_i +Given input X and Y, a 3D tensor Weight and a Bias. Each column of the +Output is computed by one slice $i = 1, . . . , k$ of the tensor: + +$$ +M = (X W_i) * Y \\ +Out_i = \sum_j {M_j} + Bias_i +$$ + +Where $W_i$ is the $i$-th slice of Input(Weight); + $M_j$ is the $j$-th column of $M$; + $Out_i$ is the $i$-th column of Output(Out); + $Bias_i$ is a column vector, each element of it is equal to + the $i$-th element of $Bias$; )DOC"); } diff --git a/paddle/operators/cast_op.cc b/paddle/operators/cast_op.cc index 70ee7861bab3a982eae60dd85b10c2e41f5827d0..3082a53ccfbe4f8666cfdfc2efed6b46ffdfede9 100644 --- a/paddle/operators/cast_op.cc +++ b/paddle/operators/cast_op.cc @@ -25,8 +25,8 @@ class CastOpProtoMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input tensor of cast op"); AddOutput("Out", "The output tensor of cast op"); - AddAttr("out_data_type", "output data type"); - AddAttr("in_data_type", "input data type"); + AddAttr("out_dtype", "output data type"); + AddAttr("in_dtype", "input data type"); AddComment(R"DOC( Cast Operator. @@ -58,8 +58,8 @@ class CastOpGradMaker : public framework::SingleGradOpDescMaker { grad->SetType("cast"); grad->SetInput("X", OutputGrad("Out")); grad->SetOutput("Out", InputGrad("X")); - grad->SetAttr("out_data_type", GetAttr("in_data_type")); - grad->SetAttr("in_data_type", GetAttr("out_data_type")); + grad->SetAttr("out_dtype", GetAttr("in_dtype")); + grad->SetAttr("in_dtype", GetAttr("out_dtype")); return std::unique_ptr(grad); } }; diff --git a/paddle/operators/cast_op.h b/paddle/operators/cast_op.h index ffdbff7030afedab2efc06479ac86ad70c185f48..850dc8e3498351e54d41fcd2b6596c6fe668df14 100644 --- a/paddle/operators/cast_op.h +++ b/paddle/operators/cast_op.h @@ -55,7 +55,7 @@ class CastOpKernel : public framework::OpKernel { auto* in = context.Input("X"); auto* out = context.Output("Out"); framework::VisitDataType( - static_cast(context.Attr("out_data_type")), + static_cast(context.Attr("out_dtype")), CastOpFunctor(in, out, context.device_context())); } }; diff --git a/paddle/operators/conv_cudnn_op.cc b/paddle/operators/conv_cudnn_op.cc index 4c65b60d2349d2989128f4b1da705ea18391b8a3..0dd8c13b2ad6ff206066ccb98a4c009e4c3b4fd0 100644 --- a/paddle/operators/conv_cudnn_op.cc +++ b/paddle/operators/conv_cudnn_op.cc @@ -17,10 +17,10 @@ namespace paddle { namespace operators { -class CudnnConvOpMaker : public Conv2DOpMaker { +class CudnnConv2DOpMaker : public Conv2DOpMaker { public: - CudnnConvOpMaker(framework::OpProto* proto, - framework::OpAttrChecker* op_checker) + CudnnConv2DOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) : Conv2DOpMaker(proto, op_checker) { AddAttr("workspace_size_MB", "workspace size for cudnn, in MB, " @@ -32,15 +32,43 @@ class CudnnConvOpMaker : public Conv2DOpMaker { } }; +class CudnnConv3DOpMaker : public Conv3DOpMaker { + public: + CudnnConv3DOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : Conv3DOpMaker(proto, op_checker) { + AddAttr("workspace_size_MB", + "workspace size for cudnn, in MB, " + "workspace is a section of GPU memory which will be " + "allocated/freed each time the operator runs, larger " + "workspace size can increase performance but also requires " + "better hardware. This size should be chosen carefully.") + .SetDefault(4096); + } +}; + } // namespace operators } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP(conv_cudnn, ops::ConvOp, ops::CudnnConvOpMaker, conv_cudnn_grad, - ops::ConvOpGrad); +REGISTER_OP(conv2d_cudnn, ops::ConvOp, ops::CudnnConv2DOpMaker, + conv2d_cudnn_grad, ops::ConvOpGrad); + +REGISTER_OP(conv3d_cudnn, ops::ConvOp, ops::CudnnConv3DOpMaker, + conv3d_cudnn_grad, ops::ConvOpGrad); + +REGISTER_OP_CPU_KERNEL(conv2d_cudnn, + ops::GemmConvKernel, + ops::GemmConvKernel); +REGISTER_OP_CPU_KERNEL( + conv2d_cudnn_grad, + ops::GemmConvGradKernel, + ops::GemmConvGradKernel); -REGISTER_OP_CPU_KERNEL(conv_cudnn, - ops::GemmConvKernel); +REGISTER_OP_CPU_KERNEL(conv3d_cudnn, + ops::GemmConvKernel, + ops::GemmConvKernel); REGISTER_OP_CPU_KERNEL( - conv_cudnn_grad, - ops::GemmConvGradKernel); + conv3d_cudnn_grad, + ops::GemmConvGradKernel, + ops::GemmConvGradKernel); diff --git a/paddle/operators/conv_cudnn_op.cu.cc b/paddle/operators/conv_cudnn_op.cu.cc index 4900f7b086c869b496c492743c71ab7047c5f672..a9763d424801cfced5fe4c4718a335a24b81cfdc 100644 --- a/paddle/operators/conv_cudnn_op.cu.cc +++ b/paddle/operators/conv_cudnn_op.cu.cc @@ -56,6 +56,21 @@ class CudnnConvOpKernel : public framework::OpKernel { ScopedFilterDescriptor filter_desc; ScopedConvolutionDescriptor conv_desc; DataLayout layout = DataLayout::kNCHW; + if (input->dims().size() == 5) { + layout = DataLayout::kNCDHW; + } + + cudnnConvolutionDescriptor_t cudnn_conv_desc = + conv_desc.descriptor(paddings, strides, dilations); + +#if CUDNN_VERSION_MIN(7, 0, 0) + // cudnn 7 can support groups, no need to do it mannually + // FIXME(typhoonzero): find a better way to disable groups + // rather than setting it to 1. + PADDLE_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount( + cudnn_conv_desc, groups)); + groups = 1; +#endif cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( layout, framework::vectorize2int(input->dims()), groups); @@ -63,19 +78,34 @@ class CudnnConvOpKernel : public framework::OpKernel { layout, framework::vectorize2int(output->dims()), groups); cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor( layout, framework::vectorize2int(filter->dims()), groups); - cudnnConvolutionDescriptor_t cudnn_conv_desc = - conv_desc.descriptor(paddings, strides, dilations); int input_channels = input->dims()[1]; - int input_height = input->dims()[2]; - int input_width = input->dims()[3]; - int output_channels = output->dims()[1]; - int output_height = output->dims()[2]; - int output_width = output->dims()[3]; + int input_height, input_width, input_depth; + if (input->dims().size() == 5) { + input_depth = input->dims()[2]; + input_height = input->dims()[3]; + input_width = input->dims()[4]; + } else { // dim size is enforced in InferShape + input_depth = 1; + input_height = input->dims()[2]; + input_width = input->dims()[3]; + } + int output_channels = filter->dims()[0]; + int output_height, output_width, output_depth; + if (output->dims().size() == 5) { + output_depth = output->dims()[2]; + output_height = output->dims()[3]; + output_width = output->dims()[4]; + } else { + output_depth = 1; + output_height = output->dims()[2]; + output_width = output->dims()[3]; + } - int group_offset_in = input_channels / groups * input_height * input_width; + int group_offset_in = + input_channels / groups * input_height * input_width * input_depth; int group_offset_out = - output_channels / groups * output_height * output_width; + output_channels / groups * output_height * output_width * output_depth; int group_offset_filter = filter->numel() / groups; // ------------------- cudnn conv workspace --------------------- void* cudnn_workspace = nullptr; @@ -138,12 +168,26 @@ class CudnnConvGradOpKernel : public framework::OpKernel { // ------------------- cudnn descriptors --------------------- ScopedTensorDescriptor input_desc; ScopedTensorDescriptor output_grad_desc; - ScopedTensorDescriptor input_grad_desc; ScopedFilterDescriptor filter_desc; ScopedFilterDescriptor filter_grad_desc; ScopedConvolutionDescriptor conv_desc; DataLayout layout = DataLayout::kNCHW; + if (input->dims().size() == 5) { + layout = DataLayout::kNCDHW; + } + + cudnnConvolutionDescriptor_t cudnn_conv_desc = + conv_desc.descriptor(paddings, strides, dilations); + +#if CUDNN_VERSION_MIN(7, 0, 0) + // cudnn 7 can support groups, no need to do it mannually + // FIXME(typhoonzero): find a better way to disable groups + // rather than setting it to 1. + PADDLE_ENFORCE(platform::dynload::cudnnSetConvolutionGroupCount( + cudnn_conv_desc, groups)); + groups = 1; +#endif cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( layout, framework::vectorize2int(input->dims()), groups); @@ -152,22 +196,35 @@ class CudnnConvGradOpKernel : public framework::OpKernel { layout, framework::vectorize2int(output_grad->dims()), groups); cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor( layout, framework::vectorize2int(filter->dims()), groups); - cudnnTensorDescriptor_t cudnn_input_grad_desc = nullptr; - cudnnFilterDescriptor_t cudnn_filter_grad_desc = nullptr; - - cudnnConvolutionDescriptor_t cudnn_conv_desc = - conv_desc.descriptor(paddings, strides, dilations); int input_channels = input->dims()[1]; - int input_height = input->dims()[2]; - int input_width = input->dims()[3]; + int input_height, input_width, input_depth; + if (input->dims().size() == 5) { + input_depth = input->dims()[2]; + input_height = input->dims()[3]; + input_width = input->dims()[4]; + } else { // dim size is enforced in InferShape + input_depth = 1; + input_height = input->dims()[2]; + input_width = input->dims()[3]; + } + int output_grad_channels = filter->dims()[0]; - int output_grad_height = output_grad->dims()[2]; - int output_grad_width = output_grad->dims()[3]; + int output_grad_height, output_grad_width, output_grad_depth; + if (input->dims().size() == 5) { + output_grad_depth = output_grad->dims()[2]; + output_grad_height = output_grad->dims()[3]; + output_grad_width = output_grad->dims()[4]; + } else { + output_grad_depth = 1; + output_grad_height = output_grad->dims()[2]; + output_grad_width = output_grad->dims()[3]; + } - int group_offset_in = input_channels / groups * input_height * input_width; - int group_offset_out = - output_grad_channels / groups * output_grad_height * output_grad_width; + int group_offset_in = + input_channels / groups * input_height * input_width * input_depth; + int group_offset_out = output_grad_channels / groups * output_grad_height * + output_grad_width * output_grad_depth; int group_offset_filter = filter->numel() / groups; // ------------------- cudnn backward algorithm --------------------- cudnnConvolutionBwdDataAlgo_t data_algo; @@ -180,8 +237,6 @@ class CudnnConvGradOpKernel : public framework::OpKernel { auto handle = ctx.cuda_device_context().cudnn_handle(); if (input_grad) { - cudnn_input_grad_desc = input_grad_desc.descriptor( - layout, framework::vectorize2int(input_grad->dims()), groups); PADDLE_ENFORCE( platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm( handle, cudnn_filter_desc, @@ -190,19 +245,17 @@ class CudnnConvGradOpKernel : public framework::OpKernel { cudnn_output_grad_desc, cudnn_conv_desc, // dxDesc: Handle to the previously initialized output tensor // descriptor. - cudnn_input_grad_desc, + cudnn_input_desc, CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, workspace_size_limit, &data_algo)); PADDLE_ENFORCE( platform::dynload::cudnnGetConvolutionBackwardDataWorkspaceSize( handle, cudnn_filter_desc, cudnn_output_grad_desc, - cudnn_conv_desc, cudnn_input_grad_desc, data_algo, &tmp_size)); + cudnn_conv_desc, cudnn_input_desc, data_algo, &tmp_size)); workspace_size_in_bytes = std::max(workspace_size_in_bytes, tmp_size); } if (filter_grad) { - cudnn_filter_grad_desc = filter_grad_desc.descriptor( - layout, framework::vectorize2int(filter_grad->dims()), groups); PADDLE_ENFORCE( platform::dynload::cudnnGetConvolutionBackwardFilterAlgorithm( handle, cudnn_input_desc, cudnn_output_grad_desc, cudnn_conv_desc, @@ -222,7 +275,6 @@ class CudnnConvGradOpKernel : public framework::OpKernel { platform::GPUPlace gpu = boost::get(ctx.GetPlace()); cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes); // ------------------- cudnn conv backward data --------------------- - // FIXME(typhoonzero): template type T may not be the same as cudnn call. T alpha = 1.0f, beta = 0.0f; if (input_grad) { T* input_grad_data = input_grad->mutable_data(ctx.GetPlace()); @@ -233,21 +285,20 @@ class CudnnConvGradOpKernel : public framework::OpKernel { handle, &alpha, cudnn_filter_desc, filter_data + i * group_offset_filter, cudnn_output_grad_desc, output_grad_data + i * group_offset_out, cudnn_conv_desc, data_algo, - cudnn_workspace, workspace_size_in_bytes, &beta, - cudnn_input_grad_desc, input_grad_data + i * group_offset_in)); + cudnn_workspace, workspace_size_in_bytes, &beta, cudnn_input_desc, + input_grad_data + i * group_offset_in)); } } // ------------------- cudnn conv backward filter --------------------- if (filter_grad) { T* filter_grad_data = filter_grad->mutable_data(ctx.GetPlace()); // Because beta is zero, it is unnecessary to reset filter_grad. - for (int i = 0; i < groups; i++) { PADDLE_ENFORCE(platform::dynload::cudnnConvolutionBackwardFilter( handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in, cudnn_output_grad_desc, output_grad_data + i * group_offset_out, cudnn_conv_desc, filter_algo, cudnn_workspace, - workspace_size_in_bytes, &beta, cudnn_filter_grad_desc, + workspace_size_in_bytes, &beta, cudnn_filter_desc, filter_grad_data + i * group_offset_filter)); } } @@ -259,6 +310,16 @@ class CudnnConvGradOpKernel : public framework::OpKernel { } // namespace operators } // namespace paddle -REGISTER_OP_GPU_KERNEL(conv_cudnn, paddle::operators::CudnnConvOpKernel); -REGISTER_OP_GPU_KERNEL(conv_cudnn_grad, - paddle::operators::CudnnConvGradOpKernel); +REGISTER_OP_GPU_KERNEL(conv2d_cudnn, + paddle::operators::CudnnConvOpKernel, + paddle::operators::CudnnConvOpKernel); +REGISTER_OP_GPU_KERNEL(conv2d_cudnn_grad, + paddle::operators::CudnnConvGradOpKernel, + paddle::operators::CudnnConvGradOpKernel); + +REGISTER_OP_GPU_KERNEL(conv3d_cudnn, + paddle::operators::CudnnConvOpKernel, + paddle::operators::CudnnConvOpKernel); +REGISTER_OP_GPU_KERNEL(conv3d_cudnn_grad, + paddle::operators::CudnnConvGradOpKernel, + paddle::operators::CudnnConvGradOpKernel); diff --git a/paddle/operators/conv_op.h b/paddle/operators/conv_op.h index fac5f1d0e25fe205f89fc7eeb9fadfd8431517d5..09bff0a68db82aa723dc08aa83c775910e17c5b8 100644 --- a/paddle/operators/conv_op.h +++ b/paddle/operators/conv_op.h @@ -38,7 +38,7 @@ inline bool IsExpand(std::vector& filter_dim, std::vector& dilations) { bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true; for (size_t j = 0; j < strides.size(); ++j) { - filter_1 = filter_1 && (static_cast(filter_dim[j]) == 1); + filter_1 = filter_1 && (static_cast(filter_dim[j + 2]) == 1); strides_1 = strides_1 && (strides[j] == 1); padding_0 = padding_0 && (paddings[j] == 0); dilation_1 = dilation_1 && (dilations[j] == 1); @@ -91,32 +91,28 @@ class GemmConvKernel : public framework::OpKernel { const int batch_size = static_cast(input->dims()[0]); - // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + // filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h, k_w} std::vector filter_shape_vec(framework::vectorize(filter.dims())); - filter_shape_vec.erase(filter_shape_vec.begin(), - filter_shape_vec.begin() + 2); - - // output_shape_vec: {o_h, o_w} or {o_d, o_h, o_w} + // output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h, o_w} std::vector output_shape_vec(framework::vectorize(output->dims())); - output_shape_vec.erase(output_shape_vec.begin(), - output_shape_vec.begin() + 2); // use col_shape in the im2col calculation // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h, k_w, o_d, // o_h, o_w} - std::vector col_shape_vec; - col_shape_vec.push_back(input->dims()[1] / groups); - col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), - filter_shape_vec.end()); - col_shape_vec.insert(col_shape_vec.end(), output_shape_vec.begin(), - output_shape_vec.end()); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = input->dims()[1] / groups; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; + } framework::DDim col_shape(framework::make_ddim(col_shape_vec)); // use col_matrix_shape in the gemm calculation // size: (i_c/g * k_h * k_w, o_h * o_w) or (i_c/g * k_d * k_h * k_w, o_d * // o_h * o_w) framework::DDim col_matrix_shape = - framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + framework::flatten_to_2d(col_shape, data_dim + 1); bool is_expand = IsExpand(filter_shape_vec, strides, paddings, dilations); Tensor col; @@ -159,13 +155,13 @@ class GemmConvKernel : public framework::OpKernel { col.ShareDataWith(in_slice); col_matrix.ShareDataWith(col); col_matrix.Resize(col_matrix_shape); - } else if (filter_shape_vec.size() == 2) { + } else if (data_dim == 2U) { // im2col im2col(context.device_context(), in_slice, dilations, strides, std::vector{paddings[0], paddings[1], paddings[0], paddings[1]}, &col); - } else if (filter_shape_vec.size() == 3) { + } else if (data_dim == 3U) { // vol2col vol2col(context.device_context(), in_slice, dilations, strides, paddings, &col); @@ -206,26 +202,22 @@ class GemmConvGradKernel : public framework::OpKernel { const int batch_size = static_cast(input->dims()[0]); - // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + // filter_shape_vec: {k_o, k_i, k_h, k_w} or {k_o, k_i, k_d, k_h, k_w} std::vector filter_shape_vec(framework::vectorize(filter.dims())); - filter_shape_vec.erase(filter_shape_vec.begin(), - filter_shape_vec.begin() + 2); - - // output_shape_vec: {o_h, o_w} or {o_d, o_h, o_w} + // output_shape_vec: {o_n, o_c, o_h, o_w} or {o_n, o_c, o_d, o_h, o_w} std::vector output_shape_vec( framework::vectorize(output_grad->dims())); - output_shape_vec.erase(output_shape_vec.begin(), - output_shape_vec.begin() + 2); // use col_shape in the im2col calculation // col_shape_vec: {i_c/g, k_h, k_w, o_h, o_w} or {i_c/g, k_d, k_h, k_w, o_d, // o_h, o_w} - std::vector col_shape_vec; - col_shape_vec.push_back(input->dims()[1] / groups); - col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), - filter_shape_vec.end()); - col_shape_vec.insert(col_shape_vec.end(), output_shape_vec.begin(), - output_shape_vec.end()); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = input->dims()[1] / groups; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; + } framework::DDim col_shape(framework::make_ddim(col_shape_vec)); // use col_matrix_shape in the gemm calculation @@ -233,7 +225,7 @@ class GemmConvGradKernel : public framework::OpKernel { // or // (i_c/g * k_d * k_h * k_w, o_d * o_h * o_w) framework::DDim col_matrix_shape = - framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + framework::flatten_to_2d(col_shape, data_dim + 1); framework::DDim input_shape = framework::slice_ddim( input->dims(), 1, static_cast(input->dims().size())); @@ -294,12 +286,12 @@ class GemmConvGradKernel : public framework::OpKernel { out_grad_slice, false, T(1.0), &col_matrix, T(0.0)); - if (is_expand && filter_shape_vec.size() == 2) { + if (is_expand && data_dim == 2U) { col2im(context.device_context(), col, dilations, strides, std::vector{paddings[0], paddings[1], paddings[0], paddings[1]}, &in_grad_slice); - } else if (is_expand && filter_shape_vec.size() == 3) { + } else if (is_expand && data_dim == 3U) { col2vol(context.device_context(), col, dilations, strides, paddings, &in_grad_slice); } @@ -328,12 +320,12 @@ class GemmConvGradKernel : public framework::OpKernel { col.ShareDataWith(in_slice); col_matrix.ShareDataWith(col); col_matrix.Resize(col_matrix_shape); - } else if (filter_shape_vec.size() == 2) { + } else if (data_dim == 2U) { im2col(context.device_context(), in_slice, dilations, strides, std::vector{paddings[0], paddings[1], paddings[0], paddings[1]}, &col); - } else if (filter_shape_vec.size() == 3) { + } else if (data_dim == 3U) { vol2col(context.device_context(), in_slice, dilations, strides, paddings, &col); } diff --git a/paddle/operators/conv_transpose_cudnn_op.cc b/paddle/operators/conv_transpose_cudnn_op.cc index dbd1bc3c3bc2d026f13ddcf62919db6cf7d87bc5..0192178ce3a0a47196232f0723baec8324bea60b 100644 --- a/paddle/operators/conv_transpose_cudnn_op.cc +++ b/paddle/operators/conv_transpose_cudnn_op.cc @@ -61,10 +61,12 @@ REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp, REGISTER_OP_CPU_KERNEL( conv2d_transpose_cudnn, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_CPU_KERNEL( conv2d_transpose_cudnn_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp, ops::CudnnConv3DTransposeOpMaker, conv3d_transpose_cudnn_grad, @@ -72,7 +74,9 @@ REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp, REGISTER_OP_CPU_KERNEL( conv3d_transpose_cudnn, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_CPU_KERNEL( conv3d_transpose_cudnn_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); diff --git a/paddle/operators/conv_transpose_cudnn_op.cu.cc b/paddle/operators/conv_transpose_cudnn_op.cu.cc index e2ba77086e737a07471f14e483cbd32ab1d4ee12..494904fe524ae30a5032e489a0c5f20179d8e8ce 100644 --- a/paddle/operators/conv_transpose_cudnn_op.cu.cc +++ b/paddle/operators/conv_transpose_cudnn_op.cu.cc @@ -235,11 +235,15 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn, - ops::CudnnConvTransposeOpKernel); + ops::CudnnConvTransposeOpKernel, + ops::CudnnConvTransposeOpKernel); REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn_grad, - ops::CudnnConvTransposeGradOpKernel); + ops::CudnnConvTransposeGradOpKernel, + ops::CudnnConvTransposeGradOpKernel); REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn, - ops::CudnnConvTransposeOpKernel); + ops::CudnnConvTransposeOpKernel, + ops::CudnnConvTransposeOpKernel); REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn_grad, - ops::CudnnConvTransposeGradOpKernel); + ops::CudnnConvTransposeGradOpKernel, + ops::CudnnConvTransposeGradOpKernel); diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h index ab336ad23ce1c180b68d04e4c85b299e301d5376..0fc0735788c499c2d520c0cc689e1ce07ba67ce8 100644 --- a/paddle/operators/conv_transpose_op.h +++ b/paddle/operators/conv_transpose_op.h @@ -68,30 +68,26 @@ class GemmConvTransposeKernel : public framework::OpKernel { const int batch_size = static_cast(input->dims()[0]); - // input_shape_vec: {h, w} or {d, h, w} + // input_shape_vec: {n, c, h, w} or {n, c, d, h, w} std::vector input_shape_vec = framework::vectorize(input->dims()); - input_shape_vec.erase(input_shape_vec.begin(), input_shape_vec.begin() + 2); - - // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + // filter_shape_vec: {k_o, k_c, k_h, k_w} or {k_o, k_c, k_d, k_h, k_w} std::vector filter_shape_vec = framework::vectorize(filter.dims()); - filter_shape_vec.erase(filter_shape_vec.begin(), - filter_shape_vec.begin() + 2); // use col_shape in the im2col and col2im (or vol2col and col2vol) // calculation // col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w} - std::vector col_shape_vec; - col_shape_vec.push_back(output->dims()[1]); - col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), - filter_shape_vec.end()); - col_shape_vec.insert(col_shape_vec.end(), input_shape_vec.begin(), - input_shape_vec.end()); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = output->dims()[1]; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = input_shape_vec[j + 2]; + } DDim col_shape(framework::make_ddim(col_shape_vec)); // use col_matrix_shape in the gemm calculation // size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w) - DDim col_matrix_shape = - framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + DDim col_matrix_shape = framework::flatten_to_2d(col_shape, data_dim + 1); Tensor col; col.mutable_data(col_shape, context.GetPlace()); @@ -136,7 +132,7 @@ class GemmConvTransposeKernel : public framework::OpKernel { input_batch, false, static_cast(1.0), &col_matrix, static_cast(0.0)); - if (filter_shape_vec.size() == 2) { + if (data_dim == 2U) { // col2im: col_matrix -> dy // from (c * k_h * k_w, h * w) to (c, o_h, o_w) col2im(context.device_context(), col, @@ -144,7 +140,7 @@ class GemmConvTransposeKernel : public framework::OpKernel { std::vector{paddings[0], paddings[1], paddings[0], paddings[1]}, &output_batch); - } else if (filter_shape_vec.size() == 3) { + } else if (data_dim == 3U) { // col2vol: col_matrix -> dy // from (c * k_d * k_h * k_w, d * h * w) to (c, o_d, o_h, o_w) col2vol(context.device_context(), col, dilations, strides, paddings, @@ -176,30 +172,26 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { const int batch_size = static_cast(input->dims()[0]); - // input_shape_vec: {h, w} or {d, h, w} + // input_shape_vec: {n, c, h, w} or {n, c, d, h, w} std::vector input_shape_vec = framework::vectorize(input->dims()); - input_shape_vec.erase(input_shape_vec.begin(), input_shape_vec.begin() + 2); - - // filter_shape_vec: {k_h, k_w} or {k_d, k_h, k_w} + // filter_shape_vec: {k_o, k_c, k_h, k_w} or {k_o, k_c, k_d, k_h, k_w} std::vector filter_shape_vec = framework::vectorize(filter.dims()); - filter_shape_vec.erase(filter_shape_vec.begin(), - filter_shape_vec.begin() + 2); // use col_shape in the im2col and col2im (or vol2col and col2vol) // calculation // col_shape_vec: {c, k_h, k_w, h, w} or {c, k_d, k_h, k_w, d, h, w} - std::vector col_shape_vec; - col_shape_vec.push_back(output_grad->dims()[1]); - col_shape_vec.insert(col_shape_vec.end(), filter_shape_vec.begin(), - filter_shape_vec.end()); - col_shape_vec.insert(col_shape_vec.end(), input_shape_vec.begin(), - input_shape_vec.end()); + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = output_grad->dims()[1]; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = input_shape_vec[j + 2]; + } DDim col_shape(framework::make_ddim(col_shape_vec)); // use col_matrix_shape in the gemm calculation // size: (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w) - DDim col_matrix_shape = - framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1); + DDim col_matrix_shape = framework::flatten_to_2d(col_shape, data_dim + 1); // output size: (c, o_h, o_w) or (c, o_d, o_h, o_w) DDim output_shape = framework::slice_ddim(output_grad->dims(), 1, @@ -248,7 +240,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { Tensor output_grad_batch = output_grad->Slice(i, i + 1).Resize(output_shape); - if (filter_shape_vec.size() == 2) { + if (data_dim == 2U) { // im2col: dy -> col matrix // from (c, o_h, o_w) to (c * k_h * k_w, h * w) im2col(context.device_context(), output_grad_batch, @@ -256,7 +248,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { std::vector{paddings[0], paddings[1], paddings[0], paddings[1]}, &col); - } else if (filter_shape_vec.size() == 3) { + } else if (data_dim == 3U) { // vol2col: dy -> col_matrix // from (c, o_d, o_h, o_w) to (c * k_d * k_h * k_w, d * h * w) vol2col(context.device_context(), output_grad_batch, dilations, diff --git a/paddle/operators/dropout_op.cc b/paddle/operators/dropout_op.cc index 818146aca766cb13b93fd024c11c1209655d9e11..932c0bf8fbf6ffdc466516bb7c8578abf0f57209 100644 --- a/paddle/operators/dropout_op.cc +++ b/paddle/operators/dropout_op.cc @@ -30,7 +30,7 @@ class DropoutOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); ctx->SetOutputDim("Out", x_dims); - if (ctx->Attrs().Get("is_training") == true) { + if (ctx->Attrs().Get("is_test") == false) { ctx->SetOutputDim("Mask", x_dims); } ctx->ShareLoD("X", /*->*/ "Out"); @@ -49,7 +49,7 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("dropout_prob", "Probability of setting units to zero.") .SetDefault(.5f); - AddAttr("is_training", "True if in training phase.").SetDefault(true); + AddAttr("is_test", "True if in test phase.").SetDefault(false); AddAttr("seed", "Dropout random seed.").SetDefault(0); AddComment(R"DOC( @@ -71,8 +71,8 @@ class DropoutOpGrad : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->Attrs().Get("is_training"), true, - "GradOp is only callable when is_training is true"); + PADDLE_ENFORCE_EQ(ctx->Attrs().Get("is_test"), false, + "GradOp is only callable when is_test is false"); PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); PADDLE_ENFORCE(ctx->HasInput("Mask"), "Mask must not be null."); diff --git a/paddle/operators/dropout_op.cu b/paddle/operators/dropout_op.cu index 30c769000f2b98c69eaa78a4c139630dd0956386..db3578b9bf4c081e431f202f0828ec6392c924b2 100644 --- a/paddle/operators/dropout_op.cu +++ b/paddle/operators/dropout_op.cu @@ -59,7 +59,7 @@ class GPUDropoutKernel : public framework::OpKernel { auto Y = EigenMatrix::Reshape(*y, 1); auto place = context.GetEigenDevice(); - if (context.Attr("is_training")) { + if (!context.Attr("is_test")) { auto* mask = context.Output("Mask"); auto* mask_data = mask->mutable_data(context.GetPlace()); int size = framework::product(mask->dims()); diff --git a/paddle/operators/dropout_op.h b/paddle/operators/dropout_op.h index 6000b75fecdff74844605215e9364ac8f8a1525a..d9a130fdc040f745b058c39221f0bb9661473388 100644 --- a/paddle/operators/dropout_op.h +++ b/paddle/operators/dropout_op.h @@ -35,7 +35,7 @@ class CPUDropoutKernel : public framework::OpKernel { auto* y_data = y->mutable_data(context.GetPlace()); float dropout_prob = context.Attr("dropout_prob"); - if (context.Attr("is_training")) { + if (!context.Attr("is_test")) { auto* mask = context.Output("Mask"); auto* mask_data = mask->mutable_data(context.GetPlace()); int seed = context.Attr("seed"); @@ -65,8 +65,8 @@ template class DropoutGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - PADDLE_ENFORCE(context.Attr("is_training"), - "GradOp is only callable when is_training is true"); + PADDLE_ENFORCE(!context.Attr("is_test"), + "GradOp is only callable when is_test is false"); auto* grad_x = context.Output(framework::GradVarName("X")); auto* grad_y = context.Input(framework::GradVarName("Out")); diff --git a/paddle/operators/dynamic_recurrent_op.cc b/paddle/operators/dynamic_recurrent_op.cc deleted file mode 100644 index d48cc4e8df587708ab93e7d788145adc01c1d3e5..0000000000000000000000000000000000000000 --- a/paddle/operators/dynamic_recurrent_op.cc +++ /dev/null @@ -1,418 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve . - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/operators/dynamic_recurrent_op.h" - -#include "paddle/framework/op_registry.h" - -namespace paddle { -namespace operators { - -using framework::Scope; -using framework::TensorArray; -using framework::LoDTensor; -using framework::Variable; -using framework::OperatorBase; -using framework::DySeqMetaBatch; - -namespace detail { - -inline void CreateVariables(Scope& scope, - const std::vector& var_names) { - for (const auto& name : var_names) { - scope.Var(name); - } -} - -/* - * The inputs with sequence should be reordered when they are split, so the - * boot_states should be reordered in the same order. - * - * NOTE This may require that the `pre_state` of the first time step should just - * copy the `boot_state` rather than reference it, for that the content should - * be reordered, but the RNN op should not change the `boot_state` as an input - * variable's content. - */ -inline void ReorderInitialState(const DySeqMetaBatch& metas, - const LoDTensor& boot_state, LoDTensor* tensor, - const platform::Place& dst_place) { - for (size_t seq_id = 0; seq_id < metas.size(); seq_id++) { - auto slice = tensor->Slice(seq_id, seq_id + 1); - auto boot_slice = - boot_state.Slice(metas[seq_id].ori_idx, metas[seq_id].ori_idx + 1); - // TODO(superjom) pass in device context as an argument - slice.CopyFrom(boot_slice, dst_place, platform::CPUDeviceContext()); - } -} - -inline void RestoreInitialState(const DySeqMetaBatch& metas, - const LoDTensor& tensor, LoDTensor* boot_state, - const platform::Place& dst_place) { - for (size_t seq_id = 0; seq_id < metas.size(); seq_id++) { - auto slice = tensor.Slice(seq_id, seq_id + 1); - auto boot_slice = - boot_state->Slice(metas[seq_id].ori_idx, metas[seq_id].ori_idx + 1); - boot_slice.CopyFrom(slice, dst_place, platform::CPUDeviceContext()); - } -} - -} // namespace detail - -// Implementation for forward propagation. -template <> -void RNNAlgorithm::Run( - const framework::Scope& scope, const framework::OperatorBase& op, - const platform::DeviceContext& dev_ctx) { - SetComputeMode(ComputeMode::kForward); - cache_.Init(kArgNames[mode_], op, scope, &dev_ctx, &arg_); - SplitInputs(); - CreateScopes(); - WriteStepInputs(); - InitStates(); - WriteStepOutputs(); - RunSteps(); - ConcatOutputs(); -} - -// Implementation for backward propagation. -template <> -void RNNAlgorithm::Run( - const framework::Scope& scope, const framework::OperatorBase& op, - const platform::DeviceContext& dev_ctx) { - SetComputeMode(ComputeMode::kBackward); - cache_.Init(kArgNames[mode_], op, scope, &dev_ctx, &arg_); - SplitInputs(); - WriteStepInputs(); - InitStates(); - WriteStepOutputs(); - RunSteps(); - // copy boot-states' gradients back. - for (const auto& state : arg_.states) { - ExportInitialStateGradient(state); - } - - ConcatOutputs(); -} - -void RNNAlgorithm::SplitInputs() { - // TODO(superjom) make level a config - // TODO(superjom) check all the inputs has the same LoD - int level = 0; - for (const auto& item : cache_.inputs) { - const auto& var = item.second; - const auto& tensor = var->Get(); - TensorArray& ta = step_inputs_[item.first]; - - dy_seq_metas_[item.first] = - ta.Unpack(tensor, level, true /*length_descend*/); - - if (cache_.num_steps) { - PADDLE_ENFORCE_EQ(ta.size(), cache_.num_steps, - "inputs should have the same steps"); - } else { - cache_.num_steps = ta.size(); - } - } -} - -void RNNAlgorithm::WriteStepInputs() { - for (const auto& item : cache_.inputs) { - auto ta_it = step_inputs_.find(item.first); - PADDLE_ENFORCE(ta_it != step_inputs_.end(), - "step_inputs_ not compatible with memory set"); - TensorArray& ta = ta_it->second; - for (size_t step = 0; step < ta.size(); step++) { - auto tensor = ta.Read(step); - auto& step_scope = cache_.GetScope(step); - Variable* var = step_scope.FindVar(item.first); - if (var == nullptr) { - var = step_scope.Var(item.first); - } - var->GetMutable()->ShareDataWith(tensor); - } - } -} - -void RNNAlgorithm::WriteStepOutputs() { - // initialize step outputs - for (const auto& item : cache_.outputs) { - step_outputs_.emplace(item.first, TensorArray()); - } - PADDLE_ENFORCE_GT(step_outputs_.size(), 0UL); -} - -void RNNAlgorithm::CreateScopes() { - PADDLE_ENFORCE_GT(cache_.num_steps, 0); - // resize scopes - size_t num_scopes_need_create = cache_.num_steps - cache_.scopes->size(); - for (size_t i = 0; i < num_scopes_need_create; i++) { - cache_.scopes->emplace_back(&cache_.scope->NewScope()); - } - - // init temporary inputs - PADDLE_ENFORCE_NOT_NULL(step_unit_, "stepnet should be set first"); - std::vector states; - std::vector ex_states; - std::vector step_unit_outputs; - std::transform(arg_.states.begin(), arg_.states.end(), - std::back_inserter(states), - [](const rnn::StateAttr& m) { return m.var; }); - std::transform(arg_.states.begin(), arg_.states.end(), - std::back_inserter(ex_states), - [](const rnn::StateAttr& m) { return m.pre_var; }); - for (const auto& item : step_unit_->Outputs()) { - for (const auto& var : item.second) { - step_unit_outputs.push_back(var); - } - } - - for (size_t step = 0; step < cache_.num_steps; step++) { - auto& scope = cache_.GetScope(step); - detail::CreateVariables(scope, arg_.inlinks); - detail::CreateVariables(scope, arg_.outlinks); - detail::CreateVariables(scope, states); - detail::CreateVariables(scope, ex_states); - detail::CreateVariables(scope, step_unit_outputs); - } -} - -void RNNAlgorithm::ConcatOutputs() { - // TODO(superjom) transform this to a config - int level = 0; - for (size_t step = 0; step < cache_.num_steps; step++) { - auto& scope = cache_.GetScope(step); - for (auto& item : step_outputs_) { - auto* var = scope.FindVar(item.first); - PADDLE_ENFORCE_NOT_NULL(var); - auto* tensor = var->GetMutable(); - tensor->mutable_data(platform::CPUPlace()); - item.second.WriteShared(step, *tensor); - } - } - // the inputs' lods should be the same, so randomly get one lod. - const auto& some_lod = - cache_.scope->FindVar(arg_.inlinks.front())->Get().lod(); - const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()]; - for (auto& item : step_outputs_) { - auto tensor = item.second.Pack(level, some_meta, some_lod); - auto* output = cache_.outputs[item.first]->GetMutable(); - const_cast(output)->ShareDataWith(tensor); - } -} - -void RNNAlgorithm::RunSteps() { - if (IsBackward()) { - // call stepnet in all the time steps reversely - for (int step = cache_.num_steps - 1; step >= 0; step--) { - auto& step_scope = cache_.GetScope(step); - step_unit_->Run(step_scope, *cache_.dev_ctx); - } - } else { - for (size_t step = 0; step < cache_.num_steps; step++) { - auto& step_scope = cache_.GetScope(step); - step_unit_->Run(step_scope, *cache_.dev_ctx); - } - } -} - -void RNNAlgorithm::InitStates() { - for (size_t step = 0; step < cache_.num_steps; step++) { - for (const auto& state : arg_.states) { - CreateState(state, step); - LinkState(state, step); - } - } -} - -void RNNAlgorithm::CreateState(const rnn::StateAttr& state_attr, size_t step) { - auto& scope = cache_.GetScope(step); - auto& state = *cache_.GetTensor(scope, state_attr.var); - auto& boot_state = *cache_.GetTensor(*cache_.scope, state_attr.boot_var); - - size_t num_instances = - step_inputs_[arg_.inlinks.front()].Read(step).dims()[0]; - auto dims = boot_state.dims(); - dims[0] = num_instances; - - state.Resize(dims); - state.mutable_data(platform::CPUPlace()); - states_[state_attr.var].WriteShared(step, state); -} - -void RNNAlgorithm::LinkState(const rnn::StateAttr& state, size_t step) { - auto& scope = cache_.GetScope(step); - auto& state_pre = *cache_.GetTensor(scope, state.pre_var); - - // process the first state's boot-state(the 0-step in forward mode or the - // last step in backward mode) - // Only forward mode need to link the boot-state to the `pre-state` in first - // time step. In backward mode, need to copy the gradient of `pre-state` in - // first time step to the gradient of `boot-state`. - if (step == 0 && IsForward()) { - LinkInitialState(state); - } else { - size_t num_instances = - step_inputs_[arg_.inlinks.front()].Read(step).dims()[0]; - auto* pre_state = cache_.GetTensor(cache_.GetScope(step - 1), state.var); - // shink and share from previous state - auto shrinked_pre_state = pre_state->Slice(0, num_instances); - state_pre.ShareDataWith(shrinked_pre_state); - } -} - -void RNNAlgorithm::LinkInitialState(const rnn::StateAttr& state) { - // all the step_inputs' metas should be the same, just randomly select one - // and get the dyseq meta. - const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()]; - auto& scope = cache_.GetScope(0); - auto& state_pre = *cache_.GetTensor(scope, state.pre_var); - auto* pre_state = cache_.GetTensor(*cache_.scope, state.boot_var); - pre_state->mutable_data(platform::CPUPlace()); - // allocate state - state_pre.Resize(pre_state->dims()); - state_pre.mutable_data(platform::CPUPlace()); - detail::ReorderInitialState(some_meta, *pre_state, &state_pre, - pre_state->place()); -} - -void RNNAlgorithm::ExportInitialStateGradient(const rnn::StateAttr& state) { - // all the step_inputs' metas should be the same, just randomly select one - // and get the dyseq meta. - const auto& some_meta = dy_seq_metas_[arg_.inlinks.front()]; - auto& scope = cache_.GetScope(0); - - auto& state_pre = *cache_.GetTensor(scope, state.pre_var); - auto& pre_state = *cache_.GetTensor(*cache_.scope, state.boot_var); - pre_state.Resize(state_pre.dims()); - detail::RestoreInitialState(some_meta, state_pre, &pre_state, - pre_state.place()); -} - -void RNNAlgorithm::ArgCache::Init(const rnn::ArgumentName& name, - const paddle::framework::OperatorBase& op, - const paddle::framework::Scope& scope, - platform::DeviceContext const* dev_ctx, - rnn::Argument* arg) { - this->scope = &scope; - InitArgument(name, op, arg); - CacheScopes(scope, *arg); - CacheInlinks(scope, arg->inlinks); - CacheOutlinks(scope, arg->outlinks); - this->dev_ctx = dev_ctx; -} - -void RNNAlgorithm::ArgCache::InitArgument(const rnn::ArgumentName& name, - const OperatorBase& op, - rnn::Argument* arg) { - rnn::InitArgument(name, arg, op, false /*is_grad*/); -} - -void RNNAlgorithm::ArgCache::CacheScopes(const Scope& scope, - const rnn::Argument& arg) { - auto scopes_var = scope.FindVar(arg.step_scopes); - PADDLE_ENFORCE(scopes_var != nullptr, - "the step_scopes output argument [%s] should be created first " - "by framework.", - arg.step_scopes); - this->scopes = scopes_var->GetMutable>(); -} - -void RNNAlgorithm::ArgCache::CacheInlinks( - const Scope& scope, const std::vector& names) { - for (auto name : names) { - auto* var = GetVariable(scope, name); - inputs[name] = var; - } -} - -void RNNAlgorithm::ArgCache::CacheOutlinks( - const Scope& scope, const std::vector& names) { - for (auto name : names) { - auto* var = GetVariable(scope, name); - outputs[name] = var; - } -} - -Variable* RNNAlgorithm::ArgCache::GetVariable(const Scope& scope, - const std::string& name) { - auto* var = scope.FindVar(name); - PADDLE_ENFORCE_NOT_NULL(var, "variable [%s] not exist in scope", name); - return var; -} - -LoDTensor* RNNAlgorithm::ArgCache::GetTensor(const framework::Scope& scope, - const std::string& name) { - auto* var = GetVariable(scope, name); - return var->GetMutable(); -} - -const std::array RNNAlgorithm::kArgNames{ - {rnn::ArgumentName{"step_unit", "step_scopes", "inputs", "outputs", - "states", "ex_states", "initial_states"}, - rnn::ArgumentName{"step_unit", "step_scopes@GRAD", "outputs@GRAD", - "inputs@GRAD", "states", "ex_states", - "initial_states@GRAD"}}}; - -void DynamicRecurrentOp::Run(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const { - rnn.Run( - scope, *dynamic_cast(this), dev_ctx); -} - -void DynamicRecurrentGradientOp::Run( - const Scope& scope, const platform::DeviceContext& dev_ctx) const { - rnn.Run( - scope, *dynamic_cast(this), dev_ctx); -} - -class DynamicRecurrentOpProtoAndCheckerMaker - : public framework::OpProtoAndCheckerMaker { - public: - DynamicRecurrentOpProtoAndCheckerMaker(framework::OpProto* proto, - framework::OpAttrChecker* op_checker) - : OpProtoAndCheckerMaker(proto, op_checker) { - const auto& name = - RNNAlgorithm::kArgNames[RNNAlgorithm::ComputeMode::kForward]; - // inputs and outputs stored in proto - AddInput(name.inlinks, - "The inputs that need to be segmented for each step.") - .AsDuplicable(); - AddInput(name.initial_states, "Variables to initialize the states.") - .AsDuplicable(); - - AddOutput(name.outlinks, - "The outputs that need to be concatenated for all steps.") - .AsDuplicable(); - AddOutput(name.step_scopes, "step scopes"); - - // Attributes stored in AttributeMap - AddAttr>(name.ex_states, "names of ex_states"); - AddAttr>(name.states, "names of states"); - - AddComment(R"DOC( -Dynamic Recurrent Operator. - -This is a RNN operator for varience-length sequences. - -)DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -REGISTER_OP(dynamic_recurrent, paddle::operators::DynamicRecurrentOp, - paddle::operators::DynamicRecurrentOpProtoAndCheckerMaker, - dynamic_recurrent_grad, - paddle::operators::DynamicRecurrentGradientOp); diff --git a/paddle/operators/dynamic_recurrent_op.h b/paddle/operators/dynamic_recurrent_op.h deleted file mode 100644 index 5b0548c3a44c9f58838ecc567ee41a587883c26a..0000000000000000000000000000000000000000 --- a/paddle/operators/dynamic_recurrent_op.h +++ /dev/null @@ -1,233 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once - -#ifdef PADDLE_WITH_TESTING -#include "gtest/gtest.h" -#endif - -#include "paddle/framework/lod_tensor.h" -#include "paddle/framework/operator.h" -#include "paddle/framework/tensor_array.h" -#include "paddle/framework/variable.h" -#include "paddle/operators/rnn/recurrent_op_utils.h" - -namespace paddle { -namespace operators { - -class RNNAlgorithm { - public: - enum ComputeMode { kForward = 0, kBackward = 1 }; - static const std::array kArgNames; - using value_type = float; - - /* - * Different `Run` method for forward and backward, `_` is just for template - * specifialization. - */ - template - void Run(const framework::Scope& scope, const framework::OperatorBase& op, - const platform::DeviceContext& dev_ctx); - /* - * Split the inputs(LoDTensors) to segments for each time step. - */ - void SplitInputs(); - - /* - * Create step-scopes to store temporary outputs in each time steps. - */ - void CreateScopes(); - - /* - * Link TensorArray steps to the corresponding variables located in - * step-scopes. - */ - void WriteStepInputs(); - - /* - * Write output of each step to the corresponding TensorArray. - */ - void WriteStepOutputs(); - - /* - * Initialize the states, each state will have a corresponding pre-state, - * which share the memory with the state in the previous time state. The - * pre-state in the first time step will be initialized with an zero tensor or - * a tensor in parent scope if is provided. - */ - void InitStates(); - - /* - * Create state variables for each time step. - */ - void CreateState(const rnn::StateAttr& state, size_t step); - - /* - * Link pre-state variable in current scope to the state variable in the - * previous time step (scope) by reference. - */ - void LinkState(const rnn::StateAttr& state, size_t step); - - /* - * Link the pre-state of the first time step to the `boot-state` in parent's - * scope. - */ - void LinkInitialState(const rnn::StateAttr& state); - - /* - * Copy the gradient from `pre-state` in the first step-scope to the - * `boot-state` in parent's scope. - */ - void ExportInitialStateGradient(const rnn::StateAttr& state); - - /* - * Calculate time steps. - */ - void RunSteps(); - - /* - * Concatenate outputs in each time step and generate a LoDTensor. - */ - void ConcatOutputs(); - - void SetComputeMode(ComputeMode mode) { mode_ = mode; } - bool IsForward() const { return mode_ == ComputeMode::kForward; } - bool IsBackward() const { return mode_ == ComputeMode::kBackward; } - - /* - * set a step unit that is created according to a RecurrentOp's step unit. - */ - void SetStepUnit(std::unique_ptr step_unit) { - PADDLE_ENFORCE_NOT_NULL(step_unit); - step_unit_ = std::move(step_unit); - } - const framework::OperatorBase& GetStepUnit() const { return *step_unit_; } - - const framework::TensorArray& state(const std::string& name) const { - auto it = states_.find(name); - PADDLE_ENFORCE(it != states_.end()); - return it->second; - } - const framework::TensorArray& step_input(const std::string& name) const { - auto it = step_inputs_.find(name); - PADDLE_ENFORCE(it != step_inputs_.end()); - return it->second; - } - const framework::TensorArray& step_output(const std::string& name) const { - auto it = step_outputs_.find(name); - PADDLE_ENFORCE(it != step_outputs_.end()); - return it->second; - } - - protected: - struct ArgCache { - framework::Scope const* scope; - std::vector* scopes; - std::map inputs; - std::map outputs; - platform::DeviceContext const* dev_ctx; - - size_t num_steps{0}; - - void Init(const rnn::ArgumentName& name, const framework::OperatorBase& op, - const framework::Scope& scope, - platform::DeviceContext const* dev_ctx, rnn::Argument* arg); - - framework::Scope& GetScope(size_t index) { - PADDLE_ENFORCE_LT(index, num_steps); - return *scopes->at(index); - } - - framework::LoDTensor* GetTensor(const framework::Scope& scope, - const std::string& name); - - private: - void InitArgument(const rnn::ArgumentName& name, - const framework::OperatorBase& op, rnn::Argument* arg); - void CacheScopes(const framework::Scope& scope, const rnn::Argument& arg); - void CacheInlinks(const framework::Scope& scope, - const std::vector& names); - void CacheOutlinks(const framework::Scope& scope, - const std::vector& names); - framework::Variable* GetVariable(const framework::Scope& scope, - const std::string& name); - }; - - private: - std::unique_ptr step_unit_; - std::map states_; - std::map step_inputs_; - std::map step_outputs_; - std::map> dy_seq_metas_; - rnn::Argument arg_; - ArgCache cache_; - ComputeMode mode_{ComputeMode::kForward}; - -#ifdef PADDLE_WITH_TESTING - // test forward - friend class RNNAlgorithmTestHelper; - FRIEND_TEST(RNNAlgorithmTestHelper, SplitInputs); - FRIEND_TEST(RNNAlgorithmTestHelper, CreateCache); - FRIEND_TEST(RNNAlgorithmTestHelper, CreateScopes); - FRIEND_TEST(RNNAlgorithmTestHelper, WriteStepInputs); - FRIEND_TEST(RNNAlgorithmTestHelper, WriteStepOutputs); - FRIEND_TEST(RNNAlgorithmTestHelper, InitStates); - FRIEND_TEST(RNNAlgorithmTestHelper, ConcatOutputs); -// TODO(superjom) test backward -#endif -}; - -class DynamicRecurrentOp : public framework::OperatorBase { - public: - DynamicRecurrentOp(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - DynamicRecurrentOp(const DynamicRecurrentOp& o) - : framework::OperatorBase( - static_cast(o)) { - PADDLE_THROW("Not implemented"); - } - - void Run(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const override; - - mutable RNNAlgorithm rnn; -}; - -class DynamicRecurrentGradientOp : public framework::OperatorBase { - public: - DynamicRecurrentGradientOp(const std::string& type, - const framework::VariableNameMap& inputs, - const framework::VariableNameMap& outputs, - const framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - DynamicRecurrentGradientOp(const DynamicRecurrentGradientOp& o) - : framework::OperatorBase( - static_cast(o)) { - PADDLE_THROW("Not implemented"); - } - - void Run(const framework::Scope& scope, - const platform::DeviceContext& dev_ctx) const override; - - mutable RNNAlgorithm rnn; -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/dynamic_recurrent_op_test.cc b/paddle/operators/dynamic_recurrent_op_test.cc deleted file mode 100644 index 8d840e259b190ead86a66df8ab31c5170db4d824..0000000000000000000000000000000000000000 --- a/paddle/operators/dynamic_recurrent_op_test.cc +++ /dev/null @@ -1,217 +0,0 @@ -#include "paddle/operators/dynamic_recurrent_op.h" - -#include - -#include "paddle/framework/ddim.h" -#include "paddle/framework/lod_tensor.h" -#include "paddle/framework/op_desc.h" -#include "paddle/framework/op_registry.h" -#include "paddle/operators/net_op.h" - -namespace paddle { -namespace operators { - -using framework::Scope; -using framework::TensorArray; -using framework::LoDTensor; -using framework::Variable; - -class TestOp : public framework::OperatorBase { - public: - using framework::OperatorBase::OperatorBase; - DEFINE_OP_CLONE_METHOD(TestOp); - void Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const override {} -}; - -void OpDescNewVar(const std::string& param_name, - std::initializer_list arguments, - paddle::framework::OpDesc::Var* var) { - var->set_parameter(param_name); - for (auto& arg_name : arguments) { - var->add_arguments(arg_name); - } -} - -// create a LoD tensor in scope with specific dims -LoDTensor* CreateVar(Scope& scope, std::string name, framework::DDim dims, - const platform::Place& place) { - auto* var = scope.Var(name); - auto* tensor = var->GetMutable(); - tensor->Resize(dims); - tensor->mutable_data(place); - return tensor; -} - -class RNNAlgorithmTestHelper : public ::testing::Test { - protected: - const rnn::ArgumentName argname = RNNAlgorithm::kArgNames[0]; - - virtual void SetUp() override { - CreateGlobalVariables(); - - auto op_desc = CreateOpDesc(); - op = paddle::framework::OpRegistry::CreateOp(op_desc); - dop = &(dynamic_cast(op.get())->rnn); - InitCacheManually(); - InitStepNet(); - } - - framework::OpDesc CreateOpDesc() { - // create op - paddle::framework::OpDesc op_desc; - op_desc.set_type("dynamic_recurrent"); - - OpDescNewVar(argname.inlinks, {"in0"}, op_desc.add_inputs()); - OpDescNewVar(argname.initial_states, {"boot_mem"}, op_desc.add_inputs()); - OpDescNewVar(argname.step_scopes, {"step_scopes"}, op_desc.add_outputs()); - OpDescNewVar(argname.outlinks, {"out0"}, op_desc.add_outputs()); - - // set pre-states - auto pre_memories = op_desc.mutable_attrs()->Add(); - pre_memories->set_name(argname.ex_states); - pre_memories->set_type(paddle::framework::AttrType::STRINGS); - auto pre_memories_item = pre_memories->add_strings(); - *pre_memories_item = "mem@pre"; - - // set states - auto memories = op_desc.mutable_attrs()->Add(); - memories->set_name(argname.states); - memories->set_type(paddle::framework::AttrType::STRINGS); - auto memories_item = memories->add_strings(); - *memories_item = "mem"; - return op_desc; - } - - void CreateGlobalVariables() { - platform::CPUPlace place; - scope.Var("step_scopes"); - CreateVar(scope, "boot_mem", framework::make_ddim({10, 20}), place); - CreateVar(scope, "out0", framework::make_ddim({10, 20}), place); - auto* in0 = CreateVar(scope, "in0", framework::make_ddim({10, 8}), place); - // 10 instanes with 4 sentences, length is 4, 3, 2, 1 respectively. - framework::LoD in0_lod(1); - for (int x : std::vector{0, 4, 7, 9, 10}) { - in0_lod[0].push_back(x); - } - in0->set_lod(in0_lod); - in0->Resize(framework::make_ddim({10, 8})); - // set the content, each sentence content is seqid.batchid - // the seqid starts from 0 - int start = 0; - for (size_t seqid = 0; seqid < in0_lod.size() - 1; seqid++) { - for (size_t batchid = 0; - batchid < in0_lod[0][seqid + 1] - in0_lod[0][seqid]; batchid++) { - float v = seqid + batchid * 0.1; - - for (size_t dim = 0; dim < 8; dim++) { - in0->data()[start * 8 + dim] = v; - } - start++; - } - } - } - - void InitCacheManually() { - dop->cache_.Init(RNNAlgorithm::kArgNames[0], *op, scope, &device_context, - &dop->arg_); - } - - void InitStepNet() { - std::unique_ptr stepnet{new NetOp}; - dynamic_cast(stepnet.get()) - ->AppendOp(std::unique_ptr(new TestOp( - "test", {{"inputs", {"in0"}}, {"initial_states", {"boot_mem"}}}, - {{"outputs", {"out0"}}, {"step_scopes", {"step_scopes"}}}, {}))); - dop->SetStepUnit(std::move(stepnet)); - } - - protected: - RNNAlgorithm* dop; - std::unique_ptr op; - paddle::platform::CPUDeviceContext device_context; - paddle::framework::Scope scope; -}; - -TEST_F(RNNAlgorithmTestHelper, CreateCache) { - const rnn::Argument& arg = dop->arg_; - ASSERT_EQ(arg.inlinks.size(), 1UL); - ASSERT_EQ(arg.outlinks.size(), 1UL); -} - -TEST_F(RNNAlgorithmTestHelper, SplitInputs) { - dop->SplitInputs(); - auto& in0_ta = dop->step_inputs_["in0"]; - ASSERT_EQ(in0_ta.size(), 4UL); - - const auto& batch0 = in0_ta.Read(0); - const auto& batch1 = in0_ta.Read(1); - const auto& batch2 = in0_ta.Read(2); - const auto& batch3 = in0_ta.Read(3); - EXPECT_EQ(batch0.dims()[0], 4); - EXPECT_EQ(batch1.dims()[0], 3); - EXPECT_EQ(batch2.dims()[0], 2); - EXPECT_EQ(batch3.dims()[0], 1); -} - -TEST_F(RNNAlgorithmTestHelper, CreateScopes) { - dop->SplitInputs(); - dop->CreateScopes(); - ASSERT_EQ(dop->cache_.num_steps, 4UL); - ASSERT_EQ(dop->cache_.scopes->size(), 4UL); -} - -TEST_F(RNNAlgorithmTestHelper, WriteStepInputs) { - dop->SplitInputs(); - dop->CreateScopes(); - dop->WriteStepInputs(); - - for (size_t step = 0; step < dop->cache_.num_steps; step++) { - auto& scope = dop->cache_.GetScope(step); - for (auto name : std::vector({"in0"})) { - ASSERT_TRUE(scope.FindVar(name) != nullptr); - } - } -} - -TEST_F(RNNAlgorithmTestHelper, WriteStepOutputs) { - dop->SplitInputs(); - dop->CreateScopes(); - dop->WriteStepInputs(); - dop->WriteStepOutputs(); - - for (size_t step = 0; step < dop->cache_.num_steps; step++) { - auto& scope = dop->cache_.GetScope(step); - for (auto name : std::vector({"out0"})) { - ASSERT_TRUE(scope.FindVar(name)); - } - } -} - -TEST_F(RNNAlgorithmTestHelper, ConcatOutputs) { - // Let's leave this test to python unittest. -} - -TEST_F(RNNAlgorithmTestHelper, InitStates) { - dop->SetComputeMode(RNNAlgorithm::ComputeMode::kForward); - dop->SplitInputs(); - dop->CreateScopes(); - dop->WriteStepInputs(); - dop->WriteStepOutputs(); - dop->InitStates(); - - for (size_t step = 0; step < dop->cache_.num_steps; step++) { - auto& scope = dop->cache_.GetScope(step); - auto state = scope.FindVar("mem"); - ASSERT_TRUE(state != nullptr); - - auto* pre_state = scope.FindVar("mem@pre"); - ASSERT_TRUE(pre_state != nullptr); - - auto* boot_state = scope.FindVar("boot_mem"); - ASSERT_TRUE(boot_state != nullptr); - } -} - -} // operators -} // namespace paddle diff --git a/paddle/operators/expand_op.h b/paddle/operators/expand_op.h index 8ae2c11a5d31dafc1b90d129054ebfabfb761bfe..4d7996ad1e744fead1329c35ce6ea43bf0683ce6 100644 --- a/paddle/operators/expand_op.h +++ b/paddle/operators/expand_op.h @@ -125,7 +125,8 @@ class ExpandGradKernel : public framework::OpKernel { auto* in0 = context.Input(framework::GradVarName("Out")); auto* out0 = context.Output(framework::GradVarName("X")); out0->mutable_data(context.GetPlace()); - out0->CopyFrom(*in0, context.GetPlace(), context.device_context()); + framework::CopyFrom(*in0, context.GetPlace(), context.device_context(), + out0); } else { switch (dims) { REP_EXPAND_GRAD_TEMPLATE(72) diff --git a/paddle/operators/feed_op.cc b/paddle/operators/feed_op.cc index 0dd84cbeaafbafd45132b0a0b744554ce7475411..ee43c22fb13e203c7de1a7e6d1586423fcbfb25a 100644 --- a/paddle/operators/feed_op.cc +++ b/paddle/operators/feed_op.cc @@ -47,7 +47,7 @@ class FeedOp : public framework::OperatorBase { auto &feed_list = feed_var->Get(); auto &feed_item = feed_list.at(static_cast(col)); auto *out_item = out_var->GetMutable(); - out_item->CopyFrom(feed_item, dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(feed_item, dev_ctx.GetPlace(), dev_ctx, out_item); out_item->set_lod(feed_item.lod()); } }; diff --git a/paddle/operators/fetch_op.cc b/paddle/operators/fetch_op.cc index 8108ae69dec4bafd1c04d5ab05eef6f467d4c6e8..1ae07194c235ce6724f59c9c60df80f957787cda 100644 --- a/paddle/operators/fetch_op.cc +++ b/paddle/operators/fetch_op.cc @@ -51,7 +51,7 @@ class FetchOp : public framework::OperatorBase { // FIXME(yuyang18): Should we assume the fetch operator always generate // CPU outputs? - dst_item.CopyFrom(src_item, platform::CPUPlace(), dev_ctx); + CopyFrom(src_item, platform::CPUPlace(), dev_ctx, &dst_item); dev_ctx.Wait(); dst_item.set_lod(src_item.lod()); diff --git a/paddle/operators/fill_constant_batch_size_like_op.cc b/paddle/operators/fill_constant_batch_size_like_op.cc index 985b5d1e865e513d833bff72dcd20a8f20851d8c..892922cd3aaec8bf8194320c5c3a0dd0365bb589 100644 --- a/paddle/operators/fill_constant_batch_size_like_op.cc +++ b/paddle/operators/fill_constant_batch_size_like_op.cc @@ -52,7 +52,7 @@ class FillConstantBatchSizeLikeOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( - static_cast(ctx.Attr("data_type")), + static_cast(ctx.Attr("dtype")), ctx.device_context()); } }; @@ -63,7 +63,7 @@ class FillConstantBatchSizeLikeOpMaker FillConstantBatchSizeLikeOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/fill_constant_op.cc b/paddle/operators/fill_constant_op.cc index 818f113b90a4c239a857791fb9957e51d3287b97..3d5f84bc239615797a5cf01a74150fdb7dfc1b80 100644 --- a/paddle/operators/fill_constant_op.cc +++ b/paddle/operators/fill_constant_op.cc @@ -34,7 +34,7 @@ class FillConstantOp : public framework::OperatorBase { using framework::OperatorBase::OperatorBase; void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const override { - auto data_type = static_cast(Attr("data_type")); + auto data_type = static_cast(Attr("dtype")); auto value = Attr("value"); auto force_cpu = Attr("force_cpu"); auto &out = @@ -55,7 +55,7 @@ class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker { FillConstantOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/ftrl_op.cc b/paddle/operators/ftrl_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..cb7ae6919623f10a6c4ec98c0e942c1590ac9a7a --- /dev/null +++ b/paddle/operators/ftrl_op.cc @@ -0,0 +1,139 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/ftrl_op.h" + +namespace paddle { +namespace operators { + +class FTRLOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Param"), + "Input(Param) of FTRL should not be null."); + PADDLE_ENFORCE(ctx->HasInput("SquaredAccumulator"), + "Input(SquaredAccumulator) of FTRL should not be null."); + PADDLE_ENFORCE(ctx->HasInput("LinearAccumulator"), + "Input(LinearAccumulator) of FTRL should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Grad"), + "Input(Grad) of FTRL should not be null."); + PADDLE_ENFORCE(ctx->HasInput("LearningRate"), + "Input(LearningRate) of FTRL should not be null."); + + PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), + "Output(ParamOut) of FTRL should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("SquaredAccumOut"), + "Output(SquaredAccumOut) of FTRL should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("LinearAccumOut"), + "Output(LinearAccumOut) of FTRL should not be null."); + + auto param_dim = ctx->GetInputDim("Param"); + PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Grad"), + "Two input of FTRL Op's dimension must be same."); + + auto lr_dim = ctx->GetInputDim("LearningRate"); + PADDLE_ENFORCE_EQ(framework::product(lr_dim), 1, + "Learning Rate should be a scalar."); + + ctx->SetOutputDim("ParamOut", param_dim); + ctx->SetOutputDim("SquaredAccumOut", param_dim); + ctx->SetOutputDim("LinearAccumOut", param_dim); + } +}; + +class FTRLOpMaker : public framework::OpProtoAndCheckerMaker { + public: + FTRLOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Param", + "(Tensor, default Tensor) " + "Input parameter value that has to be updated."); + AddInput("SquaredAccumulator", + "(Tensor, default Tensor) " + "Accumulator that accumulates squared gradients."); + AddInput("LinearAccumulator", + "(Tensor, default Tensor) " + "Accumulator that accumulates linear gradients."); + AddInput("Grad", + "(Tensor, default Tensor) " + "Input gradient of the parameter."); + AddInput("LearningRate", + "(Tensor, default Tensor) " + "The learning rate should be a tensor of size 1."); + + AddOutput("ParamOut", "(Tensor) Output updated parameter value."); + AddOutput("SquaredAccumOut", + "(Tensor) Output accumulated squared" + " gradients."); + AddOutput("LinearAccumOut", + "(Tensor) Output accumulated linear" + " gradients."); + + AddAttr("l1", + "(float, default 0.0) " + "L1 regularization strength.") + .SetDefault(0.0f); + AddAttr("l2", + "(float, default 0.0) " + "L2 regularization strength.") + .SetDefault(0.0f); + AddAttr("lr_power", + "(float, default -0.5f) " + "Learning Rate Power.") + .SetDefault(-0.5f); + AddComment(R"DOC( +FTRL (Follow The Regularized Leader) Operator. + +Optimizer that implements the FTRL algorithm: + +$$ +new\_accum = squared\_accum + grad^2 \\ +if (lr\_power == -0.5) { + linear\_accum += grad - (\surd(new\_accum) - \surd(squared\_accum)) / + (learning\_rate * param) \\ +} else { + linear\_accum += grad - + (new\_accum^{-lr\_power} - accum^{-lr\_power}) / + (learning\_rate * param) \\ +} + +x = (l1 * sign(linear\_accum) - linear\_accum) +if (lr\_power == -0.5) { + y = \frac{\surd(new\_accum)}{learning\_rate} + (2 * l2) \\ + pre\_shrink = \frac{x}{y} \\ + param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0) \\ +} else { + y = \frac{new\_accum^{-lr\_power}}{learning\_rate} + (2 * l2) \\ + pre\_shrink = \frac{x}{y} \\ + param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0) \\ +} +squared\_accum += grad^2; +$$ + +The paper that proposed Follow The Regularized Leader (FTRL): +(https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf) + +)DOC"); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(ftrl, ops::FTRLOp, ops::FTRLOpMaker); +REGISTER_OP_CPU_KERNEL(ftrl, + ops::FTRLOpKernel); diff --git a/paddle/operators/ftrl_op.cu b/paddle/operators/ftrl_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..97b36dade6f531df49615ae2d44d565eadba7154 --- /dev/null +++ b/paddle/operators/ftrl_op.cu @@ -0,0 +1,19 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +You may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/ftrl_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(ftrl, + ops::FTRLOpKernel); diff --git a/paddle/operators/ftrl_op.h b/paddle/operators/ftrl_op.h new file mode 100644 index 0000000000000000000000000000000000000000..b040162f8d1d8998aa13021c10a25fe57135c1e9 --- /dev/null +++ b/paddle/operators/ftrl_op.h @@ -0,0 +1,96 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; + +template +class FTRLOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* param_out = ctx.Output("ParamOut"); + auto* sq_accum_out = ctx.Output("SquaredAccumOut"); + auto* lin_accum_out = ctx.Output("LinearAccumOut"); + + param_out->mutable_data(ctx.GetPlace()); + sq_accum_out->mutable_data(ctx.GetPlace()); + lin_accum_out->mutable_data(ctx.GetPlace()); + + auto grad = ctx.Input("Grad"); + + auto l1 = static_cast(ctx.Attr("l1")); + auto l2 = static_cast(ctx.Attr("l2")); + auto lr_power = static_cast(ctx.Attr("lr_power")); + + auto p = EigenVector::Flatten(*ctx.Input("Param")); + auto sq_accum = + EigenVector::Flatten(*ctx.Input("SquaredAccumulator")); + auto lin_accum = + EigenVector::Flatten(*ctx.Input("LinearAccumulator")); + auto g = EigenVector::Flatten(*grad); + auto lr = EigenVector::Flatten(*ctx.Input("LearningRate")); + + auto p_out = EigenVector::Flatten(*param_out); + auto s_acc_out = EigenVector::Flatten(*sq_accum_out); + auto l_acc_out = EigenVector::Flatten(*lin_accum_out); + auto place = ctx.GetEigenDevice(); + + Eigen::DSizes grad_dsize(grad->numel()); + + auto new_accum = sq_accum + g * g; + // Special case for lr_power = -0.5 + if (lr_power == static_cast(-0.5)) { + l_acc_out.device(place) = + lin_accum + g - + ((new_accum.sqrt() - sq_accum.sqrt()) / lr.broadcast(grad_dsize)) * p; + } else { + l_acc_out.device(place) = + lin_accum + g - + ((new_accum.pow(-lr_power) - sq_accum.pow(-lr_power)) / + lr.broadcast(grad_dsize)) * + p; + } + + auto x = (l_acc_out.constant(l1) * l_acc_out.sign() - l_acc_out); + if (lr_power == static_cast(-0.5)) { + auto y = (new_accum.sqrt() / lr.broadcast(grad_dsize)) + + l_acc_out.constant(static_cast(2) * l2); + auto pre_shrink = x / y; + p_out.device(place) = + (l_acc_out.abs() > l_acc_out.constant(l1)) + .select(pre_shrink, p.constant(static_cast(0))); + } else { + auto y = (new_accum.pow(-lr_power) / lr.broadcast(grad_dsize)) + + l_acc_out.constant(static_cast(2) * l2); + auto pre_shrink = x / y; + p_out.device(place) = + (l_acc_out.abs() > l_acc_out.constant(l1)) + .select(pre_shrink, p.constant(static_cast(0))); + } + + s_acc_out.device(place) = sq_accum + g * g; + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 53ad86c6c48d1868f4495af51661d91b39a84f0b..254c83e1378a121d99c89d9d8705935b5f06edc8 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -60,7 +60,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( - static_cast(ctx.Attr("data_type")), + static_cast(ctx.Attr("dtype")), ctx.device_context()); } }; @@ -88,7 +88,7 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { "Random seed of generator." "0 means use system wide seed.") .SetDefault(0); - AddAttr("data_type", + AddAttr("dtype", "(int, default 5(FP32)) " "Output data type.") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/gru_unit_op.cc b/paddle/operators/gru_unit_op.cc index 89c027ff1eea93012dc5ab22b081786efc328e96..877c969103cfc17e1b170449d1922d9c7db2a58b 100644 --- a/paddle/operators/gru_unit_op.cc +++ b/paddle/operators/gru_unit_op.cc @@ -114,18 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault(sigmoid) .InEnum({identity, sigmoid, tanh, relu}); AddComment(R"DOC( -GRUUnit Operator. - -This operator implements partial calculations of the GRU unit as follows: +GRUUnit Operator implements partial calculations of the GRU unit as following: $$ -update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\ -reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\ -output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\ -output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev}) +update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\ +reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\ +output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\ +output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t) $$ -The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp. +which is same as one time step of GRU Operator. + +@note To implement the complete GRU unit, fully-connected operator must be +used before to feed xu, xr and xc as the Input of GRUUnit operator. )DOC"); } @@ -150,12 +151,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { "ResetHiddenPrev"); PADDLE_ENFORCE(ctx->HasInput("Hidden"), "Input(%s) of GRUUnitGradOp should not be null.", "Hidden"); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Gate")), - "Input(%s@GRAD) of GRUUnitGradOp should not be null.", - "Gate"); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("ResetHiddenPrev")), - "Input(%s@GRAD) of GRUUnitGradOp should not be null.", - "ResetHiddenPrev"); PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")), "Input(%s@GRAD) of GRUUnitGradOp should not be null.", "Hidden"); diff --git a/paddle/operators/gru_unit_op.h b/paddle/operators/gru_unit_op.h index c53e7d9827e0395e6ce613302e732b2797f83cdd..3398c0934e250cfc292776d08773204bb9b4d87e 100644 --- a/paddle/operators/gru_unit_op.h +++ b/paddle/operators/gru_unit_op.h @@ -28,6 +28,10 @@ template using EigenMatrix = framework::EigenMatrix; +template +using EigenVector = framework::EigenVector; + enum GRUActivationType { identity = 0, sigmoid = 1, tanh = 2, relu = 3 }; template @@ -110,7 +114,7 @@ class GRUUnitKernel : public framework::OpKernel { auto c = g.slice(c_offsets, extents); // output candidate // calculate final output - h.device(place) = u * (h_p - c) + c; + h.device(place) = u * (c - h_p) + h_p; } }; @@ -146,35 +150,27 @@ class GRUUnitGradKernel : public framework::OpKernel { auto* weight_grad = context.Output(framework::GradVarName("Weight")); auto* bias_grad = context.Output(framework::GradVarName("Bias")); - input_grad->mutable_data(context.GetPlace()); - hidden_prev_grad->mutable_data(context.GetPlace()); - weight_grad->mutable_data(context.GetPlace()); Tensor gate_grad; - gate_grad.mutable_data(input->dims(), context.GetPlace()); Tensor reset_hidden_prev_grad; - reset_hidden_prev_grad.mutable_data(reset_hidden_prev->dims(), - context.GetPlace()); - - int batch_size = input->dims()[0]; - int frame_size = hidden_prev->dims()[1]; const T* hidden_prev_data = hidden_prev->data(); - T* hidden_prev_grad_data = hidden_prev_grad->data(); const T* weight_data = weight->data(); - T* weight_grad_data = weight_grad->data(); - T* gate_grad_data = gate_grad.data(); + T* gate_grad_data = + gate_grad.mutable_data(input->dims(), context.GetPlace()); const T* reset_hidden_prev_data = reset_hidden_prev->data(); - T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.data(); + T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.mutable_data( + reset_hidden_prev->dims(), context.GetPlace()); auto h_p = EigenMatrix::From(*hidden_prev); auto g = EigenMatrix::From(*gate); auto d_h = EigenMatrix::From(*hidden_grad); - auto d_x = EigenMatrix::From(*input_grad); - auto d_h_p = EigenMatrix::From(*hidden_prev_grad); auto d_g = EigenMatrix::From(gate_grad); auto d_r_h_p = EigenMatrix::From(reset_hidden_prev_grad); auto place = context.GetEigenDevice(); + int batch_size = input->dims()[0]; + int frame_size = hidden_prev->dims()[1]; + Eigen::array extents({{batch_size, frame_size}}); Eigen::array u_offsets({{0, 0}}); auto u = g.slice(u_offsets, extents); // update gate @@ -185,42 +181,56 @@ class GRUUnitGradKernel : public framework::OpKernel { // backward for unactivated update gate ActGradCompute(context.Attr("gate_activation"), place, u, u, - d_g.slice(u_offsets, extents), d_h * (h_p - c)); + d_g.slice(u_offsets, extents), d_h * (c - h_p)); // backward for unactivated output candidate ActGradCompute(context.Attr("activation"), place, c, c, - d_g.slice(c_offsets, extents), d_h * (u.constant(T(1)) - u)); + d_g.slice(c_offsets, extents), d_h * u); // backward for reset_hidden_prev math::gemm(context.device_context(), false, true, batch_size, frame_size, frame_size, 1, gate_grad_data + frame_size * 2, frame_size * 3, weight_data + frame_size * frame_size * 2, frame_size, 0, reset_hidden_prev_grad_data, frame_size); - // backward for state_weight - math::gemm( - context.device_context(), true, false, frame_size, frame_size, - batch_size, 1, reset_hidden_prev_data, frame_size, - gate_grad_data + frame_size * 2, frame_size * 3, 0, - weight_grad_data + frame_size * frame_size * 2, frame_size); // backward for unactivated reset gate ActGradCompute(context.Attr("gate_activation"), place, r, r, d_g.slice(r_offsets, extents), d_r_h_p * h_p); - // backward for update_gate_weight and reset_gate_weight - math::gemm(context.device_context(), true, false, frame_size, - frame_size * 2, batch_size, 1, hidden_prev_data, - frame_size, gate_grad_data, frame_size * 3, 0, - weight_grad_data, frame_size * 2); + // backward for weight + if (weight_grad) { + T* weight_grad_data = weight_grad->mutable_data(context.GetPlace()); + // backward for state_weight + math::gemm( + context.device_context(), true, false, frame_size, frame_size, + batch_size, 1, reset_hidden_prev_data, frame_size, + gate_grad_data + frame_size * 2, frame_size * 3, 0, + weight_grad_data + frame_size * frame_size * 2, frame_size); + + // backward for update_gate_weight and reset_gate_weight + math::gemm(context.device_context(), true, false, frame_size, + frame_size * 2, batch_size, 1, hidden_prev_data, + frame_size, gate_grad_data, frame_size * 3, 0, + weight_grad_data, frame_size * 2); + } // backward for hidden_prev - d_h_p.device(place) = d_r_h_p * r + d_h * u; - math::gemm(context.device_context(), false, true, batch_size, - frame_size, frame_size * 2, 1, gate_grad_data, - frame_size * 3, weight_data, frame_size * 2, 1, - hidden_prev_grad_data, frame_size); + if (hidden_prev_grad) { + T* hidden_prev_grad_data = + hidden_prev_grad->mutable_data(context.GetPlace()); + auto d_h_p = EigenMatrix::From(*hidden_prev_grad); + d_h_p.device(place) = d_r_h_p * r + d_h * (u.constant(T(1)) - u); + math::gemm(context.device_context(), false, true, batch_size, + frame_size, frame_size * 2, 1, gate_grad_data, + frame_size * 3, weight_data, frame_size * 2, 1, + hidden_prev_grad_data, frame_size); + } // backward for input - d_x.device(place) = d_g; + if (input_grad) { + input_grad->mutable_data(context.GetPlace()); + auto d_x = EigenMatrix::From(*input_grad); + d_x.device(place) = d_g; + } // backward for bias if (bias_grad) { bias_grad->mutable_data(context.GetPlace()); - auto d_b = EigenMatrix::From(*bias_grad); + auto d_b = EigenVector::Flatten(*bias_grad); d_b.device(place) = d_g.sum(Eigen::array({{0}})); } } diff --git a/paddle/operators/huber_loss_op.cc b/paddle/operators/huber_loss_op.cc index 3435e74b0afb470fcbd1c0f4e06ad363352cac00..938803d5b36177c782fe40bc34fd92504e5bbf7b 100644 --- a/paddle/operators/huber_loss_op.cc +++ b/paddle/operators/huber_loss_op.cc @@ -70,11 +70,18 @@ input value and Y as the target value. Huber loss can evaluate the fitness of X to Y. Different from MSE loss, Huber loss is more robust for outliers. The shape of X and Y are [batch_size, 1]. The equation is: -L_{\delta}(y, f(x)) = +$$ +Out_{\delta}(X, Y)_i = \begin{cases} -0.5 * (y - f(x))^2, \quad |y - f(x)| \leq \delta \\ -\delta * (|y - f(x)| - 0.5 * \delta), \quad otherwise +0.5 * (Y_i - X_i)^2, +\quad |Y_i - X_i| \leq \delta \\ +\delta * (|Y_i - X_i| - 0.5 * \delta), +\quad otherwise \end{cases} +$$ + +In the above equation, $Out_\delta(X, Y)_i$, $X_i$ and $Y_i$ represent the ith +element of Out, X and Y. )DOC"); } diff --git a/paddle/operators/linear_chain_crf_op.cc b/paddle/operators/linear_chain_crf_op.cc index 066bdf67aa037e9c25cfdfaff7ec8771eb59cde8..8e079a14e0a15e8ff803b6087e6b0b02083479ef 100644 --- a/paddle/operators/linear_chain_crf_op.cc +++ b/paddle/operators/linear_chain_crf_op.cc @@ -32,19 +32,19 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { "[(D + 2) x D]. The learnable parameter for the linear_chain_crf " "operator. See more details in the operator's comments."); AddInput("Label", - "(LoDTensor, default LoDTensor) A LoDTensor with shape " + "(LoDTensor, default LoDTensor) A LoDTensor with shape " "[N x 1], where N is the total element number in a mini-batch. " "The ground truth."); AddOutput( "Alpha", "(Tensor, default Tensor) A 2-D Tensor with shape [N x D]. " - "The forward vectors for the entire batch. Denote it as \f$\alpha\f$. " - "\f$\alpha$\f is a memo table used to calculate the normalization " - "factor in CRF. \f$\alpha[k, v]$\f stores the unnormalized " + "The forward vectors for the entire batch. Denote it as $\alpha$. " + "$\alpha$ is a memo table used to calculate the normalization " + "factor in CRF. $\alpha[k, v]$ stores the unnormalized " "probabilites of all possible unfinished sequences of tags that end at " - "position \f$k$\f with tag \f$v$\f. For each \f$k$\f, " - "\f$\alpha[k, v]$\f is a vector of length \f$D$\f with a component for " - "each tag value \f$v$\f. This vector is called a forward vecotr and " + "position $k$ with tag $v$. For each $k$, " + "$\alpha[k, v]$ is a vector of length $D$ with a component for " + "each tag value $v$. This vector is called a forward vecotr and " "will also be used in backward computations.") .AsIntermediate(); AddOutput( @@ -73,9 +73,9 @@ LinearChainCRF Operator. Conditional Random Field defines an undirected probabilistic graph with nodes denoting random variables and edges denoting dependencies between these -variables. CRF learns the conditional probability \f$P(Y|X)\f$, where -\f$X = (x_1, x_2, ... , x_n)\f$ are structured inputs and -\f$Y = (y_1, y_2, ... , y_n)\f$ are labels for the inputs. +variables. CRF learns the conditional probability $P(Y|X)$, where +$X = (x_1, x_2, ... , x_n)$ are structured inputs and +$Y = (y_1, y_2, ... , y_n)$ are labels for the inputs. Linear chain CRF is a special case of CRF that is useful for sequence labeling task. Sequence labeling tasks do not assume a lot of conditional @@ -88,21 +88,22 @@ CRF. Please refer to http://www.cs.columbia.edu/~mcollins/fb.pdf and http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for details. Equation: -1. Denote Input(Emission) to this operator as \f$x\f$ here. +1. Denote Input(Emission) to this operator as $x$ here. 2. The first D values of Input(Transition) to this operator are for starting -weights, denoted as \f$a\f$ here. +weights, denoted as $a$ here. 3. The next D values of Input(Transition) of this operator are for ending -weights, denoted as \f$b\f$ here. +weights, denoted as $b$ here. 4. The remaning values of Input(Transition) are for transition weights, -denoted as \f$w\f$ here. -5. Denote Input(Label) as \f$s\f$ here. - -The probability of a sequence \f$s\f$ of length \f$L\f$ is defined as: -\f$P(s) = (1/Z) \exp(a_{s_1} + b_{s_L} - + \sum_{l=1}^L x_{s_l} - + \sum_{l=2}^L w_{s_{l-1},s_l})\f$ -where \f$Z\f$ is a normalization value so that the sum of \f$P(s)\f$ over -all possible sequences is \f$1\f$, and \f$x\f$ is the emission feature weight +denoted as $w$ here. +5. Denote Input(Label) as $s$ here. + +The probability of a sequence $s$ of length $L$ is defined as: +$$P(s) = (1/Z) \exp(a_{s_1} + b_{s_L} + + \sum_{l=1}^L x_{s_l} + + \sum_{l=2}^L w_{s_{l-1},s_l})$$ + +where $Z$ is a normalization value so that the sum of $P(s)$ over +all possible sequences is 1, and $x$ is the emission feature weight to the linear chain CRF. Finally, the linear chain CRF operator outputs the logarithm of the conditional diff --git a/paddle/operators/linear_chain_crf_op.h b/paddle/operators/linear_chain_crf_op.h index ddf73981751798c72cef08f2dd5c87580b45aec3..014bbfa7580011e38a2f546e30d1e584965a7815 100644 --- a/paddle/operators/linear_chain_crf_op.h +++ b/paddle/operators/linear_chain_crf_op.h @@ -195,7 +195,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { auto copyLoDTensor = [](const platform::DeviceContext& ctx, const LoDTensor& src, LoDTensor* dst) { dst->mutable_data(src.dims(), platform::CPUPlace()); - dst->CopyFrom(src, platform::CPUPlace(), ctx); + framework::CopyFrom(src, platform::CPUPlace(), ctx, dst); }; copyLoDTensor(ctx, emission_weights_src, emission_weights_dst); @@ -203,8 +203,8 @@ class LinearChainCRFOpKernel : public framework::OpKernel { transition_weights_dst->mutable_data(transition_weights_src.dims(), platform::CPUPlace()); - transition_weights_dst->CopyFrom(transition_weights_src, - platform::CPUPlace(), ctx); + framework::CopyFrom(transition_weights_src, platform::CPUPlace(), ctx, + transition_weights_dst); } void CopyOutputsToGpuMemory(const platform::DeviceContext& ctx, @@ -219,7 +219,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor& src, Tensor* dst) { dst->mutable_data(platform::GPUPlace()); - dst->CopyFrom(src, platform::GPUPlace(), ctx); + framework::CopyFrom(src, platform::GPUPlace(), ctx, dst); }; copyTensor(ctx, emission_exps_src, emission_exps_dst); copyTensor(ctx, transition_exps_src, transition_exps_dst); @@ -271,7 +271,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { ll -= std::log(sum); // Now ll is equal to -log(Z). - const int* lbl = label.data(); + const int64_t* lbl = label.data(); PADDLE_ENFORCE_LT( static_cast(*std::max_element(lbl, lbl + seq_length)), tag_num, "An invalid tag label that execesses the largest tag number."); @@ -410,12 +410,12 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { // Copy the inputs from GPU memory to CPU memory when this operators runs on // GPU device. label_dst->mutable_data(label_src.dims(), platform::CPUPlace()); - label_dst->CopyFrom(label_src, platform::CPUPlace(), ctx); + framework::CopyFrom(label_src, platform::CPUPlace(), ctx, label_dst); auto copyTensor = [](const platform::DeviceContext& ctx, const Tensor& src, Tensor* dst) { dst->mutable_data(src.dims(), platform::CPUPlace()); - dst->CopyFrom(src, platform::CPUPlace(), ctx); + framework::CopyFrom(src, platform::CPUPlace(), ctx, dst); }; copyTensor(ctx, emission_exps_src, emission_exps_dst); copyTensor(ctx, transition_exps_src, transition_exps_dst); @@ -434,7 +434,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { Tensor* dst) { if (src && dst) { dst->mutable_data(platform::GPUPlace()); - dst->CopyFrom(*src, platform::GPUPlace(), ctx); + framework::CopyFrom(*src, platform::GPUPlace(), ctx, dst); } }; copyTensor(ctx, emission_grad_src, emission_grad_dst); @@ -449,7 +449,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { Tensor* emission_grad) const { const T* w_exps = transition_exps.data(); const T* x_exps = emission_exps.data(); - const int* label_value = label.data(); + const int64_t* label_value = label.data(); T* beta_value = beta->data(); auto x_dims = emission_exps.dims(); diff --git a/paddle/operators/load_op.cc b/paddle/operators/load_op.cc index b71a33a6b1ce80b545e6d7a4020dafc941dc55d2..b0838eed1611c1d51e57fc2300606f753982dc89 100644 --- a/paddle/operators/load_op.cc +++ b/paddle/operators/load_op.cc @@ -105,7 +105,7 @@ class LoadOp : public framework::OperatorBase { out_var->Clear(); tensor = out_var->GetMutable(); tensor->set_lod(cpu_tensor.lod()); - tensor->CopyFrom(cpu_tensor, place, dev_ctx); + CopyFrom(cpu_tensor, place, dev_ctx, tensor); } } }; diff --git a/paddle/operators/lod_reset_op.h b/paddle/operators/lod_reset_op.h index 2bb916ccee80c83a02ea429fe95f5fafc86ccfa6..cbcbf80adc3cf68f9eb28bbe2a69168cc8798347 100644 --- a/paddle/operators/lod_reset_op.h +++ b/paddle/operators/lod_reset_op.h @@ -33,7 +33,8 @@ class LoDResetKernel : public framework::OpKernel { auto* lod = lod_t->data(); if (platform::is_gpu_place(ctx.GetPlace())) { framework::Tensor lod_cpu; - lod_cpu.CopyFrom(*lod_t, platform::CPUPlace(), ctx.device_context()); + framework::CopyFrom(*lod_t, platform::CPUPlace(), ctx.device_context(), + &lod_cpu); lod = lod_cpu.data(); } level0 = std::vector(lod, lod + lod_t->numel()); diff --git a/paddle/operators/lod_tensor_to_array_op.cc b/paddle/operators/lod_tensor_to_array_op.cc index 58af35564d83b9699af4f7783fb6367ff9590682..010c79d4e153463d4b2e48e5fd798d3bc4febaf1 100644 --- a/paddle/operators/lod_tensor_to_array_op.cc +++ b/paddle/operators/lod_tensor_to_array_op.cc @@ -81,11 +81,11 @@ class LoDTensorToArrayOp : public framework::OperatorBase { continue; } // out[i][offset: offset+len] = x[each_range.begin: each_range.end] - out[i] - .Slice(static_cast(offset), static_cast(offset + len)) - .CopyFrom(x.Slice(static_cast(each_range.begin), - static_cast(each_range.end)), - x.place(), dev_ctx); + auto slice = out[i].Slice(static_cast(offset), + static_cast(offset + len)); + framework::CopyFrom(x.Slice(static_cast(each_range.begin), + static_cast(each_range.end)), + x.place(), dev_ctx, &slice); offset += len; } } diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt index 002b68fecf4f1e294387357f0346d9926a2b2b5a..3017f133afc5d4dcd484c78b44591a876ab4d667 100644 --- a/paddle/operators/math/CMakeLists.txt +++ b/paddle/operators/math/CMakeLists.txt @@ -14,6 +14,7 @@ if(WITH_GPU) nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context) nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions) nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function) + nv_library(maxouting SRCS maxouting.cc maxouting.cu DEPS device_context) else() cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context framework_proto) cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function) @@ -26,6 +27,7 @@ else() cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context) cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions) cc_library(gru_compute SRCS gru_compute.cc DEPS device_context activation_functions math_function) + cc_library(maxouting SRCS maxouting.cc DEPS device_context) endif() cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) diff --git a/paddle/operators/math/context_project.h b/paddle/operators/math/context_project.h index 72f4202bace4461d2597204feaa2a21e355bd1ac..d853507188cf8c80aede1e7646736036e30c9678 100644 --- a/paddle/operators/math/context_project.h +++ b/paddle/operators/math/context_project.h @@ -149,7 +149,7 @@ class ContextProjectFunctor { Tensor out_t_sub = out_t.Slice(k * context_length, k * context_length + padding_size); Tensor w_sub = padding_data.Slice(k, k + padding_size); - out_t_sub.CopyFrom(w_sub, context.GetPlace(), context); + framework::CopyFrom(w_sub, context.GetPlace(), context, &out_t_sub); } } if (down_pad > 0) { // add down pad @@ -179,7 +179,7 @@ class ContextProjectFunctor { (down_pad_begin_row + t) * context_length); Tensor w_sub = padding_data.Slice( up_pad + padding_idx, up_pad + padding_idx + padding_size); - out_t_sub.CopyFrom(w_sub, context.GetPlace(), context); + framework::CopyFrom(w_sub, context.GetPlace(), context, &out_t_sub); } } out_t.Resize({sequence_height, context_length * sequence_width}); diff --git a/paddle/operators/math/im2col.h b/paddle/operators/math/im2col.h index deb60051beef56437cf75f0fa2cef90bbc0a209a..24fd9a06e9f5fbd50483429379cf3f46ff88bcaa 100644 --- a/paddle/operators/math/im2col.h +++ b/paddle/operators/math/im2col.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/framework/tensor.h" +#include "paddle/framework/tensor_util.h" #include "paddle/platform/device_context.h" namespace paddle { diff --git a/paddle/operators/math/im2col_test.cc b/paddle/operators/math/im2col_test.cc index 10c28da72ba9d3b94bb59c5cf00e7f5a2f28fd06..ae197a97ed8aa089b51be77a59a8ba6a98ac70ec 100644 --- a/paddle/operators/math/im2col_test.cc +++ b/paddle/operators/math/im2col_test.cc @@ -74,7 +74,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { input = input_tmp; } else { - input.CopyFrom(input_tmp, *place, *context); + CopyFrom(input_tmp, *place, *context, &input); } output_cfo.mutable_data( {1, filter_size, filter_size, output_height, output_width}, *place); @@ -99,7 +99,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { out_cfo_ptr = output_cfo.data(); } else { - output_tmp.CopyFrom(output_cfo, paddle::platform::CPUPlace(), *context); + CopyFrom(output_cfo, paddle::platform::CPUPlace(), *context, &output_tmp); out_cfo_ptr = output_tmp.data(); } for (int i = 0; i < 6; ++i) { @@ -110,7 +110,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { out_ocf_ptr = output_ocf.data(); } else { - output_tmp.CopyFrom(output_ocf, paddle::platform::CPUPlace(), *context); + CopyFrom(output_ocf, paddle::platform::CPUPlace(), *context, &output_tmp); out_ocf_ptr = output_tmp.data(); } for (int i = 0; i < 6; ++i) { @@ -130,7 +130,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { input = input_tmp; } else { - input.CopyFrom(input_tmp, *place, *context); + CopyFrom(input_tmp, *place, *context, &input); } col2im(*context, output_cfo, dilation, stride, padding, &input); @@ -139,7 +139,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { in_ptr = input.data(); } else { - input_tmp.CopyFrom(input, paddle::platform::CPUPlace(), *context); + CopyFrom(input, paddle::platform::CPUPlace(), *context, &input_tmp); in_ptr = input_tmp.data(); } for (int i = 0; i < 6; ++i) { @@ -151,7 +151,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { input = input_tmp; } else { - input.CopyFrom(input_tmp, *place, *context); + CopyFrom(input_tmp, *place, *context, &input); } col2im_ocf(*context, output_ocf, dilation, stride, padding, &input); @@ -159,7 +159,7 @@ void testIm2col() { if (paddle::platform::is_cpu_place(*place)) { in_ptr = input.data(); } else { - input_tmp.CopyFrom(input, paddle::platform::CPUPlace(), *context); + CopyFrom(input, paddle::platform::CPUPlace(), *context, &input_tmp); in_ptr = input_tmp.data(); } for (int i = 0; i < 6; ++i) { diff --git a/paddle/operators/math/math_function.cu b/paddle/operators/math/math_function.cu index 58356a4b7783241ca0292829bf05dc1a8ed80c6c..3018e50a4f54592123df6b9cadd45ce525d7b3e1 100644 --- a/paddle/operators/math/math_function.cu +++ b/paddle/operators/math/math_function.cu @@ -297,7 +297,25 @@ void set_constant_with_place( template struct RowwiseAdd; template struct RowwiseAdd; template struct ColwiseSum; -template struct ColwiseSum; +// template struct ColwiseSum; +// The ColwiseSum failed in debug mode, +// and only failed for this case. So reimplemented it. +template <> +void ColwiseSum::operator()( + const platform::DeviceContext& context, const framework::Tensor& input, + framework::Tensor* vector) { + auto in_dims = input.dims(); + auto size = input.numel() / in_dims[0]; + PADDLE_ENFORCE_EQ(vector->numel(), size); + framework::Tensor one; + one.mutable_data({in_dims[0]}, context.GetPlace()); + SetConstant set; + set(context, &one, static_cast(1.0)); + gemv(context, true, static_cast(in_dims[0]), + static_cast(in_dims[1]), 1.0, + input.data(), one.data(), + 0.0, vector->data()); +} } // namespace math } // namespace operators diff --git a/paddle/operators/math/math_function.h b/paddle/operators/math/math_function.h index ffb99f53808c4316ede96b04e57aec4dae4134de..5a42854f22234629b3405ec2397143ef761a9d08 100644 --- a/paddle/operators/math/math_function.h +++ b/paddle/operators/math/math_function.h @@ -49,6 +49,7 @@ int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda, #include "paddle/framework/eigen.h" #include "paddle/framework/tensor.h" +#include "paddle/framework/tensor_util.h" #include "paddle/platform/device_context.h" #include "paddle/platform/enforce.h" diff --git a/paddle/operators/math/math_function_test.cu b/paddle/operators/math/math_function_test.cu index 780d17ffc6539c5f4d67ebab5476d6f646840b41..d5d6f0c73bc6bce7a74db2c98fa9f884a0bcd9a2 100644 --- a/paddle/operators/math/math_function_test.cu +++ b/paddle/operators/math/math_function_test.cu @@ -16,15 +16,15 @@ TEST(math_function, notrans_mul_trans) { auto* gpu_place = new paddle::platform::GPUPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); - input1_gpu.CopyFrom(input1, *gpu_place, context); - input2_gpu.CopyFrom(input1, *gpu_place, context); + paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); + paddle::framework::CopyFrom(input1, *gpu_place, context, &input2_gpu); out_gpu.mutable_data({2, 2}, *gpu_place); paddle::operators::math::matmul( context, input1_gpu, false, input2_gpu, true, 1, &out_gpu, 0); - out.CopyFrom(out_gpu, *cpu_place, context); + paddle::framework::CopyFrom(out_gpu, *cpu_place, context, &out); float* out_ptr = out.data(); context.Wait(); @@ -50,15 +50,15 @@ TEST(math_function, trans_mul_notrans) { auto* gpu_place = new paddle::platform::GPUPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); - input1_gpu.CopyFrom(input1, *gpu_place, context); - input2_gpu.CopyFrom(input1, *gpu_place, context); + paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); + paddle::framework::CopyFrom(input1, *gpu_place, context, &input2_gpu); out_gpu.mutable_data({3, 3}, *gpu_place); paddle::operators::math::matmul( context, input1_gpu, true, input2_gpu, false, 1, &out_gpu, 0); - out.CopyFrom(out_gpu, *cpu_place, context); + paddle::framework::CopyFrom(out_gpu, *cpu_place, context, &out); float* out_ptr = out.data(); context.Wait(); @@ -99,9 +99,9 @@ TEST(math_function, gemm_notrans_cublas) { auto* gpu_place = new paddle::platform::GPUPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); - input1_gpu.CopyFrom(input1, *gpu_place, context); - input2_gpu.CopyFrom(input2, *gpu_place, context); - input3_gpu.CopyFrom(input3, *gpu_place, context); + paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); + paddle::framework::CopyFrom(input2, *gpu_place, context, &input2_gpu); + paddle::framework::CopyFrom(input3, *gpu_place, context, &input3_gpu); float* a = input1_gpu.data(); float* b = input2_gpu.data(); float* c = input3_gpu.mutable_data(*gpu_place); @@ -109,7 +109,7 @@ TEST(math_function, gemm_notrans_cublas) { paddle::operators::math::gemm( context, false, false, m, n, k, 1, a, 3, b + 1, 4, 1, c + 1, 4); - input3.CopyFrom(input3_gpu, *cpu_place, context); + paddle::framework::CopyFrom(input3_gpu, *cpu_place, context, &input3); // numpy code: // a = np.arange(6).reshape(2, 3) @@ -154,9 +154,9 @@ TEST(math_function, gemm_trans_cublas) { auto* gpu_place = new paddle::platform::GPUPlace(0); paddle::platform::CUDADeviceContext context(*gpu_place); - input1_gpu.CopyFrom(input1, *gpu_place, context); - input2_gpu.CopyFrom(input2, *gpu_place, context); - input3_gpu.CopyFrom(input3, *gpu_place, context); + paddle::framework::CopyFrom(input1, *gpu_place, context, &input1_gpu); + paddle::framework::CopyFrom(input2, *gpu_place, context, &input2_gpu); + paddle::framework::CopyFrom(input3, *gpu_place, context, &input3_gpu); float* a = input1_gpu.data(); float* b = input2_gpu.data(); float* c = input3_gpu.mutable_data(*gpu_place); @@ -164,7 +164,7 @@ TEST(math_function, gemm_trans_cublas) { paddle::operators::math::gemm( context, false, true, m, n, k, 1, a, 3, b + 3, 3, 1, c + 1, 4); - input3.CopyFrom(input3_gpu, *cpu_place, context); + paddle::framework::CopyFrom(input3_gpu, *cpu_place, context, &input3); context.Wait(); EXPECT_EQ(input3_ptr[0], 0); @@ -205,14 +205,15 @@ void GemvTest(int m, int n, bool trans) { } paddle::platform::CUDADeviceContext context(*gpu_place); - g_mat_a.CopyFrom(mat_a, *gpu_place, context); - g_vec_b.CopyFrom(vec_b, *gpu_place, context); + paddle::framework::CopyFrom(mat_a, *gpu_place, context, &g_mat_a); + paddle::framework::CopyFrom(vec_b, *gpu_place, context, &g_vec_b); paddle::operators::math::gemv( context, trans, static_cast(m), static_cast(n), 1., g_data_a, g_data_b, 0., g_data_c); - vec_c.CopyFrom(g_vec_c, paddle::platform::CPUPlace(), context); + paddle::framework::CopyFrom(g_vec_c, paddle::platform::CPUPlace(), context, + &vec_c); if (!trans) { for (int i = 0; i < m; ++i) { diff --git a/paddle/operators/math/maxouting.cc b/paddle/operators/math/maxouting.cc new file mode 100644 index 0000000000000000000000000000000000000000..c9003962d33b70b8e21a0d6b78bf5a77981df409 --- /dev/null +++ b/paddle/operators/math/maxouting.cc @@ -0,0 +1,101 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/maxouting.h" + +namespace paddle { +namespace operators { +namespace math { + +// All tensors are in NCHW format, and the groups must be greater than 1 +template +class MaxOutFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, framework::Tensor* output, + int groups) { + const int batch_size = input.dims()[0]; + const int input_height = input.dims()[2]; + const int input_width = input.dims()[3]; + const int output_channels = output->dims()[1]; + int fea_size = input_height * input_width; + // c_size means the output size of each sample + int c_size = fea_size * output_channels; + const T* input_data = input.data(); + T* output_data = output->mutable_data(context.GetPlace()); + + for (int i = 0; i < batch_size; ++i) { + int new_bindex = c_size * i; + for (int c = 0; c < output_channels; ++c) { + int new_cindex = fea_size * c; + for (int f = 0; f < fea_size; ++f) { + T ele = static_cast(-FLT_MAX); + for (int ph = 0; ph < groups; ++ph) { + T x = input_data[(new_bindex + new_cindex) * groups + + ph * fea_size + f]; + ele = ele > x ? ele : x; + } + output_data[(new_bindex + new_cindex + f)] = ele; + } + } + } + } +}; + +template +class MaxOutGradFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, framework::Tensor* input_grad, + const framework::Tensor& output, + const framework::Tensor& output_grad, int groups) { + const int batch_size = input.dims()[0]; + const int input_height = input.dims()[2]; + const int input_width = input.dims()[3]; + const int output_channels = output.dims()[1]; + int fea_size = input_height * input_width; + const T* input_data = input.data(); + const T* output_data = output.data(); + const T* output_grad_data = output_grad.data(); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + + for (int i = 0; i < batch_size; ++i) { + int blen = fea_size * output_channels * i; + for (int c = 0; c < output_channels; ++c) { + int clen = fea_size * c; + for (int f = 0; f < fea_size; ++f) { + int input_idx0 = (blen + clen) * groups + f; + bool continue_match = true; + int output_idx = blen + clen + f; + for (int g = 0; g < groups && continue_match; ++g) { + int input_idx = input_idx0 + fea_size * g; + if (input_data[input_idx] == output_data[output_idx]) { + input_grad_data[input_idx] += output_grad_data[output_idx]; + continue_match = false; + } + } + } + } + } + } +}; + +template class MaxOutGradFunctor; +template class MaxOutGradFunctor; +template class MaxOutFunctor; +template class MaxOutFunctor; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/maxouting.cu b/paddle/operators/math/maxouting.cu new file mode 100644 index 0000000000000000000000000000000000000000..c3fabcae081e24d92d50d0e2a2cad4a2e9872125 --- /dev/null +++ b/paddle/operators/math/maxouting.cu @@ -0,0 +1,152 @@ +/* Copyright (c) 2016 paddlepaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/math/maxouting.h" +#include "paddle/platform/cuda_helper.h" + +namespace paddle { +namespace operators { +namespace math { + +template +__global__ void KernelMaxOut(const int nthreads, const T* input_data, + const int channels, const int input_height, + const int input_width, int groups, + T* output_data) { + const int size = input_height * input_width * channels / groups; + const int feat_len = input_height * input_width; + int index = blockIdx.x * blockDim.x + threadIdx.x; + int offset = blockDim.x * gridDim.x; + for (int i = index; i < nthreads; i += offset) { + int batch_idx = i / size; + int batch_offset = i % size; + int channel_idx = batch_offset / feat_len; + int feat_idx = batch_offset % feat_len; + int data_idx = + (batch_idx * size + channel_idx * feat_len) * groups + feat_idx; + T ele = static_cast(-FLT_MAX); + for (int g = 0; g < groups; ++g) { + T x = input_data[data_idx + g * feat_len]; + ele = ele > x ? ele : x; + } + output_data[i] = ele; + } +} +template +__global__ void KernelMaxoutGrad(const int nthreads, const T* input_data, + const T* output_data, const T* output_grad, + T* input_grad, const int channels, + const int input_height, const int input_width, + int groups) { + const int size = input_height * input_width * channels / groups; + const int feat_len = input_height * input_width; + int index = blockIdx.x * blockDim.x + threadIdx.x; + int offset = blockDim.x * gridDim.x; + for (int i = index; i < nthreads; i += offset) { + int batch_idx = i / size; + int batch_offset = i % size; + int channel_idx = batch_offset / feat_len; + int feat_idx = batch_offset % feat_len; + int data_idx = + (batch_idx * size + channel_idx * feat_len) * groups + feat_idx; + int max_index = -1; + bool continue_match = true; + for (int g = 0; g < groups && continue_match; ++g) { + if (input_data[data_idx + g * feat_len] == output_data[i]) { + max_index = data_idx + g * feat_len; + continue_match = false; + break; + } + } + if (max_index != -1) { + input_grad[max_index] += output_grad[index]; + } + } +} +/* + * All tensors are in NCHW format. + */ +template +class MaxOutFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, framework::Tensor* output, + int groups) { + const int batch_size = input.dims()[0]; + const int input_channels = input.dims()[1]; + const int input_height = input.dims()[2]; + const int input_width = input.dims()[3]; + const int output_channels = output->dims()[1]; + const int output_height = output->dims()[2]; + const int output_width = output->dims()[3]; + + const T* input_data = input.data(); + T* output_data = output->mutable_data(context.GetPlace()); + int nthreads = output->numel(); + int blocks = (nthreads + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KernelMaxOut< + T><<(context) + .stream()>>>(nthreads, input_data, input_channels, + input_height, input_width, groups, output_data); + } +}; +/* + * All tensors are in NCHW format. + */ +template +class MaxOutGradFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, framework::Tensor* input_grad, + const framework::Tensor& output, + const framework::Tensor& output_grad, int groups) { + const int batch_size = input.dims()[0]; + const int input_channels = input.dims()[1]; + const int input_height = input.dims()[2]; + const int input_width = input.dims()[3]; + const int output_channels = output.dims()[1]; + const int output_height = output.dims()[2]; + const int output_width = output.dims()[3]; + + const T* input_data = input.data(); + const T* output_data = output.data(); + const T* output_grad_data = output_grad.data(); + T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + int nthreads = output.numel(); + int blocks = (nthreads + 1024 - 1) / 1024; + dim3 threads(1024, 1); + dim3 grid(blocks, 1); + + KernelMaxoutGrad< + T><<(context) + .stream()>>>(nthreads, input_data, output_data, + output_grad_data, input_grad_data, input_channels, + input_height, input_width, groups); + } +}; + +template class MaxOutGradFunctor; +template class MaxOutGradFunctor; + +template class MaxOutFunctor; +template class MaxOutFunctor; + +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/maxouting.h b/paddle/operators/math/maxouting.h new file mode 100644 index 0000000000000000000000000000000000000000..2d9069b0b3ca3e7bad3b21a46985c52ef00f50e6 --- /dev/null +++ b/paddle/operators/math/maxouting.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/tensor.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/hostdevice.h" + +namespace paddle { +namespace operators { +namespace math { + +#define FLT_MAX __FLT_MAX__ + +template + +class MaxOutFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, framework::Tensor* output, + int groups); +}; + +template +class MaxOutGradFunctor { + public: + void operator()(const platform::DeviceContext& context, + const framework::Tensor& input, framework::Tensor* input_grad, + const framework::Tensor& output, + const framework::Tensor& output_grad, int groups); +}; +} // namespace math +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/math/pooling.cc b/paddle/operators/math/pooling.cc index ead89e146f32ef005b06f4f6f04224d691805d74..135984586a67f666425f81456148c3623ed7ef25 100644 --- a/paddle/operators/math/pooling.cc +++ b/paddle/operators/math/pooling.cc @@ -498,8 +498,8 @@ template class Pool3dGradFunctor< * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. */ -template -class MaxPool2dWithIndexFunctor { +template +class MaxPool2dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& input, std::vector& ksize, @@ -520,9 +520,9 @@ class MaxPool2dWithIndexFunctor { const int input_stride = input_height * input_width; const int output_stride = output_height * output_width; - const T* input_data = input.data(); - T* output_data = output->mutable_data(context.GetPlace()); - T* mask_data = mask->mutable_data(context.GetPlace()); + const T1* input_data = input.data(); + T1* output_data = output->mutable_data(context.GetPlace()); + T2* mask_data = mask->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -535,7 +535,7 @@ class MaxPool2dWithIndexFunctor { int wend = std::min(wstart + ksize_width, input_width); wstart = std::max(wstart, 0); - T ele = static_cast(-FLT_MAX); + T1 ele = static_cast(-FLT_MAX); int index = -1; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { @@ -563,8 +563,8 @@ class MaxPool2dWithIndexFunctor { * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. */ -template -class MaxPool2dWithIndexGradFunctor { +template +class MaxPool2dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& output_grad, @@ -580,9 +580,9 @@ class MaxPool2dWithIndexGradFunctor { const int input_stride = input_height * input_width; const int output_stride = output_height * output_width; - const T* mask_data = mask.data(); - const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + const T2* mask_data = mask.data(); + const T1* output_grad_data = output_grad.data(); + T1* input_grad_data = input_grad->mutable_data(context.GetPlace()); for (int n = 0; n < batch_size; ++n) { for (int c = 0; c < output_channels; ++c) { @@ -602,18 +602,18 @@ class MaxPool2dWithIndexGradFunctor { } }; -template class MaxPool2dWithIndexFunctor; -template class MaxPool2dWithIndexGradFunctor; -template class MaxPool2dWithIndexFunctor; -template class MaxPool2dWithIndexGradFunctor; +template class MaxPool2dWithIndexFunctor; +template class MaxPool2dWithIndexGradFunctor; +template class MaxPool2dWithIndexFunctor; +template class MaxPool2dWithIndexGradFunctor; /* * All tensors are in NCDHW format. * Ksize, strides, paddings are three elements. These three elements represent * depth, height and width, respectively. */ -template -class MaxPool3dWithIndexFunctor { +template +class MaxPool3dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& input, std::vector& ksize, @@ -639,9 +639,9 @@ class MaxPool3dWithIndexFunctor { const int input_stride = input_depth * input_height * input_width; const int output_stride = output_depth * output_height * output_width; - const T* input_data = input.data(); - T* output_data = output->mutable_data(context.GetPlace()); - T* mask_data = mask->mutable_data(context.GetPlace()); + const T1* input_data = input.data(); + T1* output_data = output->mutable_data(context.GetPlace()); + T2* mask_data = mask->mutable_data(context.GetPlace()); for (int i = 0; i < batch_size; i++) { for (int c = 0; c < output_channels; ++c) { @@ -659,7 +659,7 @@ class MaxPool3dWithIndexFunctor { wstart = std::max(wstart, 0); int output_idx = (pd * output_height + ph) * output_width + pw; - T ele = static_cast(-FLT_MAX); + T1 ele = static_cast(-FLT_MAX); int index = -1; for (int d = dstart; d < dend; ++d) { for (int h = hstart; h < hend; ++h) { @@ -691,8 +691,8 @@ class MaxPool3dWithIndexFunctor { * Ksize, strides, paddings are three elements. These three elements represent * depth, height and width, respectively. */ -template -class MaxPool3dWithIndexGradFunctor { +template +class MaxPool3dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& output_grad, @@ -710,9 +710,9 @@ class MaxPool3dWithIndexGradFunctor { const int input_stride = input_depth * input_height * input_width; const int output_stride = output_depth * output_height * output_width; - const T* mask_data = mask.data(); - const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + const T2* mask_data = mask.data(); + const T1* output_grad_data = output_grad.data(); + T1* input_grad_data = input_grad->mutable_data(context.GetPlace()); for (int n = 0; n < batch_size; ++n) { for (int c = 0; c < output_channels; ++c) { @@ -735,10 +735,10 @@ class MaxPool3dWithIndexGradFunctor { } }; -template class MaxPool3dWithIndexFunctor; -template class MaxPool3dWithIndexGradFunctor; -template class MaxPool3dWithIndexFunctor; -template class MaxPool3dWithIndexGradFunctor; +template class MaxPool3dWithIndexFunctor; +template class MaxPool3dWithIndexGradFunctor; +template class MaxPool3dWithIndexFunctor; +template class MaxPool3dWithIndexGradFunctor; } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/operators/math/pooling.cu b/paddle/operators/math/pooling.cu index 6d1138ad50cb095e85b4ceb44fa81731316f10dd..ca3560f264b59057fd655084f3d43adc617c6606 100644 --- a/paddle/operators/math/pooling.cu +++ b/paddle/operators/math/pooling.cu @@ -658,13 +658,13 @@ template class Pool3dGradFunctor< template class Pool3dGradFunctor< platform::GPUPlace, paddle::operators::math::AvgPoolGrad, double>; -template +template __global__ void KernelMaxPool2dWithIdx( - const int nthreads, const T* input_data, const int channels, + const int nthreads, const T1* input_data, const int channels, const int input_height, const int input_width, const int output_height, const int output_width, const int ksize_height, const int ksize_width, const int stride_height, const int stride_width, const int padding_height, - const int padding_width, T* output_data, T* mask_data) { + const int padding_width, T1* output_data, T2* mask_data) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int pw = index % output_width; @@ -681,7 +681,7 @@ __global__ void KernelMaxPool2dWithIdx( wstart = max(wstart, 0); input_data += (batch_idx * channels + c) * input_height * input_width; - T ele = -FLT_MAX; + T1 ele = -FLT_MAX; int max_index = -1; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { @@ -697,13 +697,13 @@ __global__ void KernelMaxPool2dWithIdx( } } -template +template __global__ void KernelMaxPool2DWithIdxGrad( - const int nthreads, const T* output_grad, const T* mask_data, + const int nthreads, const T1* output_grad, const T2* mask_data, const int channels, const int input_height, const int input_width, const int output_height, const int output_width, const int ksize_height, const int ksize_width, const int stride_height, const int stride_width, - const int padding_height, const int padding_width, T* input_grad) { + const int padding_height, const int padding_width, T1* input_grad) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int w_offset = index % input_width; @@ -724,7 +724,7 @@ __global__ void KernelMaxPool2DWithIdxGrad( int pw_end = min((w_offset + padding_width) / stride_width + 1, output_width); - T gradient = 0; + T1 gradient = 0; int input_current_featuremap_idx = h_offset * input_width + w_offset; int output_idx = (batch_idx * channels + c_offset) * output_height * output_width; @@ -746,8 +746,8 @@ __global__ void KernelMaxPool2DWithIdxGrad( * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. */ -template -class MaxPool2dWithIndexFunctor { +template +class MaxPool2dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& input, std::vector& ksize, @@ -767,9 +767,9 @@ class MaxPool2dWithIndexFunctor { const int padding_height = paddings[0]; const int padding_width = paddings[1]; - const T* input_data = input.data(); - T* output_data = output->mutable_data(context.GetPlace()); - T* mask_data = mask->mutable_data(context.GetPlace()); + const T1* input_data = input.data(); + T1* output_data = output->mutable_data(context.GetPlace()); + T2* mask_data = mask->mutable_data(context.GetPlace()); int nthreads = batch_size * output_channels * output_height * output_width; int blocks = (nthreads + 1024 - 1) / 1024; @@ -777,9 +777,9 @@ class MaxPool2dWithIndexFunctor { dim3 grid(blocks, 1); KernelMaxPool2dWithIdx< - T><<(context) - .stream()>>>( + T1, T2><<(context) + .stream()>>>( nthreads, input_data, input_channels, input_height, input_width, output_height, output_width, ksize_height, ksize_width, stride_height, stride_width, padding_height, padding_width, output_data, mask_data); @@ -791,8 +791,8 @@ class MaxPool2dWithIndexFunctor { * Ksize, strides, paddings are two elements. These two elements represent * height and width, respectively. */ -template -class MaxPool2dWithIndexGradFunctor { +template +class MaxPool2dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& output_grad, @@ -812,9 +812,9 @@ class MaxPool2dWithIndexGradFunctor { const int padding_height = paddings[0]; const int padding_width = paddings[1]; - const T* mask_data = mask.data(); - const T* output_grad_data = output_grad.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + const T2* mask_data = mask.data(); + const T1* output_grad_data = output_grad.data(); + T1* input_grad_data = input_grad->mutable_data(context.GetPlace()); int nthreads = batch_size * input_channels * input_height * input_width; int blocks = (nthreads + 1024 - 1) / 1024; @@ -822,30 +822,30 @@ class MaxPool2dWithIndexGradFunctor { dim3 grid(blocks, 1); KernelMaxPool2DWithIdxGrad< - T><<(context) - .stream()>>>(nthreads, output_grad_data, mask_data, - input_channels, input_height, input_width, - output_height, output_width, ksize_height, - ksize_width, stride_height, stride_width, - padding_height, padding_width, input_grad_data); + T1, T2><<(context) + .stream()>>>( + nthreads, output_grad_data, mask_data, input_channels, input_height, + input_width, output_height, output_width, ksize_height, ksize_width, + stride_height, stride_width, padding_height, padding_width, + input_grad_data); } }; -template class MaxPool2dWithIndexFunctor; -template class MaxPool2dWithIndexGradFunctor; -template class MaxPool2dWithIndexFunctor; -template class MaxPool2dWithIndexGradFunctor; +template class MaxPool2dWithIndexFunctor; +template class MaxPool2dWithIndexGradFunctor; +template class MaxPool2dWithIndexFunctor; +template class MaxPool2dWithIndexGradFunctor; -template +template __global__ void KernelMaxPool3DWithIdx( - const int nthreads, const T* input_data, const int channels, + const int nthreads, const T1* input_data, const int channels, const int input_depth, const int input_height, const int input_width, const int output_depth, const int output_height, const int output_width, const int ksize_depth, const int ksize_height, const int ksize_width, const int stride_depth, const int stride_height, const int stride_width, const int padding_depth, const int padding_height, const int padding_width, - T* output_data, T* mask_data) { + T1* output_data, T2* mask_data) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int pw = index % output_width; @@ -865,7 +865,7 @@ __global__ void KernelMaxPool3DWithIdx( hstart = max(hstart, 0); wstart = max(wstart, 0); - T ele = -FLT_MAX; + T1 ele = -FLT_MAX; int max_index = -1; input_data += (batch_idx * channels + c) * input_depth * input_height * input_width; @@ -885,15 +885,15 @@ __global__ void KernelMaxPool3DWithIdx( } } -template +template __global__ void KernelMaxPool3DWithIdxGrad( - const int nthreads, const T* output_grad, const T* mask, const int channels, - const int input_depth, const int input_height, const int input_width, - const int output_depth, const int output_height, const int output_width, - const int ksize_depth, const int ksize_height, const int ksize_width, - const int stride_depth, const int stride_height, const int stride_width, - const int padding_depth, const int padding_height, const int padding_width, - T* input_grad) { + const int nthreads, const T1* output_grad, const T2* mask, + const int channels, const int input_depth, const int input_height, + const int input_width, const int output_depth, const int output_height, + const int output_width, const int ksize_depth, const int ksize_height, + const int ksize_width, const int stride_depth, const int stride_height, + const int stride_width, const int padding_depth, const int padding_height, + const int padding_width, T1* input_grad) { for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads; index += blockDim.x * gridDim.x) { int w_offset = index % input_width; @@ -922,7 +922,7 @@ __global__ void KernelMaxPool3DWithIdxGrad( int pw_end = min((w_offset + padding_width) / stride_width + 1, output_width); - T gradient = 0; + T1 gradient = 0; int input_current_feature_map_idx = (d_offset * input_height + h_offset) * input_width + w_offset; int output_idx = (batch_idx * channels + c_offset) * output_depth * @@ -949,8 +949,8 @@ __global__ void KernelMaxPool3DWithIdxGrad( * Ksize, strides, paddings are three elements. These three elements represent * depth, height and width, respectively. */ -template -class MaxPool3dWithIndexFunctor { +template +class MaxPool3dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& input, std::vector& ksize, @@ -975,9 +975,9 @@ class MaxPool3dWithIndexFunctor { const int padding_height = paddings[1]; const int padding_width = paddings[2]; - const T* input_data = input.data(); - T* output_data = output->mutable_data(context.GetPlace()); - T* mask_data = mask->mutable_data(context.GetPlace()); + const T1* input_data = input.data(); + T1* output_data = output->mutable_data(context.GetPlace()); + T2* mask_data = mask->mutable_data(context.GetPlace()); int nthreads = batch_size * output_channels * output_depth * output_height * output_width; @@ -986,9 +986,9 @@ class MaxPool3dWithIndexFunctor { dim3 grid(blocks, 1); KernelMaxPool3DWithIdx< - T><<(context) - .stream()>>>( + T1, T2><<(context) + .stream()>>>( nthreads, input_data, input_channels, input_depth, input_height, input_width, output_depth, output_height, output_width, ksize_depth, ksize_height, ksize_width, stride_depth, stride_height, stride_width, @@ -1001,8 +1001,8 @@ class MaxPool3dWithIndexFunctor { * Ksize, strides, paddings are three elements. These three elements represent * depth, height and width, respectively. */ -template -class MaxPool3dWithIndexGradFunctor { +template +class MaxPool3dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, const framework::Tensor& output_grad, @@ -1027,9 +1027,9 @@ class MaxPool3dWithIndexGradFunctor { const int padding_height = paddings[1]; const int padding_width = paddings[2]; - const T* output_grad_data = output_grad.data(); - const T* mask_data = mask.data(); - T* input_grad_data = input_grad->mutable_data(context.GetPlace()); + const T1* output_grad_data = output_grad.data(); + const T2* mask_data = mask.data(); + T1* input_grad_data = input_grad->mutable_data(context.GetPlace()); int nthreads = batch_size * input_channels * input_depth * input_height * input_width; @@ -1038,9 +1038,9 @@ class MaxPool3dWithIndexGradFunctor { dim3 grid(blocks, 1); KernelMaxPool3DWithIdxGrad< - T><<(context) - .stream()>>>( + T1, T2><<(context) + .stream()>>>( nthreads, output_grad_data, mask_data, input_channels, input_depth, input_height, input_width, output_depth, output_height, output_width, ksize_depth, ksize_height, ksize_width, stride_depth, stride_height, @@ -1049,10 +1049,10 @@ class MaxPool3dWithIndexGradFunctor { } }; -template class MaxPool3dWithIndexFunctor; -template class MaxPool3dWithIndexGradFunctor; -template class MaxPool3dWithIndexFunctor; -template class MaxPool3dWithIndexGradFunctor; +template class MaxPool3dWithIndexFunctor; +template class MaxPool3dWithIndexGradFunctor; +template class MaxPool3dWithIndexFunctor; +template class MaxPool3dWithIndexGradFunctor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/pooling.h b/paddle/operators/math/pooling.h index f6719e1e628cdd2cf7445ec9cd05713bc4f14c84..19fbd8b4bb2469d3ce8a139ce30a48641dbd6e0f 100644 --- a/paddle/operators/math/pooling.h +++ b/paddle/operators/math/pooling.h @@ -153,7 +153,7 @@ class MaxPool3dGradFunctor { * In pool2d, all tensors are in NCHW format. In pool3d, all tensors are in * NCDHW format. */ -template +template class MaxPool2dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, @@ -162,7 +162,7 @@ class MaxPool2dWithIndexFunctor { framework::Tensor* output, framework::Tensor* mask); }; -template +template class MaxPool2dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, @@ -172,7 +172,7 @@ class MaxPool2dWithIndexGradFunctor { framework::Tensor* input_grad); }; -template +template class MaxPool3dWithIndexFunctor { public: void operator()(const platform::DeviceContext& context, @@ -181,7 +181,7 @@ class MaxPool3dWithIndexFunctor { framework::Tensor* output, framework::Tensor* mask); }; -template +template class MaxPool3dWithIndexGradFunctor { public: void operator()(const platform::DeviceContext& context, diff --git a/paddle/operators/math/selected_rows_functor.cc b/paddle/operators/math/selected_rows_functor.cc index 075196b47eeaf118a588b96532d87a05e4e600c6..514f2adef284c8877e2e74b943b4e6419c6ae721 100644 --- a/paddle/operators/math/selected_rows_functor.cc +++ b/paddle/operators/math/selected_rows_functor.cc @@ -145,6 +145,8 @@ struct SelectedRowsAddTo { template struct SelectedRowsAddTo; template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; template struct SelectedRowsAddToTensor { @@ -175,6 +177,8 @@ struct SelectedRowsAddToTensor { template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; } // namespace math } // namespace operators diff --git a/paddle/operators/math/selected_rows_functor.cu b/paddle/operators/math/selected_rows_functor.cu index 47fe3b44a50fee9f41ae807793187258159b9f29..c1dd323ba29e03e3ab4a3e4d7248388b408fb9d6 100644 --- a/paddle/operators/math/selected_rows_functor.cu +++ b/paddle/operators/math/selected_rows_functor.cu @@ -173,6 +173,8 @@ struct SelectedRowsAddTo { template struct SelectedRowsAddTo; template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; +template struct SelectedRowsAddTo; namespace { template @@ -223,7 +225,8 @@ struct SelectedRowsAddToTensor { template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; - +template struct SelectedRowsAddToTensor; +template struct SelectedRowsAddToTensor; } // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/operators/math/selected_rows_functor_test.cu b/paddle/operators/math/selected_rows_functor_test.cu index 09de9dc53a1de9537b5109b3cc7cf9744f9c7908..7de9291c17d3f09a3c6076f00f2457f240e6f0af 100644 --- a/paddle/operators/math/selected_rows_functor_test.cu +++ b/paddle/operators/math/selected_rows_functor_test.cu @@ -67,7 +67,7 @@ TEST(selected_rows_functor, gpu_add) { EXPECT_EQ(out_rows[6], 9); Tensor out_cpu; - out_cpu.CopyFrom(*out_value, cpu_place, ctx); + CopyFrom(*out_value, cpu_place, ctx, &out_cpu); ctx.Wait(); auto* out_cpu_data = out_cpu.data(); @@ -94,7 +94,7 @@ TEST(selected_rows_functor, gpu_add) { add_tensor_functor(ctx, *output, *tensor1, tensor2.get()); Tensor tensor2_cpu; - tensor2_cpu.CopyFrom(*tensor2, cpu_place, ctx); + CopyFrom(*tensor2, cpu_place, ctx, &tensor2_cpu); ctx.Wait(); auto* tensor2_cpu_data = tensor2_cpu.data(); @@ -167,7 +167,7 @@ TEST(selected_rows_functor, gpu_add_to) { EXPECT_EQ(out_rows[6], 9); Tensor out_cpu; - out_cpu.CopyFrom(*out_value, cpu_place, ctx); + CopyFrom(*out_value, cpu_place, ctx, &out_cpu); ctx.Wait(); auto* out_cpu_data = out_cpu.data(); @@ -191,7 +191,7 @@ TEST(selected_rows_functor, gpu_add_to) { add_to_tensor_functor(ctx, *output, tensor1.get()); Tensor tensor1_cpu; - tensor1_cpu.CopyFrom(*tensor1, cpu_place, ctx); + CopyFrom(*tensor1, cpu_place, ctx, &tensor1_cpu); ctx.Wait(); auto* tensor1_cpu_data = tensor1_cpu.data(); diff --git a/paddle/operators/math/vol2col.h b/paddle/operators/math/vol2col.h index cbc30bd754608dd6e6def1a4097d69bdf0c942c3..dc64d1d9776261541a380ed15207904d6b4e641c 100644 --- a/paddle/operators/math/vol2col.h +++ b/paddle/operators/math/vol2col.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/framework/tensor.h" +#include "paddle/framework/tensor_util.h" #include "paddle/platform/device_context.h" namespace paddle { diff --git a/paddle/operators/math/vol2col_test.cc b/paddle/operators/math/vol2col_test.cc index c31c716842f30de67c29b803866b8c82ddcf4a41..62c3152304ad7fe946c996be413e102f3dd92bb2 100644 --- a/paddle/operators/math/vol2col_test.cc +++ b/paddle/operators/math/vol2col_test.cc @@ -82,7 +82,7 @@ void testVol2col() { if (paddle::platform::is_cpu_place(*place)) { input = input_tmp; } else { - input.CopyFrom(input_tmp, *place, *context); + CopyFrom(input_tmp, *place, *context, &input); } output.mutable_data({1, filter_size, filter_size, filter_size, output_depth, output_height, output_width}, @@ -96,7 +96,7 @@ void testVol2col() { if (paddle::platform::is_cpu_place(*place)) { out_cfo_ptr = output.data(); } else { - output_tmp.CopyFrom(output, paddle::platform::CPUPlace(), *context); + CopyFrom(output, paddle::platform::CPUPlace(), *context, &output_tmp); out_cfo_ptr = output_tmp.data(); } @@ -110,7 +110,7 @@ void testVol2col() { if (paddle::platform::is_cpu_place(*place)) { input = input_tmp; } else { - input.CopyFrom(input_tmp, *place, *context); + CopyFrom(input_tmp, *place, *context, &input); } paddle::operators::math::Col2VolFunctor col2vol; @@ -120,7 +120,7 @@ void testVol2col() { if (paddle::platform::is_cpu_place(*place)) { in_ptr = input.data(); } else { - input_tmp.CopyFrom(input, paddle::platform::CPUPlace(), *context); + CopyFrom(input, paddle::platform::CPUPlace(), *context, &input_tmp); in_ptr = input_tmp.data(); } diff --git a/paddle/operators/max_sequence_len_op.cc b/paddle/operators/max_sequence_len_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..798022c9dd904a0ac189b4b550a94264a433ebf2 --- /dev/null +++ b/paddle/operators/max_sequence_len_op.cc @@ -0,0 +1,66 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/lod_rank_table.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace operators { + +class MaxSeqenceLenOp : public framework::OperatorBase { + public: + MaxSeqenceLenOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + auto &rank_table = + scope.FindVar(Input("RankTable"))->Get(); + auto *out = + scope.FindVar(Output("Out"))->GetMutable(); + int64_t *out_ptr = out->mutable_data({1}, platform::CPUPlace()); + *out_ptr = rank_table.items()[0].length; + } +}; + +class MaxSeqenceLenOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + MaxSeqenceLenOpProtoMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("RankTable", "The lod_rank_table."); + AddOutput("Out", "The max sequence length."); + AddComment( + R"DOC(Calculate the max sequence length through lod_rank_table.)DOC"); + } +}; + +class MaxSeqenceLenInferShape : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *context) const override { + PADDLE_ENFORCE(context->HasInput("RankTable")); + context->SetOutputDim("Out", {1}); + } +}; +} // namespace operators +} // namespace paddle + +REGISTER_OPERATOR(max_sequence_len, paddle::operators::MaxSeqenceLenOp, + paddle::operators::MaxSeqenceLenOpProtoMaker, + paddle::operators::MaxSeqenceLenInferShape, + paddle::framework::EmptyGradOpMaker); diff --git a/paddle/operators/maxout_op.cc b/paddle/operators/maxout_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..e203a25d544372220e8246e5e17ffbc6408d2998 --- /dev/null +++ b/paddle/operators/maxout_op.cc @@ -0,0 +1,102 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#include "paddle/operators/maxout_op.h" +namespace paddle { +namespace operators { + +using framework::Tensor; + +class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker { + public: + MaxOutOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "X", + "(Tensor) The input tensor of maxout operator. " + "The format of input tensor is NCHW. Where N is batch size, C is the " + "number of channels, H and W is the height and width of feature."); + AddOutput("Out", + "(Tensor) The output tensor of maxout operator." + "The format of output tensor is also NCHW." + "Where N is batch size, C is " + "the number of channels, H and W is the height and " + "width of feature."); + AddAttr( + "groups", + R"DOC("Specifies how many groups the input tensor will be split" + "in the channel dimension. And the number of output channel is " + "the number of channels divided by groups.." + )DOC"); + AddComment(R"DOC( + Assumed the input shape is (N, Ci, H, W). + The output shape is (N, Co, H, W). Then `Co = Ci / groups`. + + math: + y_{si+j} = \max_k x_{gsi + sk + j} + g = groups + s = input.size / num_channels + 0 \le i < num_channels / groups + 0 \le j < s + 0 \le k < groups + + Please refer to Paper: + - Maxout Networks: http://www.jmlr.org/proceedings/papers/v28/goodfellow13.pdf + - Multi-digit Number Recognition from Street View \ + Imagery using Deep Convolutional Neural Networks: \ + https://arxiv.org/pdf/1312.6082v4.pdf + )DOC"); + } +}; + +class MaxOutOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of MaxoutOp" + "should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of MaxoutOp should not be null."); + auto in_x_dims = ctx->GetInputDim("X"); + int groups = ctx->Attrs().Get("groups"); + // check groups > 1 + PADDLE_ENFORCE_GT(groups, 1, "groups should be larger than 1 in maxoutop"); + std::vector output_shape({in_x_dims[0], in_x_dims[1] / groups}); + output_shape.push_back(in_x_dims[2]); + output_shape.push_back(in_x_dims[3]); + ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); + } +}; + +class MaxOutOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); + PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), + "Input(X@GRAD) should not be null."); + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(maxout, ops::MaxOutOp, ops::MaxOutOpMaker, maxout_grad, + ops::MaxOutOpGrad); +REGISTER_OP_CPU_KERNEL(maxout, + ops::MaxOutKernel); +REGISTER_OP_CPU_KERNEL( + maxout_grad, ops::MaxOutGradKernel); diff --git a/paddle/operators/maxout_op.cu.cc b/paddle/operators/maxout_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..decd43913d69d122330886e07178778d03f7fef5 --- /dev/null +++ b/paddle/operators/maxout_op.cu.cc @@ -0,0 +1,23 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/maxout_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(maxout, + ops::MaxOutKernel, + ops::MaxOutKernel); +REGISTER_OP_GPU_KERNEL( + maxout_grad, ops::MaxOutGradKernel, + ops::MaxOutGradKernel); diff --git a/paddle/operators/maxout_op.h b/paddle/operators/maxout_op.h new file mode 100644 index 0000000000000000000000000000000000000000..44a0d073dda642f6e261ce5760013f3e1055f43d --- /dev/null +++ b/paddle/operators/maxout_op.h @@ -0,0 +1,62 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/maxouting.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class MaxOutKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const Tensor* in_x = context.Input("X"); + Tensor* out = context.Output("Out"); + int groups = context.template Attr("groups"); + + math::MaxOutFunctor maxout_forward; + maxout_forward(context.device_context(), *in_x, out, groups); + } +}; + +template +class MaxOutGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const Tensor* in_x = context.Input("X"); + const Tensor* out = context.Input("Out"); + const Tensor* out_grad = + context.Input(framework::GradVarName("Out")); + Tensor* in_x_grad = context.Output(framework::GradVarName("X")); + int groups = context.template Attr("groups"); + auto& device_ctx = context.device_context(); + math::SetConstant zero; + if (in_x_grad) { + in_x_grad->mutable_data(context.GetPlace()); + zero(device_ctx, in_x_grad, static_cast(0.0)); + math::MaxOutGradFunctor maxout_backward; + maxout_backward(context.device_context(), *in_x, in_x_grad, *out, + *out_grad, groups); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/merge_lod_tensor_op.cc b/paddle/operators/merge_lod_tensor_op.cc index 80460c476921b63ec5228a9780880c7db3c85217..adc688dbd5e13a2203d6842a12acdb8625288275 100644 --- a/paddle/operators/merge_lod_tensor_op.cc +++ b/paddle/operators/merge_lod_tensor_op.cc @@ -45,7 +45,7 @@ class MergeLoDTensorOp : public framework::OperatorBase { cpu_mask->ShareDataWith(mask); } else if (platform::is_gpu_place(mask.place())) { #ifdef PADDLE_WITH_CUDA - cpu_mask->CopyFrom(mask, platform::CPUPlace(), dev_ctx); + framework::CopyFrom(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get()); #else PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option"); #endif @@ -99,8 +99,9 @@ class MergeLoDTensorOp : public framework::OperatorBase { if (len == 0) { continue; } - out->Slice(out_offset, out_offset + len) - .CopyFrom(input->Slice(start_offset, end_offset), place, dev_ctx); + auto slice = out->Slice(out_offset, out_offset + len); + framework::CopyFrom(input->Slice(start_offset, end_offset), place, + dev_ctx, &slice); out_offset += len; (*in_idx) += 1; } diff --git a/paddle/operators/multiplex_op.cu b/paddle/operators/multiplex_op.cu index 49ed8a8879527fd32dd8b001ea256e46a0353487..10dff8d021d0394702cc8b92e779c012a4cf3eb2 100644 --- a/paddle/operators/multiplex_op.cu +++ b/paddle/operators/multiplex_op.cu @@ -33,7 +33,7 @@ class MultiplexGPUKernel : public framework::OpKernel { auto cols = ins[0]->numel() / rows; // copy index to cpu Tensor index_t_cpu; - index_t_cpu.CopyFrom(*ids, platform::CPUPlace(), ctx.device_context()); + CopyFrom(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu); auto* index = index_t_cpu.data(); auto stream = ctx.cuda_device_context().stream(); Place place = boost::get(ctx.GetPlace()); @@ -68,7 +68,7 @@ class MultiplexGradGPUKernel : public framework::OpKernel { auto cols = ins[0]->numel() / rows; // copy index to cpu Tensor index_t_cpu; - index_t_cpu.CopyFrom(*ids, platform::CPUPlace(), ctx.device_context()); + CopyFrom(*ids, platform::CPUPlace(), ctx.device_context(), &index_t_cpu); auto* index = index_t_cpu.data(); auto stream = ctx.cuda_device_context().stream(); diff --git a/paddle/operators/nccl_op.cc b/paddle/operators/nccl_op.cc index 66fcc09bc877867e66a37adc73230d8dabf4cbed..22a37ff1bbf6b8cfb2cbc3c3dbbb20a87c5ea4e7 100644 --- a/paddle/operators/nccl_op.cc +++ b/paddle/operators/nccl_op.cc @@ -49,7 +49,7 @@ class NCCLInitOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Communicator", "Create Communicator for communicating between gpus"); AddAttr>("gpus", "(vector) GPU id lists"); - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/nccl_op_test.cu.cc b/paddle/operators/nccl_op_test.cu.cc index 56ba57854955c08031214d1f751c17fbb8bb882c..bb7ae20286dd8e52f72b79cbf353bd812a2cc092 100644 --- a/paddle/operators/nccl_op_test.cu.cc +++ b/paddle/operators/nccl_op_test.cu.cc @@ -97,7 +97,7 @@ class NCCLTester : public ::testing::Test { send_tensor->mutable_data(kDims, place); std::vector send_vector(f::product(kDims), gpu_id); - send_tensor->CopyFromVector(send_vector, *ctx); + paddle::framework::CopyFromVector(send_vector, *ctx, send_tensor); ctx->Wait(); VLOG(1) << "Send Tensor filled with elements " << send_tensor->numel(); } diff --git a/paddle/operators/pool_cudnn_op.cc b/paddle/operators/pool_cudnn_op.cc index f962d9e3e6abde14ce21eb0102f10d139fdb160e..be9fcc5661f420aadf908cf80cce6c963008b0e4 100644 --- a/paddle/operators/pool_cudnn_op.cc +++ b/paddle/operators/pool_cudnn_op.cc @@ -20,6 +20,18 @@ REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad, ops::PoolOpGrad); REGISTER_OP_CPU_KERNEL(pool2d_cudnn, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_CPU_KERNEL(pool2d_cudnn_grad, - ops::PoolGradKernel) + ops::PoolGradKernel, + ops::PoolGradKernel) + +REGISTER_OP(pool3d_cudnn, ops::PoolOp, ops::Pool3dOpMaker, pool3d_cudnn_grad, + ops::PoolOpGrad); + +REGISTER_OP_CPU_KERNEL(pool3d_cudnn, + ops::PoolKernel, + ops::PoolKernel); +REGISTER_OP_CPU_KERNEL(pool3d_cudnn_grad, + ops::PoolGradKernel, + ops::PoolGradKernel) diff --git a/paddle/operators/pool_cudnn_op.cu.cc b/paddle/operators/pool_cudnn_op.cu.cc index f9d8af3e1c5db49873979fdfeb17a32d16341a1a..66dd194ccd5ed629c5861552a7c124dc911362d7 100644 --- a/paddle/operators/pool_cudnn_op.cu.cc +++ b/paddle/operators/pool_cudnn_op.cu.cc @@ -52,7 +52,13 @@ class PoolCudnnOpKernel : public framework::OpKernel { ScopedTensorDescriptor input_desc; ScopedTensorDescriptor output_desc; ScopedPoolingDescriptor pool_desc; - DataLayout layout = DataLayout::kNCHW; + DataLayout layout; + + if (strides.size() == 2U) { + layout = DataLayout::kNCHW; + } else { + layout = DataLayout::kNCDHW; + } cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( layout, framework::vectorize2int(input->dims())); @@ -112,7 +118,13 @@ class PoolCudnnGradOpKernel : public framework::OpKernel { ScopedTensorDescriptor input_desc; ScopedTensorDescriptor output_desc; ScopedPoolingDescriptor pool_desc; - DataLayout layout = DataLayout::kNCHW; + DataLayout layout; + + if (strides.size() == 2U) { + layout = DataLayout::kNCHW; + } else { + layout = DataLayout::kNCDHW; + } cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( layout, framework::vectorize2int(input->dims())); @@ -150,5 +162,12 @@ class PoolCudnnGradOpKernel : public framework::OpKernel { namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel); -REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel); +REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel, + ops::PoolCudnnOpKernel); +REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel, + ops::PoolCudnnGradOpKernel); + +REGISTER_OP_GPU_KERNEL(pool3d_cudnn, ops::PoolCudnnOpKernel, + ops::PoolCudnnOpKernel); +REGISTER_OP_GPU_KERNEL(pool3d_cudnn_grad, ops::PoolCudnnGradOpKernel, + ops::PoolCudnnGradOpKernel); diff --git a/paddle/operators/pool_op.cc b/paddle/operators/pool_op.cc index f3963b1995ef8767786f0bf230b134afc69aa99d..d8c58618cf703d086d3cabc927ebc5eb038b1aec 100644 --- a/paddle/operators/pool_op.cc +++ b/paddle/operators/pool_op.cc @@ -217,14 +217,18 @@ REGISTER_OP(pool2d, ops::PoolOp, ops::Pool2dOpMaker, pool2d_grad, ops::PoolOpGrad); REGISTER_OP_CPU_KERNEL(pool2d, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_CPU_KERNEL(pool2d_grad, - ops::PoolGradKernel) + ops::PoolGradKernel, + ops::PoolGradKernel) REGISTER_OP(pool3d, ops::PoolOp, ops::Pool3dOpMaker, pool3d_grad, ops::PoolOpGrad); REGISTER_OP_CPU_KERNEL(pool3d, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_CPU_KERNEL(pool3d_grad, - ops::PoolGradKernel); + ops::PoolGradKernel, + ops::PoolGradKernel); diff --git a/paddle/operators/pool_op.cu.cc b/paddle/operators/pool_op.cu.cc index 0e3b80868f7b9d1697d619889160856d65ad59a3..1010cb762289dd39cd632c699f7528f4ba638278 100644 --- a/paddle/operators/pool_op.cu.cc +++ b/paddle/operators/pool_op.cu.cc @@ -17,11 +17,15 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(pool2d, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_GPU_KERNEL(pool2d_grad, - ops::PoolGradKernel); + ops::PoolGradKernel, + ops::PoolGradKernel); REGISTER_OP_GPU_KERNEL(pool3d, - ops::PoolKernel); + ops::PoolKernel, + ops::PoolKernel); REGISTER_OP_GPU_KERNEL(pool3d_grad, - ops::PoolGradKernel); + ops::PoolGradKernel, + ops::PoolGradKernel); diff --git a/paddle/operators/pool_with_index_op.cc b/paddle/operators/pool_with_index_op.cc index 1df36e965abab3549aeb88bf682b712033c4d79c..4958fa645405db0798f37165030eae95da371477 100644 --- a/paddle/operators/pool_with_index_op.cc +++ b/paddle/operators/pool_with_index_op.cc @@ -29,11 +29,11 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), - "X(Input) of Pooling should not be null."); + "Input(X) of Pooling should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Out(Output) of Pooling should not be null."); + "Output(Out) of Pooling should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Mask"), - "Mask(Output) of Pooling should not be null."); + "Output(Mask) of Pooling should not be null."); auto in_x_dims = ctx->GetInputDim("X"); @@ -67,6 +67,14 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); ctx->SetOutputDim("Mask", framework::make_ddim(output_shape)); } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext &ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } }; class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel { @@ -80,6 +88,14 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel { "Input(X@GRAD) should not be null."); ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext &ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } }; class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { @@ -116,7 +132,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { // TypedAttrChecker don't support vector type.) AddAttr( "global_pooling", - "(bool, default false) Whether to use the global pooling. " + "(bool, default:false) Whether to use the global pooling. " "If global_pooling = true, ksize and paddings will be ignored.") .SetDefault(false); AddAttr>("strides", @@ -126,7 +142,7 @@ class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker { // TypedAttrChecker don't support vector type.) AddAttr>( "paddings", - "(vector, defalut {0, 0}), paddings(height, width) of pooling " + "(vector, defalut:{0, 0}), paddings(height, width) of pooling " "operator. " "If global_pooling = true, paddings and will be ignored.") .SetDefault({0, 0}); // TODO(Chengduo): Add checker. (Currently, @@ -250,10 +266,12 @@ REGISTER_OP(max_pool2d_with_index, ops::MaxPoolWithIndexOp, REGISTER_OP_CPU_KERNEL( max_pool2d_with_index, - ops::MaxPoolWithIndexKernel); + ops::MaxPoolWithIndexKernel, + ops::MaxPoolWithIndexKernel); REGISTER_OP_CPU_KERNEL( max_pool2d_with_index_grad, - ops::MaxPoolWithIndexGradKernel) + ops::MaxPoolWithIndexGradKernel, + ops::MaxPoolWithIndexGradKernel) REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp, ops::MaxPool3dWithIndexOpMaker, max_pool3d_with_index_grad, @@ -261,7 +279,9 @@ REGISTER_OP(max_pool3d_with_index, ops::MaxPoolWithIndexOp, REGISTER_OP_CPU_KERNEL( max_pool3d_with_index, - ops::MaxPoolWithIndexKernel); + ops::MaxPoolWithIndexKernel, + ops::MaxPoolWithIndexKernel); REGISTER_OP_CPU_KERNEL( max_pool3d_with_index_grad, - ops::MaxPoolWithIndexGradKernel) + ops::MaxPoolWithIndexGradKernel, + ops::MaxPoolWithIndexGradKernel) diff --git a/paddle/operators/pool_with_index_op.cu.cc b/paddle/operators/pool_with_index_op.cu.cc index 287657d4b1c57f354ef050885f71261092bdc062..335064a7eea4ec15c529db5254cbb026ba575f3d 100644 --- a/paddle/operators/pool_with_index_op.cu.cc +++ b/paddle/operators/pool_with_index_op.cu.cc @@ -18,14 +18,18 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( max_pool2d_with_index, - ops::MaxPoolWithIndexKernel); + ops::MaxPoolWithIndexKernel, + ops::MaxPoolWithIndexKernel); REGISTER_OP_GPU_KERNEL( max_pool2d_with_index_grad, - ops::MaxPoolWithIndexGradKernel) + ops::MaxPoolWithIndexGradKernel, + ops::MaxPoolWithIndexGradKernel) REGISTER_OP_GPU_KERNEL( max_pool3d_with_index, - ops::MaxPoolWithIndexKernel); + ops::MaxPoolWithIndexKernel, + ops::MaxPoolWithIndexKernel); REGISTER_OP_GPU_KERNEL( max_pool3d_with_index_grad, - ops::MaxPoolWithIndexGradKernel) + ops::MaxPoolWithIndexGradKernel, + ops::MaxPoolWithIndexGradKernel) diff --git a/paddle/operators/pool_with_index_op.h b/paddle/operators/pool_with_index_op.h index a081607edce335f0265388ab01238d584bcf3ead..40766c7e821e8b85aeda9473798a1f696d0ad719 100644 --- a/paddle/operators/pool_with_index_op.h +++ b/paddle/operators/pool_with_index_op.h @@ -24,8 +24,8 @@ namespace operators { using Tensor = framework::Tensor; -template -class MaxPoolWithIndexKernel : public framework::OpKernel { +template +class MaxPoolWithIndexKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* in_x = context.Input("X"); @@ -44,13 +44,13 @@ class MaxPoolWithIndexKernel : public framework::OpKernel { switch (ksize.size()) { case 2: { - paddle::operators::math::MaxPool2dWithIndexFunctor + paddle::operators::math::MaxPool2dWithIndexFunctor pool2d_forward; pool2d_forward(context.device_context(), *in_x, ksize, strides, paddings, out, mask); } break; case 3: { - paddle::operators::math::MaxPool3dWithIndexFunctor + paddle::operators::math::MaxPool3dWithIndexFunctor pool3d_forward; pool3d_forward(context.device_context(), *in_x, ksize, strides, paddings, out, mask); @@ -60,8 +60,8 @@ class MaxPoolWithIndexKernel : public framework::OpKernel { } }; -template -class MaxPoolWithIndexGradKernel : public framework::OpKernel { +template +class MaxPoolWithIndexGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { const Tensor* mask = context.Input("Mask"); @@ -80,19 +80,19 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel { } if (in_x_grad) { - in_x_grad->mutable_data(context.GetPlace()); + in_x_grad->mutable_data(context.GetPlace()); auto& device_ctx = context.device_context(); math::set_constant(device_ctx, in_x_grad, 0); switch (ksize.size()) { case 2: { - paddle::operators::math::MaxPool2dWithIndexGradFunctor + paddle::operators::math::MaxPool2dWithIndexGradFunctor pool2d_backward; pool2d_backward(device_ctx, *out_grad, *mask, ksize, strides, paddings, in_x_grad); } break; case 3: { - paddle::operators::math::MaxPool3dWithIndexGradFunctor + paddle::operators::math::MaxPool3dWithIndexGradFunctor pool3d_backward; pool3d_backward(device_ctx, *out_grad, *mask, ksize, strides, paddings, in_x_grad); diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 0075ccd24271bf83f139e121efad00c2316cc11b..c976e22c7740ad11279ab5ee75e4d130be8fa0c5 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -284,7 +284,8 @@ class RecurrentOp : public RecurrentBase { auto dst_out = dst_tensor->Slice(seq_offset, seq_offset + 1); // Explicit copy output since the local RNN scope can be destroyed // early. - dst_out.CopyFrom(src_tensor, dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(src_tensor, dev_ctx.GetPlace(), dev_ctx, + &dst_out); }); scopes.Next(); @@ -365,7 +366,8 @@ class RecurrentGradOp : public RecurrentBase { auto *cur_grad_var = cur_scope.Var(cur_grad); auto cur_grad_tensor = cur_grad_var->GetMutable(); - cur_grad_tensor->CopyFrom(ex_tensor, dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(ex_tensor, dev_ctx.GetPlace(), dev_ctx, + cur_grad_tensor); } } @@ -401,7 +403,7 @@ class RecurrentGradOp : public RecurrentBase { auto &inside_tensor = cur_scope.FindVar(inside_grad_name) ->Get(); framework::AttributeMap attrs; - attrs["data_type"] = framework::ToDataType(inside_tensor.type()); + attrs["dtype"] = framework::ToDataType(inside_tensor.type()); attrs["shape"] = framework::vectorize2int(inside_tensor.dims()); attrs["value"] = 0.0f; @@ -438,7 +440,7 @@ class RecurrentGradOp : public RecurrentBase { } auto dst = outside->Slice(seq_offset, seq_offset + 1); - dst.CopyFrom(inside, dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(inside, dev_ctx.GetPlace(), dev_ctx, &dst); }); VLOG(5) << "Link outside gradient finished "; @@ -451,7 +453,7 @@ class RecurrentGradOp : public RecurrentBase { framework::LoDTensor *outside) { outside->Resize(inside.dims()); outside->mutable_data(dev_ctx.GetPlace(), inside.type()); - outside->CopyFrom(inside, dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(inside, dev_ctx.GetPlace(), dev_ctx, outside); }); VLOG(5) << "Link initialize state gradient finished "; } diff --git a/paddle/operators/reshape_op.h b/paddle/operators/reshape_op.h index beb951713ae2a9fd83fe7c1a5e97ee8c642158a8..0e98c8b4f443f88ecba044f2f79228227695e182 100644 --- a/paddle/operators/reshape_op.h +++ b/paddle/operators/reshape_op.h @@ -28,7 +28,7 @@ class ReshapeKernel : public framework::OpKernel { auto* in = ctx.Input("X"); auto out_dims = out->dims(); out->mutable_data(ctx.GetPlace()); - out->CopyFrom(*in, ctx.GetPlace(), ctx.device_context()); + framework::CopyFrom(*in, ctx.GetPlace(), ctx.device_context(), out); out->Resize(out_dims); } }; @@ -42,7 +42,7 @@ class ReshapeGradKernel : public framework::OpKernel { d_x->mutable_data(ctx.GetPlace()); auto in_dims = d_x->dims(); - d_x->CopyFrom(*d_out, ctx.GetPlace(), ctx.device_context()); + framework::CopyFrom(*d_out, ctx.GetPlace(), ctx.device_context(), d_x); d_x->Resize(in_dims); } }; diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc deleted file mode 100644 index ee61ea300c33722471189d06eb09f67a083d2a4d..0000000000000000000000000000000000000000 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ /dev/null @@ -1,134 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/operators/rnn/recurrent_op_utils.h" - -namespace paddle { -namespace operators { -namespace rnn { - -namespace f = paddle::framework; - -using Tensor = framework::Tensor; -using LoDTensor = framework::LoDTensor; - -void SegmentInputs(const std::vector& step_scopes, - const std::vector& inlinks, - const size_t seq_len) { - PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided."); - for (size_t i = 0; i < inlinks.size(); ++i) { - // global inputs - auto input_var = step_scopes[0]->parent().FindVar(inlinks[i]); - PADDLE_ENFORCE_NOT_NULL(input_var, "input link [%s] is not in scope.", - inlinks[i]); - - LoDTensor* input = input_var->GetMutable(); - f::DDim dims = input->dims(); - PADDLE_ENFORCE_EQ(static_cast(dims[0]), seq_len, - "all the inputs be the same length"); - f::DDim step_dims = slice_ddim(dims, 1, dims.size()); - for (size_t j = 0; j < seq_len; j++) { - Tensor* step_input = - step_scopes[j]->Var(inlinks[i])->GetMutable(); - // The input of operators of each step is Tensor here. - // Maybe need to modify Slice function. - *step_input = input->Slice(j, j + 1); - step_input->Resize(step_dims); - } - } -} - -void ConcatOutputs(const std::vector& step_scopes, - const std::vector& outlinks, - const size_t seq_len, const platform::DeviceContext& ctx) { - for (size_t i = 0; i < outlinks.size(); i++) { - auto* output_var = step_scopes[0]->parent().FindVar(outlinks[i]); - PADDLE_ENFORCE_NOT_NULL(output_var, "output link [%s] is not in scope.", - outlinks[i]); - LoDTensor* output = output_var->GetMutable(); - - auto* step_scope_var = step_scopes[0]->FindVar(outlinks[i]); - PADDLE_ENFORCE_NOT_NULL(step_scope_var, "%s not in scope", outlinks[i]); - f::DDim step_dims = - step_scope_var->template GetMutable()->dims(); - std::vector dims_vec = vectorize(step_dims); - dims_vec.insert(dims_vec.begin(), seq_len); - output->Resize(f::make_ddim(dims_vec)); - output->mutable_data(platform::CPUPlace()); - for (size_t j = 0; j < seq_len; j++) { - LoDTensor* step_output = - step_scopes[j]->FindVar(outlinks[i])->GetMutable(); - // TODO(luotao02) data type and platform::DeviceContext() should set - // correctly - (output->Slice(j, j + 1)) - .CopyFrom(*step_output, platform::CPUPlace(), ctx); - } - } -} - -void LinkMemories(const std::vector& scopes, - const std::vector& memories, - const size_t step_id, const int offset) { - PADDLE_ENFORCE_LT(step_id, scopes.size(), - "step [%d] is out of range of step scopes' size [%d]", - step_id, scopes.size()); - PADDLE_ENFORCE_GE(static_cast(step_id) + offset, 0, - "offset [%d] must be large than -[%d]", offset, step_id); - PADDLE_ENFORCE_LT( - step_id + offset, scopes.size(), - "offset [%d] is out of range, it must be less than (%d - %d)", offset, - scopes.size(), step_id); - auto* scope = scopes[step_id]; - auto* linked_scope = scopes[step_id + offset]; - for (auto& attr : memories) { - auto* mem = scope->FindVar(attr.pre_var)->GetMutable(); - auto* linked_mem = linked_scope->FindVar(attr.var)->GetMutable(); - mem->Resize(linked_mem->dims()); - mem->ShareDataWith(*linked_mem); - } -} - -void InitArgument(const ArgumentName& name, Argument* arg, - const framework::OperatorBase& op, bool is_grad) { - arg->step_scopes = - is_grad ? op.Input(name.step_scopes) : op.Output(name.step_scopes); - arg->inlinks = op.Inputs(name.inlinks); - arg->outlinks = op.Outputs(name.outlinks); - - auto& boot_memories = is_grad ? op.Outputs(name.initial_states) - : op.Inputs(name.initial_states); - // attributes - auto& memories = op.Attr>(name.states); - auto& pre_memories = op.Attr>(name.ex_states); - - PADDLE_ENFORCE(memories.size() == boot_memories.size(), - "the size of states, initial_states don't match:%d,%d", - memories.size(), boot_memories.size()); - PADDLE_ENFORCE(pre_memories.size() == boot_memories.size(), - "the size of ex_states, initial_states don't match:%d,%d", - pre_memories.size(), boot_memories.size()); - PADDLE_ENFORCE(memories.size() > 0, "more than 1 states should be set"); - - for (size_t i = 0; i < memories.size(); ++i) { - rnn::StateAttr mem_attr; - mem_attr.var = memories[i]; - mem_attr.pre_var = pre_memories[i]; - mem_attr.boot_var = boot_memories[i]; - (arg->states).push_back(mem_attr); - } -} - -} // namespace rnn -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/rnn/recurrent_op_utils.h b/paddle/operators/rnn/recurrent_op_utils.h deleted file mode 100644 index fb0e158e07745d58c6211d33e385b324e492b95e..0000000000000000000000000000000000000000 --- a/paddle/operators/rnn/recurrent_op_utils.h +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#pragma once - -#include - -#include "paddle/framework/operator.h" - -namespace paddle { -namespace operators { -namespace rnn { - -using Scope = framework::Scope; - -/** - * Memory of a RNN (same as the role of `Momory` in PaddlePaddle). - * - * Memory attributes cached by this op, dims will be infered from - * boot memories in father scope. Other attributes are copied from Op's proto - * attributes. - */ -struct StateAttr { - // name of current state variable - std::string var; - // name of previous step's state variable - std::string pre_var; - // name of the variables to init this memory (same role of `boot_layer` in - // PaddlePaddle), which is store in father's scope. - std::string boot_var; -}; - -struct Argument { - std::string step_net; - std::string step_scopes; - std::vector inlinks; - std::vector outlinks; - std::vector states; -}; - -struct ArgumentName { - std::string step_net; - std::string step_scopes; - std::string inlinks; - std::string outlinks; - std::string states; // the memory name - std::string ex_states; // the previous memory name - std::string initial_states; // the boot memory name -}; - -/** - * Prepare inputs for each step net. - */ -void SegmentInputs(const std::vector& step_scopes, - const std::vector& inlinks, - const size_t seq_len); - -/** - * Process outputs of step nets and merge to variables. - */ -void ConcatOutputs(const std::vector& step_scopes, - const std::vector& outlinks, - const size_t seq_len, const platform::DeviceContext& ctx); - -void LinkMemories(const std::vector& step_scopes, - const std::vector& memories, const size_t step_id, - const int offset); - -void InitArgument(const ArgumentName& name, Argument* arg, - const framework::OperatorBase& op, bool is_grad = false); - -} // namespace rnn -} // namespace operators -} // namespace paddle diff --git a/paddle/operators/rnn_memory_helper_op.cc b/paddle/operators/rnn_memory_helper_op.cc index b621c7f1ba3f9e9613dea5bc98ef74c7c6dae9a0..3a035f0b9acb94bab60659938e11b4996b8eaa0f 100644 --- a/paddle/operators/rnn_memory_helper_op.cc +++ b/paddle/operators/rnn_memory_helper_op.cc @@ -62,7 +62,7 @@ class RNNMemoryHelperOpInfoMaker : public framework::OpProtoAndCheckerMaker { : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", ""); AddOutput("Out", ""); - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); @@ -95,7 +95,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase { auto &in_var_tensor = in_var->Get(); framework::AttributeMap attrs; - attrs["data_type"] = framework::ToDataType(in_var_tensor.type()); + attrs["dtype"] = framework::ToDataType(in_var_tensor.type()); attrs["shape"] = framework::vectorize2int(in_var_tensor.dims()); attrs["value"] = 0.0f; @@ -121,7 +121,7 @@ class RNNMemoryHelperGradOpInfoMaker AddInput("X", ""); AddInput("Out", ""); AddOutput(framework::GradVarName("X"), ""); - AddAttr("data_type", + AddAttr("dtype", "(int, default 5 (FP32)) " "Output data type") .SetDefault(framework::DataType::FP32); diff --git a/paddle/operators/roi_pool_op.cc b/paddle/operators/roi_pool_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..2b5e66c96b726a3c1fdb2596a244c5395db85279 --- /dev/null +++ b/paddle/operators/roi_pool_op.cc @@ -0,0 +1,165 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/roi_pool_op.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +static constexpr int kROISize = 5; + +class ROIPoolOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of ROIPoolOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("ROIs"), + "Input(ROIs) of ROIPoolOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of ROIPoolOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Argmax"), + "Output(Argmax) of ROIPoolOp should not be null."); + auto input_dims = ctx->GetInputDim("X"); + auto rois_dims = ctx->GetInputDim("ROIs"); + + PADDLE_ENFORCE(input_dims.size() == 4, + "The format of input tensor is NCHW."); + PADDLE_ENFORCE(rois_dims.size() == 2, + "ROIs should be a 2-D tensor of shape (num_rois, 5)" + "given as [[batch_id, x1, y1, x2, y2], …]."); + PADDLE_ENFORCE(rois_dims[1] == kROISize, + "ROIs should be a 2-D tensor of shape (num_rois, 5)" + "given as [[batch_id, x1, y1, x2, y2], …]."); + + int pooled_height = ctx->Attrs().Get("pooled_height"); + int pooled_width = ctx->Attrs().Get("pooled_width"); + float spatial_scale = ctx->Attrs().Get("spatial_scale"); + + PADDLE_ENFORCE_GT(pooled_height, 0, + "The pooled output height must greater than 0"); + PADDLE_ENFORCE_GT(pooled_width, 0, + "The pooled output width must greater than 0"); + PADDLE_ENFORCE_GT(spatial_scale, 0.0f, + "The spatial scale must greater than 0"); + + auto out_dims = input_dims; + out_dims[0] = rois_dims[0]; + out_dims[1] = input_dims[1]; + out_dims[2] = pooled_height; + out_dims[3] = pooled_width; + + ctx->SetOutputDim("Out", out_dims); + ctx->SetOutputDim("Argmax", out_dims); + } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +class ROIPoolGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "The gradient of Out should not be null."); + PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName("X")), + "The gradient of X should not be null."); + ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X")); + } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { + public: + ROIPoolOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(Tensor), " + "the input of ROIPoolOp. " + "The format of input tensor is NCHW. Where N is batch size, " + "C is the number of input channels, " + "H is the height of the feature, and " + "W is the width of the feature."); + AddInput("ROIs", + "(Tensor), " + "ROIs (Regions of Interest) to pool over. " + "should be a 2-D tensor of shape (num_rois, 5)" + "given as [[batch_id, x1, y1, x2, y2], …]. " + "Where batch_id is the id of the data, " + "(x1, y1) is the top left coordinates, and " + "(x2, y2) is the bottom right coordinates."); + AddOutput("Out", + "(Tensor), " + "The output of ROIPoolOp is a 4-D tensor with shape " + "(num_rois, channels, pooled_h, pooled_w)."); + AddOutput("Argmax", + "(Tensor), " + "Argmaxes corresponding to indices in X used " + "for gradient computation. Only output " + "if arg “is_test” is false.") + .AsIntermediate(); + AddAttr("spatial_scale", + "(float, default 1.0), " + "Multiplicative spatial scale factor " + "to translate ROI coords from their input scale " + "to the scale used when pooling.") + .SetDefault(1.0); + AddAttr("pooled_height", + "(int, default 1), " + "The pooled output height.") + .SetDefault(1); + AddAttr("pooled_width", + "(int, default 1), " + "The pooled output width.") + .SetDefault(1); + AddComment(R"DOC( +ROIPool operator + +ROI Pooling for Faster-RCNN. The link below is a further introduction: +https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn + )DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(roi_pool, ops::ROIPoolOp, ops::ROIPoolOpMaker, roi_pool_grad, + ops::ROIPoolGradOp); +REGISTER_OP_CPU_KERNEL( + roi_pool, ops::CPUROIPoolOpKernel, + ops::CPUROIPoolOpKernel); +REGISTER_OP_CPU_KERNEL( + roi_pool_grad, + ops::CPUROIPoolGradOpKernel, + ops::CPUROIPoolOpKernel); diff --git a/paddle/operators/roi_pool_op.cu b/paddle/operators/roi_pool_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..9a4c8ca752bb7abc4f44d4815743769bc989703a --- /dev/null +++ b/paddle/operators/roi_pool_op.cu @@ -0,0 +1,208 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/roi_pool_op.h" +#include "paddle/platform/cuda_helper.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +static constexpr int kNumCUDAThreads = 512; +static constexpr int kNumMaxinumNumBlocks = 4096; +static constexpr int kROISize = 5; + +static inline int NumBlocks(const int N) { + return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, + kNumMaxinumNumBlocks); +} + +template +__global__ void GPUROIPoolForward(const int nthreads, const T* input_data, + const int64_t* input_rois, + const float spatial_scale, const int channels, + const int height, const int width, + const int pooled_height, + const int pooled_width, T* output_data, + int64_t* argmax_data) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int offset = blockDim.x * gridDim.x; + for (size_t i = index; i < nthreads; i += offset) { + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const int64_t* offset_input_rois = input_rois + n * kROISize; + int roi_batch_ind = offset_input_rois[0]; + int roi_start_w = round(offset_input_rois[1] * spatial_scale); + int roi_start_h = round(offset_input_rois[2] * spatial_scale); + int roi_end_w = round(offset_input_rois[3] * spatial_scale); + int roi_end_h = round(offset_input_rois[4] * spatial_scale); + + int roi_width = max(roi_end_w - roi_start_w + 1, 1); + int roi_height = max(roi_end_h - roi_start_h + 1, 1); + T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + int hstart = static_cast(floor(static_cast(ph) * bin_size_h)); + int wstart = static_cast(floor(static_cast(pw) * bin_size_w)); + int hend = static_cast(ceil(static_cast(ph + 1) * bin_size_h)); + int wend = static_cast(ceil(static_cast(pw + 1) * bin_size_w)); + + hstart = min(max(hstart + roi_start_h, 0), height); + hend = min(max(hend + roi_start_h, 0), height); + wstart = min(max(wstart + roi_start_w, 0), width); + wend = min(max(wend + roi_start_w, 0), width); + bool is_empty = (hend <= hstart) || (wend <= wstart); + + T maxval = is_empty ? 0 : -std::numeric_limits::max(); + int maxidx = -1; + const T* offset_input_data = + input_data + (roi_batch_ind * channels + c) * height * width; + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + int input_data_index = h * width + w; + if (offset_input_data[input_data_index] > maxval) { + maxval = offset_input_data[input_data_index]; + maxidx = input_data_index; + } + } + } + output_data[index] = maxval; + if (argmax_data) { + argmax_data[index] = maxidx; + } + } +} + +template +__global__ void GPUROIPoolBackward( + const int nthreads, const int64_t* input_rois, const T* output_grad, + const int64_t* argmax_data, const int num_rois, const float spatial_scale, + const int channels, const int height, const int width, + const int pooled_height, const int pooled_width, T* input_grad) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int offset = blockDim.x * gridDim.x; + for (int i = index; i < nthreads; i += offset) { + int pw = index % pooled_width; + int ph = (index / pooled_width) % pooled_height; + int c = (index / pooled_width / pooled_height) % channels; + int n = index / pooled_width / pooled_height / channels; + + const int64_t* offset_input_rois = input_rois + n * kROISize; + int roi_batch_ind = offset_input_rois[0]; + int input_offset = (roi_batch_ind * channels + c) * height * width; + int output_offset = (n * channels + c) * pooled_height * pooled_width; + const T* offset_output_grad = output_grad + output_offset; + T* offset_input_grad = input_grad + input_offset; + const int64_t* offset_argmax_data = argmax_data + output_offset; + + int argmax = offset_argmax_data[ph * pooled_width + pw]; + if (argmax != -1) { + platform::CudaAtomicAdd( + offset_input_grad + argmax, + static_cast(offset_output_grad[ph * pooled_width + pw])); + } + } +} + +template +class GPUROIPoolOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* rois = ctx.Input("ROIs"); + auto* out = ctx.Output("Out"); + auto* argmax = ctx.Output("Argmax"); + + auto pooled_height = ctx.Attr("pooled_height"); + auto pooled_width = ctx.Attr("pooled_width"); + auto spatial_scale = ctx.Attr("spatial_scale"); + + auto in_dims = in->dims(); + auto in_stride = framework::stride(in_dims); + int channels = in_dims[1]; + int height = in_dims[2]; + int width = in_dims[3]; + + size_t rois_num = rois->dims()[0]; + if (rois_num == 0) return; + + int output_size = out->numel(); + int blocks = NumBlocks(output_size); + int threads = kNumCUDAThreads; + + GPUROIPoolForward< + T><<>>( + output_size, in->data(), rois->data(), spatial_scale, + channels, height, width, pooled_height, pooled_width, + out->mutable_data(ctx.GetPlace()), + argmax->mutable_data(ctx.GetPlace())); + } +}; + +template +class GPUROIPoolGradOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* rois = ctx.Input("ROIs"); + auto* argmax = ctx.Input("Argmax"); + + auto* out_grad = ctx.Input(framework::GradVarName("Out")); + auto* x_grad = ctx.Output(framework::GradVarName("X")); + + auto pooled_height = ctx.Attr("pooled_height"); + auto pooled_width = ctx.Attr("pooled_width"); + auto spatial_scale = ctx.Attr("spatial_scale"); + + size_t rois_num = rois->dims()[0]; + int channels = in->dims()[1]; + int height = in->dims()[2]; + int width = in->dims()[3]; + + if (x_grad) { + x_grad->mutable_data(ctx.GetPlace()); + math::SetConstant set_zero; + set_zero(ctx.device_context(), x_grad, static_cast(0)); + + int output_grad_size = out_grad->numel(); + int blocks = NumBlocks(output_grad_size); + int threads = kNumCUDAThreads; + + if (output_grad_size > 0) { + GPUROIPoolBackward< + T><<>>( + output_grad_size, rois->data(), out_grad->data(), + argmax->data(), rois_num, spatial_scale, channels, height, + width, pooled_height, pooled_width, + x_grad->mutable_data(ctx.GetPlace())); + } + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + roi_pool, ops::GPUROIPoolOpKernel, + ops::GPUROIPoolOpKernel); +REGISTER_OP_GPU_KERNEL( + roi_pool_grad, + ops::GPUROIPoolGradOpKernel, + ops::GPUROIPoolOpKernel); diff --git a/paddle/operators/roi_pool_op.h b/paddle/operators/roi_pool_op.h new file mode 100644 index 0000000000000000000000000000000000000000..1691eb482b03eab9fc793974ba1f39fbf17beafa --- /dev/null +++ b/paddle/operators/roi_pool_op.h @@ -0,0 +1,189 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +template +class CPUROIPoolOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* rois = ctx.Input("ROIs"); + auto* out = ctx.Output("Out"); + auto* argmax = ctx.Output("Argmax"); + + auto pooled_height = ctx.Attr("pooled_height"); + auto pooled_width = ctx.Attr("pooled_width"); + auto spatial_scale = ctx.Attr("spatial_scale"); + + auto in_dims = in->dims(); + int batch_size = in_dims[0]; + int channels = in_dims[1]; + int height = in_dims[2]; + int width = in_dims[3]; + int rois_num = rois->dims()[0]; + + auto in_stride = framework::stride(in_dims); + auto argmax_stride = framework::stride(argmax->dims()); + auto roi_stride = framework::stride(rois->dims()); + auto out_stride = framework::stride(out->dims()); + + const T* input_data = in->data(); + const int64_t* rois_data = rois->data(); + T* output_data = out->mutable_data(ctx.GetPlace()); + int64_t* argmax_data = argmax->mutable_data(ctx.GetPlace()); + + for (int n = 0; n < rois_num; ++n) { + int roi_batch_id = rois_data[0]; + PADDLE_ENFORCE_GE(roi_batch_id, 0); + PADDLE_ENFORCE_LT(roi_batch_id, batch_size); + rois_data += roi_stride[0]; + } + + rois_data = rois->data(); + for (int n = 0; n < rois_num; ++n) { + int roi_batch_id = rois_data[0]; + int roi_start_w = round(rois_data[1] * spatial_scale); + int roi_start_h = round(rois_data[2] * spatial_scale); + int roi_end_w = round(rois_data[3] * spatial_scale); + int roi_end_h = round(rois_data[4] * spatial_scale); + + // Force malformed ROIs to be 1x1 + int roi_height = std::max(roi_end_h - roi_start_h + 1, 1); + int roi_width = std::max(roi_end_w - roi_start_w + 1, 1); + + const float bin_size_h = + static_cast(roi_height) / static_cast(pooled_height); + const float bin_size_w = + static_cast(roi_width) / static_cast(pooled_width); + + const T* batch_data = input_data + roi_batch_id * in_stride[0]; + + for (int c = 0; c < channels; ++c) { + for (int ph = 0; ph < pooled_height; ++ph) { + for (int pw = 0; pw < pooled_width; ++pw) { + // Compute pooling region for this output unit: + // start (included) = floor(ph * roi_height / pooled_height_) + // end (excluded) = ceil((ph + 1) * roi_height / pooled_height_) + int hstart = + static_cast(floor(static_cast(ph) * bin_size_h)); + int wstart = + static_cast(floor(static_cast(pw) * bin_size_w)); + int hend = + static_cast(ceil(static_cast(ph + 1) * bin_size_h)); + int wend = + static_cast(ceil(static_cast(pw + 1) * bin_size_w)); + + hstart = std::min(std::max(hstart + roi_start_h, 0), height); + hend = std::min(std::max(hend + roi_start_h, 0), height); + wstart = std::min(std::max(wstart + roi_start_w, 0), width); + wend = std::min(std::max(wend + roi_start_w, 0), width); + + const int pool_index = ph * pooled_width + pw; + + // Define an empty pooling region to be zero + bool is_empty = (hend <= hstart) || (wend <= wstart); + output_data[pool_index] = + is_empty ? 0 : -std::numeric_limits::max(); + argmax_data[pool_index] = -1; + + for (int h = hstart; h < hend; ++h) { + for (int w = wstart; w < wend; ++w) { + const int index = h * width + w; + if (batch_data[index] > output_data[pool_index]) { + output_data[pool_index] = batch_data[index]; + argmax_data[pool_index] = index; + } + } + } + } + } + + batch_data += in_stride[1]; + output_data += out_stride[1]; + argmax_data += argmax_stride[1]; + } + // Increment ROI data pointer + rois_data += roi_stride[0]; + } + return; + } +}; + +template +class CPUROIPoolGradOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* rois = ctx.Input("ROIs"); + auto* argmax = ctx.Input("Argmax"); + + auto* out_grad = + ctx.Input(framework::GradVarName("Out")); + auto* x_grad = ctx.Output(framework::GradVarName("X")); + + auto pooled_height = ctx.Attr("pooled_height"); + auto pooled_width = ctx.Attr("pooled_width"); + + if (x_grad) { + int channels = in->dims()[1]; + auto in_stride = framework::stride(in->dims()); + auto roi_stride = framework::stride(rois->dims()); + + const int64_t* rois_data = rois->data(); + int rois_num = rois->dims()[0]; + + T* x_grad_data = x_grad->mutable_data(ctx.GetPlace()); + math::SetConstant set_zero; + set_zero(ctx.device_context(), x_grad, static_cast(0)); + + size_t roi_offset = roi_stride[0]; + size_t batch_offset = in_stride[0]; + size_t channel_offset = in_stride[1]; + + const T* out_grad_data = out_grad->data(); + size_t pool_channel_offset = pooled_height * pooled_width; + const int64_t* argmax_data = argmax->data(); + + for (size_t n = 0; n < rois_num; ++n) { + size_t roi_batch_idx = rois_data[0]; + T* batch_grad_data = x_grad_data + batch_offset * roi_batch_idx; + for (int c = 0; c < channels; ++c) { + for (int ph = 0; ph < pooled_height; ++ph) { + for (int pw = 0; pw < pooled_width; ++pw) { + size_t pool_index = ph * pooled_width + pw; + + if (argmax_data[pool_index] >= 0) { + size_t index = static_cast(argmax_data[pool_index]); + batch_grad_data[index] += out_grad_data[pool_index]; + } + } + } + batch_grad_data += channel_offset; + out_grad_data += pool_channel_offset; + argmax_data += pool_channel_offset; + } + rois_data += roi_offset; + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/sequence_conv_op.cc b/paddle/operators/sequence_conv_op.cc index 41cadce4c603a9c14db79e2f6b30f8664cf72a38..c5533732d44737bb8cc71fd8ac46f3c36c72ada1 100644 --- a/paddle/operators/sequence_conv_op.cc +++ b/paddle/operators/sequence_conv_op.cc @@ -179,7 +179,9 @@ REGISTER_OP(sequence_conv, ops::SequenceConvOp, ops::SequenceConvOpMaker, sequence_conv_grad, ops::SequenceConvGradOp); REGISTER_OP_CPU_KERNEL( - sequence_conv, ops::SequenceConvKernel); + sequence_conv, ops::SequenceConvKernel, + ops::SequenceConvKernel); REGISTER_OP_CPU_KERNEL( sequence_conv_grad, - ops::SequenceConvGradKernel); + ops::SequenceConvGradKernel, + ops::SequenceConvGradKernel); diff --git a/paddle/operators/sequence_conv_op.cu.cc b/paddle/operators/sequence_conv_op.cu.cc index 6106b0e46c0ab96e01dfc344055f23dbf4a1a2c3..c8136dbcb35be4f1236dddc3d24546f9d91670c8 100644 --- a/paddle/operators/sequence_conv_op.cu.cc +++ b/paddle/operators/sequence_conv_op.cu.cc @@ -16,7 +16,9 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( - sequence_conv, ops::SequenceConvKernel); + sequence_conv, ops::SequenceConvKernel, + ops::SequenceConvKernel); REGISTER_OP_GPU_KERNEL( sequence_conv_grad, - ops::SequenceConvGradKernel); + ops::SequenceConvGradKernel, + ops::SequenceConvGradKernel); diff --git a/paddle/operators/sequence_slice_op.cc b/paddle/operators/sequence_slice_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..255683a572c0e8d54791cb0c905d85239920d992 --- /dev/null +++ b/paddle/operators/sequence_slice_op.cc @@ -0,0 +1,131 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/sequence_slice_op.h" + +namespace paddle { +namespace operators { + +class SequenceSliceOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of SequenceSliceOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Offset"), + "Input(Offset) of SequenceSliceOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Length"), + "Input(Length) of SequenceSliceOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of SequenceSliceOp should not be null."); + auto input_dims = ctx->GetInputDim("X"); + + auto offset_dim = ctx->GetInputDim("Offset"); + auto length_dim = ctx->GetInputDim("Length"); + + PADDLE_ENFORCE_EQ( + offset_dim.size(), 2UL, + "Only support one level sequence now, The rank of offset must be 2."); + PADDLE_ENFORCE_EQ( + length_dim.size(), 2UL, + "Only support one level sequence now, The rank of Length must be 2."); + + // Initialize the output's dims to maximum, + // and re-set to real dims by the value of Offset and Length at kernel + ctx->SetOutputDim("Out", input_dims); + } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +class SequenceSliceGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "The gradient of Out should not be null."); + PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName("X")), + "The gradient of X should not be null."); + ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X")); + } + + protected: + framework::OpKernelType GetKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + +class SequenceSliceOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SequenceSliceOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(LoDTensor), " + "the input of SequenceSliceOp."); + AddInput("Offset", + "(Tensor), " + "a vector to describe the offset of every input sequence for " + "sub sequence item."); + AddInput("Length", + "(Tensor), " + "a vector to describe the length of every input sequence for " + "sub sequence item."); + AddOutput("Out", "(LoDTensor), the output of SequenceSliceOp."); + AddComment(R"DOC( +Sequence slice operator + +The operator crops a subsequence from given sequence with given start offset and subsequence length. +It only supports sequence (LoD Tensor with level number is 1). +- Case: + X = [[a1, a2; + b1, b2; + c1, c2] + [d1, d2; + e1, e2]] + LoD(X) = {{0, 3, 5}}; Dims(X) = (5, 2) + Offset = [[0], [1]]; Length = [[2], [1]] + + Out = [[a1, a2; + b1, b2] + [e1, e2]] + LoD(Out) = {{0, 2, 3}}; Dims(Out) = (3, 2) +NOTE: The first dimension size of input, the size of offset and Length, should be equal. The offset start from 0. + )DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(sequence_slice, ops::SequenceSliceOp, ops::SequenceSliceOpMaker, + sequence_slice_grad, ops::SequenceSliceGradOp); +REGISTER_OP_CPU_KERNEL( + sequence_slice, + ops::SequenceSliceOpKernel); +REGISTER_OP_CPU_KERNEL( + sequence_slice_grad, + ops::SequenceSliceGradOpKernel); diff --git a/paddle/operators/sequence_slice_op.cu b/paddle/operators/sequence_slice_op.cu new file mode 100755 index 0000000000000000000000000000000000000000..a9f59dadba74d900fa5cc0601fb5b264ea19e34d --- /dev/null +++ b/paddle/operators/sequence_slice_op.cu @@ -0,0 +1,23 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/sequence_slice_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL( + sequence_slice, + ops::SequenceSliceOpKernel); +REGISTER_OP_GPU_KERNEL( + sequence_slice_grad, + ops::SequenceSliceGradOpKernel); diff --git a/paddle/operators/sequence_slice_op.h b/paddle/operators/sequence_slice_op.h new file mode 100644 index 0000000000000000000000000000000000000000..6411e0a46630beb0a9abb6aa5e517978b25a5254 --- /dev/null +++ b/paddle/operators/sequence_slice_op.h @@ -0,0 +1,172 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/strided_memcpy.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using LoD = framework::LoD; + +template +inline LoD SequenceSliceLoD(const T& in, const int64_t* offset_data, + const int64_t* length_data) { + auto out_lod = in.lod(); + size_t lod_offset = 0; + + auto n = in.lod()[0].size() - 1; + out_lod[0][0] = 0; + for (size_t i = 0; i < n; ++i) { + lod_offset += length_data[i]; + out_lod[0][i + 1] = lod_offset; + } + return out_lod; +} + +template +class SequenceSliceOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* offset = ctx.Input("Offset"); + auto* length = ctx.Input("Length"); + auto* out = ctx.Output("Out"); + + auto lod = in->lod(); + auto n = lod[0].size() - 1; + + PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now."); + PADDLE_ENFORCE_EQ( + n, static_cast(length->dims()[0]), + "The size of input-sequence and length-array should be the same") + PADDLE_ENFORCE_EQ( + n, static_cast(offset->dims()[0]), + "The size of input-sequence and offset-array should be the same") + + const int64_t* offset_data = offset->data(); + const int64_t* length_data = length->data(); + framework::Tensor offset_cpu; + framework::Tensor length_cpu; + + if (platform::is_gpu_place(ctx.GetPlace())) { + offset_cpu.mutable_data(offset->dims(), platform::CPUPlace()); + framework::CopyFrom(*offset, platform::CPUPlace(), ctx.device_context(), + &offset_cpu); + offset_data = offset_cpu.data(); + + length_cpu.mutable_data(length->dims(), platform::CPUPlace()); + framework::CopyFrom(*length, platform::CPUPlace(), ctx.device_context(), + &length_cpu); + length_data = length_cpu.data(); + } + + for (size_t i = 0; i < n; ++i) { + PADDLE_ENFORCE_LT(0, offset_data[i], + "The offset[%d] must greater than zero.", i) + PADDLE_ENFORCE_LT(0, length_data[i], + "The length[%d] must greater than zero.", i) + PADDLE_ENFORCE_LT(lod[0][i] + offset_data[i] + length_data[i], + lod[0][i + 1], "The target tensor's length overflow.") + } + + out->mutable_data(ctx.GetPlace()); + auto out_lod = SequenceSliceLoD(*in, offset_data, length_data); + auto out_dims = in->dims(); + out_dims[0] = out_lod[0][out_lod[0].size() - 1]; + out->Resize(out_dims); + out->set_lod(out_lod); + + auto in_stride = framework::stride(in->dims()); + auto out_stride = framework::stride(out->dims()); + + size_t out_offset = 0; + for (size_t i = 0; i < n; ++i) { + Tensor in_t = in->Slice( + static_cast(lod[0][i] + offset_data[i]), + static_cast(lod[0][i] + offset_data[i] + length_data[i])); + + StridedMemcpy(ctx.device_context(), in_t.data(), in_stride, + in_t.dims(), out_stride, out->data() + out_offset); + out_offset += length_data[i] * in_stride[0]; + } + } +}; + +template +class SequenceSliceGradOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* in = ctx.Input("X"); + auto* offset = ctx.Input("Offset"); + auto* length = ctx.Input("Length"); + auto* out_grad = + ctx.Input(framework::GradVarName("Out")); + auto* x_grad = + ctx.Output(framework::GradVarName("X")); + + const int64_t* offset_data = offset->data(); + const int64_t* length_data = length->data(); + framework::Tensor offset_cpu; + framework::Tensor length_cpu; + + if (platform::is_gpu_place(ctx.GetPlace())) { + offset_cpu.mutable_data(offset->dims(), platform::CPUPlace()); + framework::CopyFrom(*offset, platform::CPUPlace(), ctx.device_context(), + &offset_cpu); + offset_data = offset_cpu.data(); + + length_cpu.mutable_data(length->dims(), platform::CPUPlace()); + framework::CopyFrom(*length, platform::CPUPlace(), ctx.device_context(), + &length_cpu); + length_data = length_cpu.data(); + } + + auto lod = in->lod(); + auto out_lod = out_grad->lod(); + + if (x_grad) { + x_grad->mutable_data(ctx.GetPlace()); + x_grad->set_lod(in->lod()); + math::SetConstant set_zero; + set_zero(ctx.device_context(), x_grad, static_cast(0)); + + auto out_grad_stride = framework::stride(out_grad->dims()); + + for (size_t i = 0; i < out_lod[0].size() - 1; ++i) { + Tensor out_grad_t = + out_grad->Slice(static_cast(out_lod[0][i]), + static_cast(out_lod[0][i + 1])); + auto out_grad_stride = framework::stride(out_grad_t.dims()); + + auto x_grad_stride = framework::stride(x_grad->dims()); + + Tensor x_grad_t = x_grad->Slice( + static_cast(lod[0][i] + offset_data[i]), + static_cast(lod[0][i] + offset_data[i] + length_data[i])); + + StridedMemcpy(ctx.device_context(), out_grad_t.data(), + out_grad_stride, out_grad_t.dims(), x_grad_stride, + x_grad_t.data()); + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc index 65bccc0c81d0ad9674649933a20ec7b09fec5b37..c380e606869fd2c559c7d5f378857ca74fa8d8d3 100644 --- a/paddle/operators/shrink_rnn_memory_op.cc +++ b/paddle/operators/shrink_rnn_memory_op.cc @@ -57,11 +57,21 @@ class ShrinkRNNMemoryOpProtoMaker : public framework::OpProtoAndCheckerMaker { ShrinkRNNMemoryOpProtoMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", ""); - AddInput("RankTable", ""); - AddInput("I", ""); - AddOutput("Out", ""); - AddComment(""); + AddInput("X", "(LoDTensor) The RNN step memory to be shrinked."); + AddInput("RankTable", "(LoDRankTable) The lod_rank_table of dynamic RNN."); + AddInput("I", + "(LoDTensor) The step index. The RNN step memory 'X' will be " + "shrinked to match the size of the input of the index'th step."); + AddOutput("Out", "(LoDTensor) The shrinked RNN step memory."); + AddComment( + R"DOC( + In dynamic RNN, we are able to handle sequences of different lengths. + Because of the multiple lengths, the size of each step input can be + different, which may lead to a mismatching between the input of + the current step and the memory generated by the previous one. This + operator shrinks memory according to the size of the next step input, + to make sure that they can match each other. + )DOC"); } }; @@ -101,8 +111,8 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { } else { auto &dout_tensor = dout_var->Get(); auto height = dout_tensor.dims()[0]; - dx_tensor.Slice(0, static_cast(height)) - .CopyFrom(dout_tensor, dout_tensor.place(), dev_ctx); + auto slice = dx_tensor.Slice(0, static_cast(height)); + framework::CopyFrom(dout_tensor, dout_tensor.place(), dev_ctx, &slice); if (dx_tensor.dims()[0] < height) { auto rest_tensor = dx_tensor.Slice( static_cast(height), static_cast(dout_tensor.dims()[0])); diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 93f89e33a73c5f4c6c0e5a8793a0abe7c692b656..93e0525badc26808f0dca70cc1153ac728f1fe9c 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -59,7 +59,7 @@ Then the ratio of the exponential of the given dimension and the sum of exponential values of all the other dimensions is the output of the softmax operator. -For each row `i` and each column `j` in input X, we have: +For each row $i$ and each column $j$ in Input(X), we have: $$Y[i, j] = \frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])}$$ )DOC"); diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index 3dbb62d2e571eb92025c1b3fc0a6653c7cda007a..fc027d6f95cdbc24af59ef1188b6f16f6a93e85c 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -67,15 +67,15 @@ The equation is as follows: 1) Hard label (one-hot label, so every sample has exactly one class) -$$Loss_j = \f$ -\text{Logit}_{Label_j} + +$$Loss_j = -\text{Logit}_{Label_j} + \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right), -j = 1, ..., K $\f$$ +j = 1,..., K$$ 2) Soft label (each sample can have a distribution over all classes) -$$Loss_j = \f$ -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i - +$$Loss_j = -\sum_{i=0}^{K}\text{Label}_i \left(\text{Logit}_i - \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right), -j = 1,...,K $\f$$ +j = 1,...,K$$ )DOC"); } diff --git a/paddle/operators/split_lod_tensor_op.cc b/paddle/operators/split_lod_tensor_op.cc index db635f2ba0804143c9a2e04ff006dfbc8744f3fc..f164a4771186635232fea46327ca1fb8b86f2852 100644 --- a/paddle/operators/split_lod_tensor_op.cc +++ b/paddle/operators/split_lod_tensor_op.cc @@ -49,7 +49,7 @@ class SplitLoDTensorOp : public framework::OperatorBase { cpu_mask->ShareDataWith(mask); } else if (platform::is_gpu_place(mask.place())) { #ifdef PADDLE_WITH_CUDA - cpu_mask->CopyFrom(mask, platform::CPUPlace(), dev_ctx); + framework::CopyFrom(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get()); #else PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option"); #endif @@ -105,10 +105,11 @@ class SplitLoDTensorOp : public framework::OperatorBase { continue; } // out[offset: offset+len] = x[each_range.begin: each_range.end] - out->Slice(static_cast(offset), static_cast(offset + len)) - .CopyFrom(x.Slice(static_cast(each_range.begin), - static_cast(each_range.end)), - x.place(), dev_ctx); + auto slice = out->Slice(static_cast(offset), + static_cast(offset + len)); + framework::CopyFrom(x.Slice(static_cast(each_range.begin), + static_cast(each_range.end)), + x.place(), dev_ctx, &slice); offset += len; } } diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index c2b7632b2865a3ef66051d815d7722a08c6a8cbd..ddc210c26e69566fef9baa20f49ba1052e993b3f 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -176,4 +176,6 @@ namespace ops = paddle::operators; REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker, ops::SumOpVarTypeInference); REGISTER_OP_CPU_KERNEL(sum, ops::SumKernel, - ops::SumKernel); + ops::SumKernel, + ops::SumKernel, + ops::SumKernel); diff --git a/paddle/operators/sum_op.cu b/paddle/operators/sum_op.cu index 5cf05b876b6d6a2ce61d9e10b7ec52ed3cef57d7..5c30dd4d470c2e0acecef18524a4a81f9eb786a9 100644 --- a/paddle/operators/sum_op.cu +++ b/paddle/operators/sum_op.cu @@ -14,4 +14,6 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(sum, ops::SumKernel, - ops::SumKernel); + ops::SumKernel, + ops::SumKernel, + ops::SumKernel); diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index 4ca15611392b3117aa6c92cba95911eb8bebeb15..4afec03ecef168077c9964f5cb1da7cd61861f40 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -102,8 +102,8 @@ class SumKernel : public framework::OpKernel { out_array.resize(i + 1); } if (out_array[i].numel() == 0) { - out_array[i].CopyFrom(in_array[i], in_array[i].place(), - context.device_context()); + framework::CopyFrom(in_array[i], in_array[i].place(), + context.device_context(), &out_array[i]); out_array[i].set_lod(in_array[i].lod()); } else { PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod()); diff --git a/paddle/operators/tensor.save b/paddle/operators/tensor.save new file mode 100644 index 0000000000000000000000000000000000000000..c24308a7d0131b84c28c0a9857cce4949afb2091 Binary files /dev/null and b/paddle/operators/tensor.save differ diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc index ae1b48d7a8e3d573a5134a822a2ed5ef70511077..ad09fb53ce8c9bf0187e595fe3cdcb6685ab9889 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -38,7 +38,7 @@ class WriteToArrayOp : public ArrayOp { out->resize(offset + 1); } auto *out_tensor = &out->at(offset); - out_tensor->CopyFrom(x_tensor, dev_ctx.GetPlace(), dev_ctx); + CopyFrom(x_tensor, dev_ctx.GetPlace(), dev_ctx, out_tensor); out_tensor->set_lod(x_tensor.lod()); } }; @@ -116,7 +116,8 @@ class ReadFromArrayOp : public ArrayOp { auto *out_tensor = out->GetMutable(); size_t offset = GetOffset(scope, dev_ctx); PADDLE_ENFORCE_LT(offset, x_array.size()); - out_tensor->CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(x_array[offset], dev_ctx.GetPlace(), dev_ctx, + out_tensor); out_tensor->set_lod(x_array[offset].lod()); } }; diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index 7975efc7cf134aaf591385a6866254a9c5f2a0bb..fff1dc7ccddf1d8cee0c8311828fd38888283cd1 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -66,7 +66,7 @@ class UniformRandomOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( - static_cast(ctx.Attr("data_type")), + static_cast(ctx.Attr("dtype")), ctx.device_context()); } }; @@ -99,7 +99,7 @@ uniform distribution. "Random seed used for generating samples. " "0 means use a seed generated by the system.") .SetDefault(0); - AddAttr("data_type", "(int, default 5(FP32)) Output tensor data type") + AddAttr("dtype", "(int, default 5(FP32)) Output tensor data type") .SetDefault(framework::DataType::FP32); } }; diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc index dcc59f5ff2ae3a8ca999d72a20cfd5c759987d89..68b4f7705995e5ecb6c9b8216db7373c1777a31e 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -180,7 +180,7 @@ class WhileGradOp : public framework::OperatorBase { if (var->IsType()) { auto &inside_tensor = var->Get(); framework::AttributeMap attrs; - attrs["data_type"] = framework::ToDataType(inside_tensor.type()); + attrs["dtype"] = framework::ToDataType(inside_tensor.type()); attrs["shape"] = framework::vectorize2int(inside_tensor.dims()); attrs["value"] = 0.0f; diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index bd86a9fe268c277065cd450f91b544def6c4d32f..88df28a9668e5f354d115ff8ab32cb21e03aefb5 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -1,15 +1,20 @@ -cc_library(cpu_info SRCS cpu_info.cc DEPS gflags glog) +if(WITH_GPU) + cc_library(enforce SRCS enforce.cc DEPS nccl) +else() + cc_library(enforce SRCS enforce.cc) +endif() +cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece enforce) + +cc_library(cpu_info SRCS cpu_info.cc DEPS gflags glog enforce) cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info) -nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog) +nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog enforce) -cc_library(place SRCS place.cc) +cc_library(place SRCS place.cc DEPS enforce) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) add_subdirectory(dynload) -cc_test(enforce_test SRCS enforce_test.cc DEPS stringpiece) - IF(WITH_GPU) set(GPU_CTX_DEPS dynload_cuda dynamic_loader) ELSE() diff --git a/paddle/platform/cuda_helper.h b/paddle/platform/cuda_helper.h index a7d99cde106a0a66f122a8c43f49717c03e60dec..376bb0e6887c797c3c1019e92f738a62d01a9c51 100644 --- a/paddle/platform/cuda_helper.h +++ b/paddle/platform/cuda_helper.h @@ -31,6 +31,16 @@ constexpr int PADDLE_CUDA_NUM_THREADS = 512; // For atomicAdd. USE_CUDA_ATOMIC(Add, float); +USE_CUDA_ATOMIC(Add, int); +USE_CUDA_ATOMIC(Add, unsigned int); +USE_CUDA_ATOMIC(Add, unsigned long long int); + +CUDA_ATOMIC_WRAPPER(Add, int64_t) { + static_assert(sizeof(int64_t) == sizeof(long long int), + "long long should be int64"); + return CudaAtomicAdd(reinterpret_cast(address), + static_cast(val)); +} #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 USE_CUDA_ATOMIC(Add, double); diff --git a/paddle/platform/cudnn_helper.h b/paddle/platform/cudnn_helper.h index dd48605b9ed688e4656d4cd1ddf1f298d0a50a9e..80a4c9bb4bbcd03cf849d86118db4e502382f031 100644 --- a/paddle/platform/cudnn_helper.h +++ b/paddle/platform/cudnn_helper.h @@ -116,7 +116,7 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat( case DataLayout::kNCHW: return CUDNN_TENSOR_NCHW; case DataLayout::kNCDHW: - return CUDNN_TENSOR_NCHW; // TODO(chengduoZH) : add CUDNN_TENSOR_NCDHW + return CUDNN_TENSOR_NCHW; // NOTE: cudnn treat NdTensor as the same default: PADDLE_THROW("Unknown cudnn equivalent for order"); } @@ -143,7 +143,7 @@ class ScopedTensorDescriptor { strides[i] = dims[i + 1] * strides[i + 1]; } // Update tensor descriptor dims setting if groups > 1 - // FIXME(typhoonzero): Assume using NCHW or NCDHW order + // NOTE: Assume using NCHW or NCDHW order std::vector dims_with_group(dims.begin(), dims.end()); // copy if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; @@ -186,7 +186,6 @@ class ScopedFilterDescriptor { // width of the filter. std::vector kernel_with_group(kernel.begin(), kernel.end()); if (groups > 1) { - // M /= groups kernel_with_group[0] /= groups; // NOTE: input filter(C) of the filter is already asserted to be C/groups. } @@ -224,13 +223,15 @@ class ScopedConvolutionDescriptor { PADDLE_ENFORCE_EQ(pads.size(), strides.size()); PADDLE_ENFORCE_EQ(pads.size(), dilations.size()); -#if CUDNN_VERSION < 6000 +#if !CUDNN_VERSION_MIN(6, 0, 0) // cudnn v5 does not support dilation conv, the argument is called upscale // instead of dilations and it is must be one. for (size_t i = 0; i < dilations.size(); ++i) { PADDLE_ENFORCE_EQ( dilations[i], 1, - "Dilations conv is not supported in this cuDNN version"); + "Dilations conv is not supported in this cuDNN version(%d.%d.%d).", + CUDNN_VERSION / 1000, CUDNN_VERSION % 1000 / 100, + CUDNN_VERSION % 100); } #endif diff --git a/paddle/platform/cudnn_helper_test.cc b/paddle/platform/cudnn_helper_test.cc index 6bd85ae1ca8b47b203e0321e9d9224d5cfd3a586..427359f69713b961c4730b697d3ccde5f7085838 100644 --- a/paddle/platform/cudnn_helper_test.cc +++ b/paddle/platform/cudnn_helper_test.cc @@ -38,6 +38,26 @@ TEST(CudnnHelper, ScopedTensorDescriptor) { EXPECT_EQ(strides[2], 6); EXPECT_EQ(strides[1], 36); EXPECT_EQ(strides[0], 144); + + // test tensor5d: ScopedTensorDescriptor + ScopedTensorDescriptor tensor5d_desc; + std::vector shape_5d = {2, 4, 6, 6, 6}; + auto desc_5d = tensor5d_desc.descriptor(DataLayout::kNCDHW, shape_5d); + + std::vector dims_5d(5); + std::vector strides_5d(5); + paddle::platform::dynload::cudnnGetTensorNdDescriptor( + desc_5d, 5, &type, &nd, dims_5d.data(), strides_5d.data()); + + EXPECT_EQ(nd, 5); + for (size_t i = 0; i < dims_5d.size(); ++i) { + EXPECT_EQ(dims_5d[i], shape_5d[i]); + } + EXPECT_EQ(strides_5d[4], 1); + EXPECT_EQ(strides_5d[3], 6); + EXPECT_EQ(strides_5d[2], 36); + EXPECT_EQ(strides_5d[1], 216); + EXPECT_EQ(strides_5d[0], 864); } TEST(CudnnHelper, ScopedFilterDescriptor) { @@ -60,6 +80,20 @@ TEST(CudnnHelper, ScopedFilterDescriptor) { for (size_t i = 0; i < shape.size(); ++i) { EXPECT_EQ(kernel[i], shape[i]); } + + ScopedFilterDescriptor filter_desc_4d; + std::vector shape_4d = {2, 3, 3, 3}; + auto desc_4d = filter_desc.descriptor(DataLayout::kNCDHW, shape_4d); + + std::vector kernel_4d(4); + paddle::platform::dynload::cudnnGetFilterNdDescriptor( + desc_4d, 4, &type, &format, &nd, kernel_4d.data()); + + EXPECT_EQ(GetCudnnTensorFormat(DataLayout::kNCHW), format); + EXPECT_EQ(nd, 4); + for (size_t i = 0; i < shape_4d.size(); ++i) { + EXPECT_EQ(kernel_4d[i], shape_4d[i]); + } } TEST(CudnnHelper, ScopedConvolutionDescriptor) { diff --git a/paddle/platform/dynload/CMakeLists.txt b/paddle/platform/dynload/CMakeLists.txt index bb3fec1be9e811c26cc6851314e960e96fc366b3..f4fda65907dc26e9edb91ee46f3b8bd2de7b3f3a 100644 --- a/paddle/platform/dynload/CMakeLists.txt +++ b/paddle/platform/dynload/CMakeLists.txt @@ -1,3 +1,3 @@ -cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags) +cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc DEPS dynamic_loader nccl) diff --git a/paddle/platform/enforce.cc b/paddle/platform/enforce.cc new file mode 100644 index 0000000000000000000000000000000000000000..e8d31bc782ec3cddd18ceaedf88fe5e7b4aed2cc --- /dev/null +++ b/paddle/platform/enforce.cc @@ -0,0 +1,19 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/enforce.h" + +namespace paddle { +namespace platform {} // namespace platform +} // namespace paddle diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index bfe708748a62ff9ac5d151bc652142e1f4925c83..415020ab965fa976c37870b7ad5794aab947fb4e 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -49,7 +49,6 @@ limitations under the License. */ namespace paddle { namespace platform { -namespace { #ifdef __GNUC__ inline std::string demangle(std::string name) { int status = -4; // some arbitrary value to eliminate the compiler warning @@ -60,7 +59,6 @@ inline std::string demangle(std::string name) { #else inline std::string demangle(std::string name) { return name; } #endif -} struct EnforceNotMet : public std::exception { std::exception_ptr exp_; diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index a9bcc474387513a8ca019bc9382b88c93e08ff8d..a54dc0d9fdb3c30391b01966ad493540c8ad1375 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -1,8 +1,8 @@ if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc - DEPS pybind python backward proto_desc tensor_array paddle_memory executor prune + DEPS pybind python backward proto_desc paddle_memory executor prune ${GLOB_OP_LIB}) endif(WITH_PYTHON) -cc_binary(print_operators_doc SRCS print_operators_doc.cc DEPS ${GLOB_OP_LIB} tensor_array) +cc_binary(print_operators_doc SRCS print_operators_doc.cc DEPS ${GLOB_OP_LIB}) diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc index 5a1ff9b7976abbe4a37f8366181d9d1ae78ea4a0..6c8f06cccb92fa9cd22fdb89a9d410e6853895cc 100644 --- a/paddle/pybind/protobuf.cc +++ b/paddle/pybind/protobuf.cc @@ -202,9 +202,9 @@ void BindVarDsec(py::module &m) { }, py::return_value_policy::reference) .def("set_shape", &VarDescBind::SetShape) - .def("set_data_type", &VarDescBind::SetDataType) + .def("set_dtype", &VarDescBind::SetDataType) .def("shape", &VarDescBind::Shape, py::return_value_policy::reference) - .def("data_type", &VarDescBind::GetDataType) + .def("dtype", &VarDescBind::GetDataType) .def("lod_level", &VarDescBind::GetLodLevel) .def("set_lod_level", &VarDescBind::SetLoDLevel) .def("type", &VarDescBind::GetType) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 3d8d3f1d2fd3977f945928c723db5fcafffeae85..f55a1edce31ccf2498dcfcf0b30ba1012d7a7d1a 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -26,9 +26,7 @@ limitations under the License. */ #include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/prune.h" #include "paddle/framework/selected_rows.h" -#include "paddle/framework/tensor_array.h" #include "paddle/operators/cond_op.h" -#include "paddle/operators/dynamic_recurrent_op.h" #include "paddle/operators/net_op.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" @@ -293,6 +291,11 @@ All parameter, weight, gradient are variables in Paddle. Prune(*prog_with_targets.Proto(), &pruned_desc); return new ProgramDescBind(pruned_desc); }); + m.def("inference_optimize", [](ProgramDescBind &origin) { + ProgramDesc pruned_desc; + InferenceOptimize(*(origin.Proto()), &pruned_desc); + return new ProgramDescBind(pruned_desc); + }); m.def_submodule( "var_names", "The module will return special predefined variable name in Paddle") @@ -390,83 +393,6 @@ All parameter, weight, gradient are variables in Paddle. self->CompleteAddOp(); }); - py::class_(m, "TensorArray") - .def("__init__", - [](TensorArray &instance) { new (&instance) TensorArray(); }) - .def("read", - [](TensorArray &self, size_t index) { return self.Read(index); }) - .def("write", [](TensorArray &self, size_t index, - LoDTensor &value) { self.Write(index, value); }) - .def("write_shared", - [](TensorArray &self, size_t index, const LoDTensor &value) { - self.WriteShared(index, value); - }) - .def("size", [](TensorArray &self) { return self.size(); }) - .def("pack", - [](TensorArray &self, size_t level, - const std::vector> &meta_info, - const std::vector> &lod) { - std::vector meta; - for (auto &info : meta_info) { - PADDLE_ENFORCE_EQ(info.size(), 3UL); - meta.emplace_back(info[0], info[1], info[2]); - } -#ifndef PADDLE_WITH_CUDA - return self.Pack(level, meta, lod); -#else - LoD new_lod; - new_lod.reserve(lod.size()); - std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); - return self.Pack(level, meta, new_lod); -#endif - }) - .def("unpack", - [](TensorArray &self, const LoDTensor &source, int level, - bool length_descend) { - auto metas = self.Unpack(source, level, length_descend); - std::vector> meta_info; - for (auto meta : metas) { - meta_info.emplace_back( - std::vector({meta.begin, meta.end, meta.ori_idx})); - } - return meta_info; - }) - .def("stack", [](TensorArray &self) { return self.Stack(); }) - .def("unstack", - [](TensorArray &self, const LoDTensor &source) { - return self.Unstack(source); - }) - .def("unstack_shared", [](TensorArray &self, const LoDTensor &source) { - return self.UnstackShared(source); - }); - - py::class_(m, - "DynamicRecurrentOp") - .def_static("create", - [](py::bytes protobin) -> operators::DynamicRecurrentOp * { - OpDesc desc; - PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), - "Cannot parse user input to OpDesc"); - PADDLE_ENFORCE(desc.IsInitialized(), - "User OpDesc is not initialized, reason %s", - desc.InitializationErrorString()); - auto rnn_op = OpRegistry::CreateOp(desc); - return static_cast( - rnn_op.release()); - }) - .def("set_step_unit", - [](operators::DynamicRecurrentOp &self, const operators::NetOp &net) - -> void { self.rnn.SetStepUnit(net.Clone()); }) - .def("get_state", - [](operators::DynamicRecurrentOp &self, const std::string &name) - -> const TensorArray & { return self.rnn.state(name); }) - .def("get_step_input", - [](operators::DynamicRecurrentOp &self, const std::string &name) - -> const TensorArray & { return self.rnn.step_input(name); }) - .def("get_step_output", - [](operators::DynamicRecurrentOp &self, const std::string &name) - -> const TensorArray & { return self.rnn.step_output(name); }); - // cond_op py::class_(m, "CondOp") .def_static("create", diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 595d25fd4830b6e69b9a1080803771b0464741db..fda2a2f1b764106a7a108e8c56bc90ce3459e9b5 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -144,7 +144,7 @@ function gen_dockerfile() { DOCKERFILE_GPU_ENV="" DOCKERFILE_CUDNN_DSO="" if [[ ${WITH_GPU:-OFF} == 'ON' ]]; then - DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" + DOCKERFILE_GPU_ENV="ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu:\${LD_LIBRARY_PATH}" DOCKERFILE_CUDNN_DSO="RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.5 /usr/lib/x86_64-linux-gnu/libcudnn.so" fi diff --git a/paddle/trainer/Trainer.cpp b/paddle/trainer/Trainer.cpp index 88e684849df6fbfe4042b92bdb76ef98159eecea..3e4a2b5fa8a3981f6362edc1dc61ae1616e257ef 100644 --- a/paddle/trainer/Trainer.cpp +++ b/paddle/trainer/Trainer.cpp @@ -138,7 +138,7 @@ void Trainer::init(const std::shared_ptr& config, } if (FLAGS_use_mkldnn) { - CHECK_EQ(FLAGS_trainer_count, 1UL) << "MKLDNN only need 1 trainer"; + CHECK_EQ(FLAGS_trainer_count, 1) << "MKLDNN only need 1 trainer"; } if (testing) { diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/trainer/tests/CMakeLists.txt index 80665551ec51214d90b866f0c7b2abb2fdee5f39..2739878b7f2936ea2da689da0b4caa780516ccc1 100644 --- a/paddle/trainer/tests/CMakeLists.txt +++ b/paddle/trainer/tests/CMakeLists.txt @@ -11,7 +11,6 @@ add_unittest_without_exec(test_Trainer test_Trainer.cpp) add_test(NAME test_Trainer COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ - ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/gen_proto_data.py && ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_Trainer WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) diff --git a/paddle/trainer/tests/chunking.conf b/paddle/trainer/tests/chunking.conf deleted file mode 100644 index d88df919df8fee9209336ffa29d724dabe6af31b..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/chunking.conf +++ /dev/null @@ -1,125 +0,0 @@ -#edit-mode: -*- python -*- -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later. - -TrainData(ProtoData( - files = 'trainer/tests/train_files.txt', - usage_ratio = 1.0, -)) - -TestData(ProtoData( - files = 'trainer/tests/test_files.txt' -)) - -default_initial_std(1) -default_decay_rate(4e-4) -default_device(0) - -Inputs("features", "word", "pos", "chunk") - -Outputs("crf") - -Layer( - name = "features", - type = "data", - size = 4339, -) - -Layer( - name = "word", - type = "data", - size = 478, -) - -Layer( - name = "pos", - type = "data", - size = 45 -) - -Layer( - name = "chunk", - type = "data", - size = 23 -) - -Layer( - name = "output", - type = "mixed", - size = 23, - bias = False, - device = -1, - inputs = [ - FullMatrixProjection("features", parameter_name="feature_weights"), - # TableProjection("word"), - # TableProjection("pos"), - ], -) - -Layer( - name = "crf", - type = "crf", - size = 23, - device = -1, - inputs = [ - Input("output", parameter_name="crfw"), - "chunk" - ] -) - -Layer( - name = "crf_decoding", - type = "crf_decoding", - size = 23, - device = -1, - inputs = [ - Input("output", parameter_name="crfw"), - "chunk" - ] -) - -Evaluator( - name = "error", - type = "sum", - inputs = "crf_decoding", -) - -''' -# chuck evaluator cannot be used for GPU training -Evaluator( - name = "chunk_f1", - type = "chunk", - inputs = ["crf_decoding", "chunk"], - chunk_scheme = "IOB", - num_chunk_types = 11, -) -''' - -Settings( - algorithm = 'sgd', - batch_size = 100, - average_window = 0.5, - max_average_window = 2500, - learning_rate = 1e-1, - learning_rate_decay_a = 5e-7, - learning_rate_decay_b = 0.75, - l1weight = 0, - l2weight = 1, - c1 = 0.0001, - backoff = 0.5, - owlqn_steps = 100, - max_backoff = 5, -) diff --git a/paddle/trainer/tests/compare_sparse_data b/paddle/trainer/tests/compare_sparse_data deleted file mode 100644 index 18fc6541383d8e8e1687b8fe1abd57aece3d4cfc..0000000000000000000000000000000000000000 Binary files a/paddle/trainer/tests/compare_sparse_data and /dev/null differ diff --git a/paddle/trainer/tests/data_bin_part b/paddle/trainer/tests/data_bin_part deleted file mode 100644 index 66ede391b0cffe6bc9611d3616b7b626864f5c3e..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/data_bin_part +++ /dev/null @@ -1,214 +0,0 @@ -F -X -X -X -X -X -X -X -X -HC=TFTIַ;H=TFTIYW.8T˔I͚48TN8TE98TW8T&6ͅTTHC=TFTIַ;><.8˔I͚48+E98W8&68H=TFTIHC=TFTIַ;H=TFTI86HC=TFTIַ;W8T;8TJJ8T&$H=TFTIW8Ю+JJ8HC=TFTIַ;H=TFTI HC=TFTIַ;@?H=TFTI@HC=TFTIַ;H=TFTI868T8T&9C6HC=TFTIַ;BT&$88&Ӗ5H=TFTIBTHC=TFTIַ;H=TFTIVTHC=TFTIַ;8T8TͅTT8T&86;8T@N8T8T;9H=TFTI8888&86;8@N88HC=TFTIַ;H=TFTIMKHC=TFTIַ;ٟ@17ȣ8Gȣ8/>7;BAUQUT0A?H=TFTIٟ@17G/>7;BAUQUT0HC=TFTIַ;H=TFTIHC=TFTIַ;H=TFTIHC=TFTIַ;H=TFTI.8T˔I͚48TN8TE98TW8T&6ͅTTHC=TFTIַ;'JA-EJ@8T-Eބ248TYW.8˔I͚48+E98W8&68H=TFTIAM18Mބ248HC=TFTIַ;H=TFTIYW.8T˔I͚48TN8TE98TW8T&6ͅTTHC=TFTIַ;><.8˔I͚48+E98W8&68H=TFTIHC=TFTIַ;H=TFTI HC=TFTIַ;@KH=TFTI@KHC=TFTIַ;H=TFTI HC=TFTIַ;@?H=TFTI@HC=TFTIַ;H=TFTI#!14UƕT6.Q8T@Ԛ<14ƕT6.Q8@Ԛ<HC=TFTIַ;H=TFTIVTHC=TFTIַ;8T8TͅTT8T&86;8T@N8T8T;9H=TFTI8888&86;8@N88HC=TFTIַ;H=TFTIHC=TFTIַ;ܥ6H=TFTIܥ6HC=TFTIַ;H=TFTIHC=TFTIַ;H=TFTIHC=TFTIַ;H=TFTI;9HC=TFTIַ;Q;B !H=TFTIQBHC=TFTIַ;H=TFTIYW.8T˔I͚48TN8TE98TW8T&6ͅTTHC=TFTIַ;><.8˔I͚48+E98W8&68H=TFTIHC=TFTIַ;H=TFTI53HW8T;8T8THC=TFTIַ;#!HW8Ю+8H=TFTIHC=TFTIַ;H=TFTI HC=TFTIַ;@?H=TFTI@HC=TFTIַ;H=TFTI&$HC=TFTIַ;VGD; H=TFTIVGD;  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OGG͡S<%&б ̣ Fۧ11ņAǧ1ņAņA<6ҥ3߫UVKTVU6>VMUF>M5%̋'wuG͡S<% ̣ Fۧ11ņAǧ1ņAņA<6UVKTV6>VMUF>ʶM%̋'  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG̣ '@@@  @@  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG&$O4=ӪN/>K/;8,T O4=ӪN/>K;,T  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG><,9O8.̣ TB0O!./WDSW53,9O8.TB0O!./WDSW  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG:=X̣ QUTG܂=X̣ QTG  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG)'= 0̣ M6ͅTO,@Ԛ<#!=ؐ0̣ M6ͅTO,@Ԛ<  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG/-= 0̣ M6ͅTO,DSDA)'=ؐ0̣ M6ͅTO,DSDA  ̣ OG  ̣ OG&$Eʌ3OXMQ̣ Jʌ3D4T#!Eʌ3OXMQ̣ Jʌ3UT  ̣ OG  ̣ OG  ̣ Ҧ)GG4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ204AQ.ŞGщQHAVTJD8DAP&$4AQ.щQHAVTD8A4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ&$R4Q>.ŞGGщQ6?@Ԛ<#!R4Q>.GщQ6?@Ԛ<4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ&$4Q.ŞGJIGщQDSDA#!4Q.JIGщQDSDA4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ&$.ŞGٟ@6G5IGщQA7B.ٟ@6G5IGщQ+4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ534Q>.ŞGDAP;0T?6T)! 4Q>.A;T6T)4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ534Q>.ŞGDAP;0T?6T)! 4Q>.A;T6T)4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ><49KQ.ŞGRGD9HOKJA.ŞG=RJ/-4-Q.RGD9HKJA.RJ4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ534AIQ.ŞGщQHAVTJD8DAP)'4AIQ.щQHAVTD8A4Q>.ŞGGщQ4Q>.GщQ 4Q.ŞG6P6T4Q.6P64Q>.ŞGGщQ4Q>.GщQ/-4=R4Q>AE.ŞGC/W99 4R4Q>C/W9CPH5CPH5;9H91GRFP.ܤKHUA6)ʪ86H1GRFP.ܤKHUA6)ʪCPH5CPH5UPH>G@Ԛ<UPH>G@Ԛ<CPH5CPH5&$CPHA>GDSPԮK߀3#!CPHA>GDSPٮKCPH5CPH5AHACPG@Ԛ<AHACP@Ԛ<CPH5CPH5;9H91GRFP.ܤKHUA6)ʪ86H1GRFP.ܤKHUA6)ʪCPH5CPH5MKHFșK>7QKH.CQR>“JMB>WMLG,@Ԛ<MKHFșK>7QKH.CQR>“JMB>WMLG,@Ԛ<CPH5CPH5&$CPHA>GDSPԮK߀3#!CPHA>GDSPٮKCPH5CPH553AHMDP58Qٟ@H3/A@@@/-AHMDP8Qٟ@H3/A@@CPH5CPH5;9H91GRFP.ܤKHUA6)ʪ86H1GRFP.ܤKHUA6)ʪCPH5CPH5#!AHACPGDSDA AHACPDSDACPH5CPH5&$CPHA>GDSPԮK߀3#!CPHA>GDSPٮKCPH5CPH5YWI==R>H//GM>ϪJRK22U׵AHTUA6)ʪYWI==R>H//GM>ϪJRK22U׵AHTUA6)ʪCPH5CPH5;9H91GRFP.ܤKHUA6)ʪ86H1GRFP.ܤKHUA6)ʪCPH5CPH5 6PH>5HOAB 6PH>5HOABCPH5CPH5&$CPHA>GDSPԮK߀3#!CPHA>GDSPٮKCPH5CPH5HG22A@@@HG22A@@ B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O20 N߹-7BO1ַ;L߹-NA7OIַ;)' N߹-7BO1;߹-NA7I B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O,* N߹-BO߹-7O߹-ַ;OʈF<4)' N߹-BO߹-7߹-ַ;OʈF<4 B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O&$A N߹-BO>8ֽHٟ@@Ԛ<#!A N߹-BO>8ٟ@@Ԛ< B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O/- - N߹-C7FBOR1:?T)' - Nں-7BOR1:?T B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O ߹-7O߹-BT ߹-7߹-B B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O/- N߹-BO7FOO?L߹-OǧBT)' N߹-BO7OO?L߹-OT B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O><߹- NLB7FOQӮDDA40AT(",*߹- NLOQӮDDA0AT B߹-O B߹-O߹-BTCO@L:߹-BCO@L: B߹-O B߹-O,* ߹-7BOİU1>CBBUQ4,* ߹-7BOİU1>CBBUQ4 L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/>< - /@ʡH9H1RLA¶7/JDO8,T#!N91LN/JD,T L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/b`1RLDA¶7/ - J0EKB8//OEKю2E,/WT)ʪDB1LDN/J0KB8/OEю2E)ʪ L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/20 - 1RLA¶7/J0EO@K&$1LN/J0EO@K L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/>T7O=P; >7=P L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ//-DA¶7/1RLJʡHWWT%! DN/1LJʡHWWՄO L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/>< - N1RLA¶7CH231RLA¶7//&$N1LN޻/231LN/ L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ//- LGR1¶7/17>>G>GW=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/JHA¶7/C1RLH7/N=,::84SQH9T86N/C1L+N=,ў84SQH9T L17A¶7J/ L17NJ/GE/1RLA¶7CʡH =;>W=ѾC -:K48?:T86/1LNCʡH =.=ѾC -:48?:T L17A¶7J/ L17NJ/DB - /@ʡH9H1RLA¶7/JDOEJ< NT΂:8/CT΂:KT΂:WJT΂:ì,UWJ&$ NTCT:Tژ< NT΂:8/CT΂:KT΂:WJT΂:ì,UWJ&$ NTCT:TژBDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ\ZRBDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ;9>R>%B>ڜ>A9TK91A#%@@@20>R>%B>ڜ>A9K91A#@@)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ#!#%9TKڜ>BEIUT#9Kڜ>BEIU)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ#!#%K9TD06O@Ԛ<#K9D06@Ԛ<)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ#%9TCۚK@Ԛ<#9CۚK@Ԛ<)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQGE6W#%>9T?#%6O/OO/U!'B8>ڜ>;96W#>9?#6O/O/U!'B8>ڜ>)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQ&$#%9T@A6WDPDA #9@A6WDPDA)'#%HK9T>BDJ99щQ#!#HK9>BDJ99щQYW#%9T>K-A96TWB:OSRQ9#%ѾCHTL6LTJH#9>KA96TWB:OSRQ9#%5L6LT,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S, ؓ =BܤKS/C8Tœ =BܤKS8T,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,DBGDG>W-3M8F=Bٟ@6S9ܤKȟN U686GDG>W38F=B5S9ܤKȟN U,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,>THH8@9FFSA@Ԛ<53ER=B67>HH8@9FFA@Ԛ<,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,PN84C81=BRVT6CAE/:6LUUNԛL@;6GDB8C81=BRVTCAE:6LUUNԛL@6G,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,JHH=B/-8>ܤKDA9=S˱U8QTָUJ)ʪDBH=B/8>ܤKDA9=S˱U8QTU)ʪ,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,GEABRBE9A6BϜ>8=B6ץRRDO6ө ۆ ;9ABRBE9A6BϜ>8=B6ץR6ө ,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,;9RQSAEM8=B>ץR9)NU6!GJ53RQSAEC=B>ץR9)NU6!1,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,/-VJV18=BR6?#%@@@)'VJV18=BR6?#@@,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,D>EȊ56RT8JF=BKT:8J=BRFK,34DH@CӽDҾWK?>S@99ISDPDAzxD>Eˊ5RT8S=BАT:8J=BRF,34DH@CӽDҾWK?>S@9ISDPDA,*E6FA6ܤKJV8=B>S,,*E6FA6ܤKJV8=B>S,V68BXʉ5=B>ܤK%&Ξ)ʉ5VTVEXGVXGV8G&Ξ)VEBVƔ>XVU8—P=ۚKC>JU̟KO4>LV68BX=B>ܤK%&Ξ)ʉ5VTVEXVXV8G&Ξ)VEBV۔>VU8=CJ.4>HD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WXDCGR@NDCG@NHD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WX#!6CGDʉ5>R#!6CGDʉ5>RHD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WX86GR>RP>R699VADSDA20GR>RP>R69VADSDAHD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WX#!DR߻W99@@@DR߻W99@@HD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WXUV1;2X4UV1;2XHD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WXnl>A6߻W$6XT6/ҥ3)T:6X-6ME@EU%!)!MK>A6߻W$6‰XɺRҥ3?:6X-6E@E )PHD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WXA?6=C߻WED>3K֟MȬTT(#$!,*6=C߻WED>3K֟MȬTT HD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WX;966GȂ3ʉ5>R>BCT6;3D5366GȂ3ʉ5>R>BCT;3DHD6߻WXHD6߻WXC߻WX@Ԛ<C߻WX@Ԛ<HD6߻WXHD6߻WXDC߻WR1@KDCW1@K,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966GE6/KOٟ@—P=>8E9RBHAVTJD8DAP536/Oٟ@=>8ERBHAVTD8A,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966&$CKOI9RB2SCI9COIRB2SC9,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966)'LPKO9RB6P6T LPORB6P6,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966PN6KO9RBEIT6>SK?KI—P=>KI90C9T><6ORBEIT6>SK?K=>K90CT,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966,*кBPKOK=9F9RHG8T#!кBPOK=9FRHG8,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966JHHKO>6/—P=9RH>DAP;0T?6T)!/-HO>6/=RH>A;T6T),*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966MKKO6/—P=KORDB6OKKO696KO6щQ@Ԛ<53O6/=ORDB6KO9O6щQ@Ԛ<,*SPKO—P=D9RB5966#!SPO=DRB5966@@@,*SPKO—P=D9RB5966#!SPO=DRB5966,*6/KO9RBDǬP/-C9AT0?9-8ٟ@6EE>PC9AT0?=C9AT0?=#!C9AT0?9-8@Ԛ<#!C9AT0?9-8@Ԛ<C9AT0?=C9AT0?=20ʻ?0?9<9=C9ATVB$/?BRÙKBTA?D>0?9<9=C9ATVB$/BEBC9AT0?=C9AT0?=#!C9AT0?9-8@Ԛ<#!C9AT0?9-8@Ԛ<C9AT0?=C9AT0?=0?6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח>B7Uח>D6@Ԛ<B7Uח>D6@Ԛ<6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח> Uח>@K Uח>@K6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח>B7Uח>ͦBOERB7Uח>ͦBOER6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח>B7Uח>8;BٖTTB7Uח>8;BT6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח>86AHFS=@=՞RU70ח>GDSPԮK߀320AHF=@=՞RU70ח>GDSPٮK6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח> B7Uח>DT("B7Uח>DT6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח>6RTU7HˮDDA6TU7HˮDDA6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח> B7Uח> B7Uח>6RTU7ח>6TU7ח>\ZHS=HˮD>7KOUJҲ.щQHT-:66(UʡH966SQHS=HˮD>7KOUJҲ.щQHT:6(UʡH966RTU7ח>6TU7ח>20AHFS=@=՞RU70ח>GPB6,*AHF=@=՞RU70ח>GPB  ;GB;9ӱQL4ߩ75Q-<>;G  48@@@<ߩ7>48@@  F9Q?WɤKIԊX>F9Q?WɤK 3ϊXQK  ԊXQK,*3ϊX17Q7G/׆N8GF̛<ԊX13G/NGF 3ϊXQK  ԊXQK&$R3ϊX46߻WLQG8@Ԛ< RԊX46߻WLQG@Ԛ< 3ϊXQK  ԊXQK,*3ϊX17Q7G/׆N8GF̛<ԊX13G/NGF 3ϊXQK  ԊXQK><3ϊXR7Q7@475@:ȥB@AT/-ԊXR3@475@:ȥBA 3ϊXQK  ԊXQK,*3ϊX17Q7G/׆N8GF̛<ԊX13G/NGF 3ϊXQK  ԊXQK"!F>"FN߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F720DN/EL>7Aڶ>F7CDƹ;@Ԛ<,*DNȜML>7Aڶ>F7C4@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7)'HN/KN/ڶ>F7=A7B#!HN/KN/ڶ>F7=+N߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7 H/67—P=DG@KH/67=D@KN߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7ܤKKA7B  ܤKK+N߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F77>1T֛7ٟ@9F6U>ʔ71/>ٟ@6LD7>/I/>=щQDDHIN./59Ԛ<ڶ>S-=DN@UW=-щQܭDHTDS=DSDA7>1֛7ٟ@9F6U>ʔ71/>5LD>/I/>=щQDDHIN./51S-=DN@UW=-щQܭDHTDS=DSDAN߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7,*DN=8T=4ڶ>F7S@@@)'DN=8T=4ڶ>F7S@@N߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7 H/67Dƹ;DG@KH/674D@KN߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7>S=>7ʗ74=>SB7ST86D1ƹ;T4>S=>7ʗ74>SB7STN߀3/ڶ>F7N߀3/ڶ>F7,*DN.ی'79Ԛ<=/ڶ>J7@Ԛ<)'DN.ی'71=/ڶ>J7@Ԛ<N߀3/ڶ>F7N߀3/ڶ>F7V/67=DG@KV/67=D@K$5H149A$5H149Aec$/4UR5RH$>#=1,1>Bٟ@T9ALKٟ@6J=@Ԛ<\Z$/4U5RH$>#=1,1>@T9ALKٟ@6=@Ԛ<$5H149A$5H149A>EѾCT86VOTBA?$US/6T9A6APɺDEXET8VOTB$5H149A$5H149Aec$/4UR5RH$>#=1,1>Bٟ@T9ALKٟ@6J=@Ԛ<\Z$/4U5RH$>#=1,1>@T9ALKٟ@6=@Ԛ<$5H149A$5H149A53ER91@5H1Bٟ@49AE@@@/-ER91@5H1@49AE@@$5H149A$5H149A/-$U5/8=49Aٟ@5DSDA/-$U5/8=49Aٟ@5DSDA$5H149A$5H149A,*$9656549Q5؂=@Ԛ<,*$9656549Q5؂=@Ԛ<$5H149A$5H149Aec$/4UR5RH$>#=1,1>Bٟ@T9ALKٟ@6J=@Ԛ<\Z$/4U5RH$>#=1,1>@T9ALKٟ@6=@Ԛ<$5H149A$5H149ADB"Ξ)69$R549AIٟ@TN>CJ@@Ԛ<><"69$R549A@TN>CJ@@Ԛ<$5H149A$5H149A/-$U5/8=49Aٟ@5DSDA/-$U5/8=49Aٟ@5DSDA$5H149A$5H149A;9E4WN$RB5H4LDLIĪNCS@K;9E4WN$RB5H4LDLIĪNCS@K$5H149A$5H149Aec$/4UR5RH$>#=1,1>Bٟ@T9ALKٟ@6J=@Ԛ<\Z$/4U5RH$>#=1,1>@T9ALKٟ@6=@Ԛ<$5H149A$5H149A53@;5RH$ULT9A6DPDA/-@5RHULT9A6DPDA$5H149A$5H149A/-$U5/8=49Aٟ@5DSDA/-$U5/8=49Aٟ@5DSDA$5H149A$5H149A86DP>E5H"$ĪNL=496A7B/-P>E5H"$ĪNL=496+:/SʡH99SH :S9HDBSWJ9?9?:/SʡH99:/SʡH995ܛ?M)'WJ99:S9:S95ܛ?M:/SʡH99SH :S9H/-:/SʡH999?99?D6T:S9999D6:/SʡH99SH :S9H&$SV:/SʡH99S6TV:S96:/SʡH99SH :S9H#!S:/SʡH999?Έ;F:S99Έ;F:/SʡH99SH :S9HDBSWJ9?9?:/SʡH99:/SʡH995ܛ?M)'WJ99:S9:S95ܛ?M:/SʡH99SH :S9H&$SV:/SʡH999?<>KDH><>KJHRD>HHHHHH<>KDH><>Kwu7RDH><>K,07R2 -.TʆL@ϡS4,ܢEM,.O2J6MKR2 -.TʆL@ϡS4,E,.OJ6DH><>KDH><>KJHRD>HHHHHH<>KDH><>KMKRDH><>K,0IO9491یV0—P=—PH>.E6A?RH><>K,0IO94V0=—PH>.E6DH><>KDH><>KJHRD>HHHHHH<>KDH><>K#!RD>HH<>KDH><>KJHRD>HHHHHH<>KDH><>K/-DH>K=<,D6R=4,@Ԛ<&$D54,D6R=4,@Ԛ<DH><>KDH><>KJHRD>HHHHHH<>KDH><>K7RDH><>K2>7.ʆJ6ʆG1?—P=1?I2K7>>MGMߎM6>JRʆ.J6~.ʆJ6ʆG1?=1?IK7>MGMߎM6>JRʆ.J6DH><>KDH><>KJHRD>HHHHHH<>KDH><>K20RDH><>K2>J6/;IN9,*RH><>K2>J6/;N9DH><>KDH><>KJHRD>HHHHHH<>KDH><>K_]RDH><>K2>ʆ>I2́N4TȇN4TI(—Pބ2>N4ʆN4GERH><>K2>ʆ>I(N4ȇN4I(܉2>NʆNDH><>KDH><>KJHRD>HHHHHH<>KDH><>KGERDH><>K2>J>I2ˏR3˰(IB>—P3ˏR2;9RH><>K2>JIˏR3˰(IB>3ˏR2ʰDBNMG> BMG>JHɵO9FDSC4ʰDBN5>35-=9O2:@@@53ɵO9DSC4B5>I-=O2G@@ʰDBNMG> BMG> LNLBʰDBN@@@LNLB@@ʰDBNMG> BMG>JHɵO9FDSC4ʰDBN5>35-=9O2:@@@53ɵO9DSC4B5>I-=O2G@@ʰDBNMG> BMG>)'$";0Q8ҐJ9ҽ6WH)'$";0Q8ҐJ9ҽ6WHCARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA 4AR=J DG@K4AR= D@KCARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA/4ARQ=JB4/4ARQ=BCARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA><İFE1;TVL8ARO8L0AWН?/Н?T,*İFBTVL8ARO8LAН?-CARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA20İFE1;TVL8ARO8L0AW&$İFBTVL8ARO8LACARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOAPNİFE1;TVL8ARO8L0AWWН?W?UUWTН?>;9İFBTVL8ARO8LAWW?UUW?CARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA,*İFE1;TVL8O3߫UТ@HT&$İFBTVL8O3߫UТ@HTCARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA AR4J AR4JCARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOAL/4ARQ>L/4ARQ>CARVCWOAWCARVCWOA4ARQJOA4ARQJOCARVCWOAWCARVCWOA&$ŷ5/BAR4JX>BHH9;>B 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9CR9Xnj8@Ԛ<CR9nj8@Ԛ< 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R99XUTI9XNS;UOIַ;URIIIKIHBOF;F;N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9&$9X9C5I91ӛ?69; 9F5I91ӛ?69; 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R99XUC;- 9UC- 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9\ZG9XWF5ԎB@JP11.3>72PNG9WF5BJP11.3>72 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R9869XB9ԎB@@OLWFR9B9N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9&$9X9X59QCͦ(!995ƋQC 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R9 9X@?9@ 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9YW9XB9ԎB@>54WFR9B9 IC70FŔ6ADMIַ;70DB9B9B>54WFR9B9 IC0FŔ61I7 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R9DB9XCK29R5>9XWA/1C2ODKOD539C2R5>9WA1C2ODKOD 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9&$9X>KTCΚIRН?>AT9>KCΚIR?A 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R920CCTC7VCEICַ;C;-CTCCCVĸIַ;C-C 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9DBHW:9XB9ԎB@=ʼnEDWFR9B99XCT86HW:9B9B=ʼnEDWFR9B99C 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R99X@T9XR0ܥ69@T9Rܥ6 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9G7;CT G7;C 9XR9  9R9&$KX/9CR=U93ATX9CR=U93A 9XR9  9R9A?Hʜ2RA@RS9@>9X3>)כ$>;GB;9Hʜ2RA@RS9@>93>)כ$>;G 9XR9  9R9PNMRF=:9X94.б H>N̛<;TTН?T("'!53MRF=:994.б H>N;Tܞ? 9XR9  9R9><NR=9XC9S99׵AAKEAABC/;9NR=9C9S99׵AAKEAABC/=@KE= =@E=><@Q0H@KûAQH@KûAQ,HPHCB020@0H@ûAQH@ûAQ,HPHB0=@KE= =@E==@J@KI5@=@J@I5@=@KE= =@E=/-7ûAK3@3@K7KK3!#!7ûAK3@3@7K3=@KE= =@E=86=@KAKCK-3O?3377CT)'=@AKCK-.?.7C=@KE= =@E=/-K6S5@KE=4I,S@@@)'K6S5@E=4I,S@@=@KE= =@E= @K@?@@=@KE= =@E=)'C@ַ;C@GC@K=@AB&$C@ַ;C@GC@=@AB=@KE= =@E=DBIK@KQOַ;OE6V=ԋ J>JT7LJ653IK@QOַ;OE6V=JJ7LJ6ԃP;ܢE4JAˑ+86Q FM1UܢE4NԃP;O4HН?U,T#!Q FM1UAOH,ԃP;ܢE4JAˑ+,*ԃP;Q8ȘIK5ܢE4N>4OJAQ8K5>4OԃP;ܢE4JAˑ+ ԃP;1ܢE4NН?̛4׶K21T)'AHQ8K5C>4׶K21ԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4б XQT)'AHQ8K5C>4б XQԃP;ܢE4JAˑ+><ԃP;HQ8ȘIK5ܢE4NC>4HН?Н?>HT,*AHQ8K5C>4H?HTԃP;ܢE4JAˑ+MKԃP;HQ8ȘIK5NC>4ԃP;Q:33ȘIJ82THA>4AQ:33ȘIJ82HԃP;ܢE4JAˑ+53ԃP;HQ8ȘIK5ܢE4NC>4TН?T&$AHQ8K5C>4Tܞ?ԃP;ܢE4JAˑ+/-ԃP;HL-TܢE4NC41TН?> AHL-TC41?ԃP;ܢE4JAˑ+Dֈ;0OFԃP;ܢE4JAˑ+,*ԃP;HQ8ȘIK5ܢE4NC>4 AHQ8K5C>4ԃP;ܢE4JAˑ+86Q FM1UܢE4NԃP;O4HН?U,T#!Q FM1UAOH,ԃP;ܢE4JAˑ+)'ԃP;E72TܢE4NŇ7̛4б 3QT)'AHQ8K5C>4б 3QԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4׶K21T)'AHQ8K5C>4׶K21ԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NCT?TCܢE0&$AHQ8K5CT?T/ԃP;ܢE4JAˑ+><ԃP;HQ8ȘIK5ܢE4NC>4HН?Н?>HT,*AHQ8K5C>4H?HTԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4XН?2J&$AHQ8K5C>4X2ԃP;ܢE4JAˑ+53ԃP;HQ8ȘIK5ܢE4NC>4TН?T&$AHQ8K5C>4Tܞ?ԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4Н?̛4?ETԃP;ܢE4JAˑ+Dֈ;0OFԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4Н?̛4?UԃP;ܢE4JAˑ+86Q FM1UܢE4NԃP;O4HН?U,T#!Q FM1UAOH,ԃP;ܢE4JAˑ+#!Q1NÚQ8ȘIKTԃP;4Q1N8KTA4ԃP;ܢE4JAˑ+ ԃP;1ܢE4NН?̛C1A1J>=)'-AHC;>C1Aܹ1>=ԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4׶K21T)'AHQ8K5C>4׶K21ԃP;ܢE4JAˑ+~4NU.̤3@>ϥJ=T.-0ܢE4N5H01ԃP;R:?=N.̤3@>PTT>JFF8G3b`4NU.LϥJ=T.-05H01AR:=N.LPT>JFF8GԃP;ܢE4JAˑ+><ԃP;HQ8ȘIK5ܢE4NC>4HН?Н?>HT,*AHQ8K5C>4H?HTԃP;ܢE4JAˑ+20ԃP;߽4Q8ȘIK5ܢE4N,4U/T&$A߽4Q8K5,4U/TԃP;ܢE4JAˑ+53ԃP;HQ8ȘIK5ܢE4NC>4TН?T&$AHQ8K5C>4Tܞ?ԃP;ܢE4JAˑ+,*Q1ʡH9BXTܢE4NН?̛<7TQ19XT?7ԃP;ܢE4JAˑ+Dֈ;0OFԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4Н?>RT&$AHQ8K5C>4?RԃP;ܢE4JAˑ+86Q FM1UܢE4NԃP;O4HН?U,T#!Q FM1UAOH,ԃP;ܢE4JAˑ+20ԃP;߽4Q8ȘIK5ܢE4N,4XQT#!A߽4Q8K5,4XQԃP;ܢE4JAˑ+ ԃP;1ܢE4NН?̛4C-HН?̛<&##!)'AHQ8K5C>4*? ԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4׶K21T)'AHQ8K5C>4׶K21ԃP;ܢE4JAˑ+SQԃP;HQʡHɤUBUHMܢE4NCT۹/8HMT>JT8:G3>JT:GԃP;ܢE4JAˑ+><ԃP;HQ8ȘIK5ܢE4NC>4HН?Н?>HT,*AHQ8K5C>4H?HTԃP;ܢE4JAˑ+ecԃP;߽4Q8ȘIK5ܢE4N,4ԃP;ܢE4N5NģCF4QO1MJEа.TН?>;9A߽4Q8K5,4AQO-Eа.T?ԃP;ܢE4JAˑ+53ԃP;HQ8ȘIK5ܢE4NC>4TН?T&$AHQ8K5C>4Tܞ?ԃP;ܢE4JAˑ+/-ԃP;HUܢE4NCRKD?TيR̛<&$AHUCRKD?TيR̛<ԃP;ܢE4JAˑ+Dֈ;0OFԃP;ܢE4JAˑ+,*7ԃP;E72TܢE4NН?>AT7AE7T?AԃP;ܢE4JAˑ+86Q FM1UܢE4NԃP;O4HН?U,T#!Q FM1UAOH,ԃP;ܢE4JAˑ+86߹-JН?̛<ԃP;HQ8ȘIK5ܢE4NC>4&$-?AHQ8K5C>4ԃP;ܢE4JAˑ+ ԃP;1ܢE4NН?̛4б XQT)'AHQ8K5C>4б XQԃP;ܢE4JAˑ+86ԃP;HQ8ȘIK5ܢE4NC>4׶K21T)'AHQ8K5C>4׶K21ԃP;ܢE4JAˑ+#!ԃP;߽4UL6.TܢE4NA߽4UL6TԃP;ܢE4JAˑ+><ԃP;HQ8ȘIK5ܢE4NC>4HН?Н?>HT,*AHQ8K5C>4H?HTԃP;ܢE4JAˑ+20ԃP;߽4U72TܢE4NԃP;߽4TН?T A߽4U7TA߽4Tܞ?ԃP;ܢE4JAˑ+53ԃP;HQ8ȘIK5ܢE4NC>4TН?T&$AHQ8K5C>4Tܞ?ԃP;ܢE4JAˑ+;9ԃP;HQ8ȘIK5ܢE4NC>4 0̛4 0QTIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT /-TIOTބ2BJ768T7P4J#!TIOTބ2BќJ6874TIOT TIOT &$TIOT0Q7J6J7&$TIOT0Q7J6J7TIOT TIOT JHRTIOT4/ >BԚԚ U@Ԛ< RTIOT> U@Ԛ<TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT #!TIOTބ2B>TV>T#!TIOTބ2B>TV>TTIOT TIOT &$TIOT0Q7J6J7&$TIOT0Q7J6J7TIOT TIOT SQRP4D3TMɾSBTIOTL;U$ N,%!@Ԛ<;9R4D3TMBTIOTL;U N,@Ԛ<TIOT TIOT  RTIOT> U@Ԛ< RTIOT> U@Ԛ<TIOT TIOT 86R9TIOT> BK1١-JL;@@@/-R9TIOT> BK1١-8@@TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT trT>IOTմ2O̤@ROWBǞV<>MɾS3D UJDP>W>5ֈD,DL9ADSDAkiT>IOTմ2@ROWBȞV>M3D UJDP>W>5ֈD,DL9ADSDATIOT TIOT &$TIOT0Q7J6J7&$TIOT0Q7J6J7TIOT TIOT 53TIOT*B6J768T7P4J2)'TIOT*B6ќJ68742TIOT TIOT  RTIOT> U@Ԛ< RTIOT> U@Ԛ<TIOT TIOT ,*TIOT> ,:%!@Ԛ< TIOT> ,:@Ԛ<TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT A?T7IOT> 3D,R,SUUP4J@@@53T7IOT> 3D,R,SU4@@TIOT TIOT &$TIOT0Q7J6J7&$TIOT0Q7J6J7TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT  RTIOT> U@Ԛ< RTIOT> U@Ԛ<TIOT TIOT 86RT>IOTK>SF> P4J@@@)'RT>IOTKS> 4@@TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT /-TIOTB62LCP4J>T#!TIOTB62C4>TIOT TIOT &$TIOT0Q7J6J7&$TIOT0Q7J6J7TIOT TIOT MK9QDT7IOT>SFDU>F> ;/?BRÙKBT><9QDT7IOTSDU>F> ;/BEBTIOT TIOT  RTIOT> U@Ԛ< RTIOT> U@Ԛ<TIOT TIOT 20P4JTIOTSUXߢ?U,6XT&$4TIOTSUX?6XTTIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT 20TIOT47>4 3DFDSDA,*TIOT4> 3DFDSDATIOT TIOT &$TIOT0Q7J6J7&$TIOT0Q7J6J7TIOT TIOT 20TIOTB6J768T7P4BT)'TIOTB6ќJ6874BTTIOT TIOT  RTIOT> U@Ԛ< RTIOT> U@Ԛ<TIOT TIOT hfRT>IOT> UP4>4—P=AN,:L%!**P4>٬J=$@Ԛ<SQRT>IOT> U4>4=AN,:L**4>٬J=$@Ԛ<TIOT TIOT 53RP4JTIOT> Sߢ?U>9@Ԛ<,*R4TIOT> S?>9@Ԛ<TIOT TIOT DBRP4JTIOT>MKJIOTKK DPDA>MKJIOTKK DPDAA,G߇;G߇;%>MA,G߇;G߇;%>M\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<A,G߇;G߇;%>MA,G߇;G߇;%>M%A%AG  %AAA,G߇;G߇;%>MA,G߇;G߇;%>M%A%A%AAA,G߇;G߇;%>MA,G߇;G߇;%>M&'%IIA$ۏ"&'%IIAG&'%II :AGD3AT(%!AG}{&'%IIA&'%IIA&'%II :AD3ATVAA,G߇;G߇;%>MA,G߇;G߇;%>M\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<A,G߇;G߇;%>MA,G߇;G߇;%>M%A%A %AA A,G߇;G߇;%>MA,G߇;G߇;%>M%A%A%AAA,G߇;G߇;%>MA,G߇;G߇;%>M20%CV2%0J%2CWFTOWW)'%CV2%0%2WFTO9A,G߇;G߇;%>MA,G߇;G߇;%>M\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<A,G߇;G߇;%>MA,G߇;G߇;%>M20%BF%JW DG%AG@F:=#!%<%J D%A@:=A,G߇;G߇;%>MA,G߇;G߇;%>M%A%A%AAA,G߇;G߇;%>MA,G߇;G߇;%>MJHD9GM>AQٟ@DBU,G߇;G3MVٟ@6DPDA>AQٟ@DK,G߇;G3MV5DPDAA,G߇;G߇;%>MA,G߇;G߇;%>M\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<\ZAPIDK4,G,G,G߇;5>,VCʿ7NPI>>>V0>@Ԛ<A,G߇;G߇;%>MA,G߇;G߇;%>M/-AG%;̽>MŹ(Źʿ@@@)'AG%;>Ź(Źʿ@@A,G߇;G߇;%>MA,G߇;G߇;%>M%A%A%AAA,G߇;G߇;%>MA,G߇;G߇;%>M20%DJW.>=V%JW G%A)'%DJW.>=V%J GA 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD86$& C2̙EϪJֈDT9J9@AB/- C2̙EϪJֈDTJ9@AB 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD)' 2EC$&E̛<0>WT 2ECE0>W 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD)'$& C2GE9ֈD@Ԛ<#! C2GE9ֈD@Ԛ< 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD;9Sޡ8$&>&2̙E ֈD>ܤK$'&9Q')'S>&2̙E ֈD>ܤKƋQ' 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD~6AB6T 2EۈXD:ۈX>ў7&B$&,&ίB>T7>KUVJJKUQTI1R/0Qec6AB6T 2EۈXD:ۈX>ў7&B,&ίB>T7KVQI1R/Q 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD$&2@ 8,T2@ ,T 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD_]$֗>AS 19EŹ4(>&24 EB߻WֈD1H%,9: >I\Z$֗>AS 19EŹ4(>&24 EB߻WֈD1H%,: >I 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈDA?$& ۈX2@QTWNEܾW,;PT,T86 ۈX2@QTWNEܾW,;ٱP,T 2EֈD$& 2EֈD)'$ 2̙EֈD>ܤK"6"&#!$ 2̙EֈD>ܤK"6" 2EֈD$& 2EֈD53ޥ0CE$&0> 2EֈDJ<=@,*ޥ0CE0> 2EֈDJ=@;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D;DHؕ7;EE@;Dؕ7;EE@;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>DXŷ5D/D/ Xŷ5DD;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D ;DHBU>UW6T;DΑB>U6;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D;DHDHDHT;DDDT;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D ;DH>  ;D>;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D BD/>  BD>;1>DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D;DH=DH ;1>D,*;>DH66;DH9FA@Ԛ<#!;>D6;D9FA@Ԛ<;1>DH ;1>D;ӈ5UD>DHDH;5D>DDE1?0;E1?0;ַ;E1?,;@Ԛ<ַ;E1?,;@Ԛ<E1?0;E1?0;;9K6>HE1K/Q4DGKIAB86K6>HE1K/Q4GKIABE1?0;E1?0;GEDKOFHE1K/Q4DGKOJܤK>6DG@K20HE1K/Q4GܤK>6D@KE1?0;E1?0;#!DE1ߢ?08IDE1?1BT/>׆B/1/69IPTR;I@Ԛ<MKܤ5ַ;>E1?1BT/>׆B/1/69IPTR;I@Ԛ<E1?0;E1?0;GEDKOFHE1K/Q4DGKOJܤK>6DG@K20HE1K/Q4GܤK>6D@KE1?0;E1?0;A?A׆B?KUEI3R>7DE1?P;66@Ԛ<;9A׆B?KUEI3>7DE1?P;6@Ԛ<QE1?0;E1?0;1A?Iַ;  1AIE1?0;E1?0;ַ;E1?,;@Ԛ<ַ;E1?,;@Ԛ<E1?0;E1?0;53AUE1AIٟ@;N?985D@@@/-AUE1A@;N?985D@@G=ݰFBSF G=FF#!BN0ݰFBSF2Uа.TBNFF2*G=ݰFBSF G=FF)'AOݰFBFASF>LS2 AOFFAF>LSG=ݰFBSF G=FF#!BN0ݰFBSF2Uа.TBNFF2*G=ݰFBSF G=FF86ݰFBSFQBJ768T7QݰFBSFB&$FFQBќJ687QFFBG=ݰFBSF G=FF#!BN0ݰFBSF2Uа.TBNFF2*G=ݰFBSF G=FF AסET/ݰFBٟ@3@Ԛ<ATFٟ@3@Ԛ<G=ݰFBSF G=FF#!BN0ݰFBSF2Uа.TBNFF2*G=ݰFBSF G=FFSFUR7T FU7T11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ11F֎T V>б 11F֎T Vб 11F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJ/-SAS11F֎T=>щQCE@@@,*SAS11F֎T=>щQCE@@11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ)'11F֎T=?N;78K11F֎T7K11F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJ=?N;C;MC;M11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJJH$U-£-E7-Ҳ0AʡH9DS&11F֎T7J6!A?$U-£-E7-Ҳ0AʡH9DS&11F֎T7611F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJ;911F֎TBJHį-HUHڶ>2>AR@Ԛ<;911F֎TBJHį-HUHڶ>2>AR@Ԛ<11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ ӪN11F֎TE@@@ӪN11F֎TE@@11F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJ8611F֎T03VCJ768T711F֎T2011F֎T03VCќJ68711F֎T11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ11F֎T@?11F֎T@11F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJSQDR07>I8Ҳ02AXڃN>11F֎TAKAٟ@HDPDAPNDR07>8Ҳ02AXڃN>11F֎TAKAٟ@HDPDA11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ5311F֎TW")$IK46)'11F֎TW")I411F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJJH7&:֎T11F֎TTTT=?N;T!537&:֎T11F֎TTTTTK11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ&$CE>11F֎T@0=@Ԛ<&$CE>11F֎T@0=@Ԛ<11F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJPN11F֎T=?N;7=?N;GTTT - !.,11F֎T7GTTT+11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJki11F֎TKSħ;S C9>>4K.TRҲ0AGB@>=?N;)ʪ\Z11F֎TKSS Cޖ>>4K.TRҲ0AGB@>)ʪ11F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJ53&11F֎TRBOEVCE@@@,*&11F֎TRBOECE@@11F֎TPAJ11F֎TPAJ20ڶ>S:—PG2&**11F֎T@Ԛ<,*S:I2&**11F֎T@Ԛ<11F֎TPAJ11F֎TPAJ,*7DT11F֎T6U=?N;7T11F֎T611F֎TPAJ11F֎TPAJA?7D2T:֎T11F֎T=?N;TTK;2072T:֎T11F֎TTTK;11F֎TPAJ11F֎TPAJ_]$U-£-E7-Ҳ0AʡH9DS&11F֎T$U-CɤUTҲ0AB!YW$U-£-E7-Ҳ0AʡH9DS&11F֎T$U-CɤUTҲ0AB86X,19CK/ - NW=HDEģCKX1KNW/DCVTX,19CʡH97/ - NW=HDEģCKGģC:7BWT53X1ʡH97NW/DCGģC7BW86X,19CK/ - NW=HDEģCKX1KNW/DCJHX,19CʡH97/ - NW=HDE8KDG@K/-X1ʡH97΂NW/D8KD@K86X,19CK/ - NW=HDEģCKX1KNW/DCGEX,19CʡH97/ - NWCHDEģCK΂:6T)'X1ʡH97NW޻/DC΂:686X,19CK/ - NW=HDEģCKX1KNW/DC\Z-AX,19CʡH97/ - NW=HDEģCK -:K48?:T><-AX1ʡH97NW/DC -:48?:T86X,19CK/ - NW=HDEģCKX1KNW/DC;9X,19CK/ - NW=HDEGI#!X1KNW/DGI86X,19CK/ - NW=HDEģCKX1KNW/DCb`X,19CʡH97/ - NW=HDEK?IU>DE?T΂:C̛<A?X1ʡH97NW/DE?IUD?΂:C̛<86X,19CK/ - NW=HDEģCKX1KNW/DC86X,19CK/ - NW=HDEģCKX1KNW/DC86X,19CK/ - NW=HDEģCKX1KNW/DCGEX,19CʡH97/ - NW=HDE8K΂:4T/-X1ʡH97NW/D8K΂:4T86X,19CK/ - NW=HDEģCKX1KNW/DCVTX,19CʡH97/ - NW=HDEģCKGģC:7BWT53X1ʡH97NW/DCGģC7BW86X,19CK/ - NW=HDEģCKX1KNW/DC86X,19CK/ - NW=HDE>KX1KNW/D>86X,19CK/ - NW=HDEģCKX1KNW/DCGEX,19CʡH97/ - NWCHDEģCK΂:6T)'X1ʡH97NW޻/DC΂:686X,19CK/ - NW=HDEģCKX1KNW/DCSQX,19CʡH97/ - NW=HDEOKDOGDO6G20X1ʡH97NW/DODGD6G86X,19CK/ - NW=HDEģCKX1KNW/DC;9X,19CK/ - NW=HDEGI#!X1KNW/DGI86X,19CK/ - NW=HDEģCKX1KNW/DCJHX,19CʡH97/ - /@CHWDEģCKùBNL,*X1ʡH97N޻/WDCùBNL86X,19CK/ - NW=HDEģCKX1KNW/DC86X,19CK/ - NW=HDEģCKX1KNW/DC86X,19CK/ - NW=HDEģCKX1KNW/DCqoX,19CʡH97/ - NW=HDE>KL28AWT6O0U—PD7>6;PNX1ʡH97NW/D>LPAW6O0U—PD7>6;86X,19CK/ - NW=HDEģCKX1KNW/DCVTX,19CʡH97/ - NW=HDEģCKGģC:7BWT53X1ʡH97NW/DCGģC7BW86X,19CK/ - NW=HDEģCKX1KNW/DC>E6DSDA53-II6I6I66U>E6DSDANB-<66N-<66#!NB-<66ODSDAN-<66DSDANB-<66N-<66_]-I6DD9D66>=/,ֈ;N?KCL3;ނB/6/7TNؕ7؄/ESQ-I6D966>=/,ֈ;N?KCL3ނB/6/7TNڕ7ENB-<66N-<66><-I66OE60FǂSHAVTJD8DAP/--I66E6FǂSHAVTD8ANB-<66N-<6686-II6I6I66OU>E6DSDA53-II6I6I66U>E6DSDANB-<66N-<66#!NB-<6OC8A99N-<6OC8A9NB-<66N-<66_]-I6DD9D66>=/,ֈ;N?KCL3;ނB/6/7TNؕ7؄/ESQ-I6D966>=/,ֈ;N?KCL3ނB/6/7TNڕ7ENB-<66N-<66E6DSDA53-II6I6I66U>E6DSDANB-<66N-<66)'D-IHD6/E6-116)'D-IHD6/E6-116NB-<66N-<66_]-I6DD9D66>=/,ֈ;N?KCL3;ނB/6/7TNؕ7؄/ESQ-I6D966>=/,ֈ;N?KCL3ނB/6/7TNڕ7ENB-<66N-<66 кB-<ԋ/C66JƱCTкB-<ԋ/C66JϱCNB-<66N-<6686-II6I6I66OU>E6DSDA53-II6I6I66U>E6DSDANB-<66N-<66,*NB-=/,ֈ;N?KCL3;ނB/6/7TNؕ7؄/ESQ-I6D966>=/,ֈ;N?KCL3ނB/6/7TNڕ7ENB-<66N-<66865-Н?T  R>ܞ? İU7/ İU7/204UİU7/5.W@ßNWF/ÐWW/-4UİU7/5.W@ßNW/ÐWW İU7/ İU7//-UİU7/.W@ßN1T7̛<,*UİU7/.W@ßN17̛< İU7/ İU7/  -NUİU7/.@K  -NUİU7/.@K İU7/ İU7/534İU7/5:S9İU:4K"!,*4İU7/5:S9İU:4K" İU7/ İU7/86T14UİU7/5.:S9İUAWAT20T14UİU7/5.:S9İUAA İU7/ İU7/;94UİU7/5.W@ßNWF?9GHН?T204UİU7/5.W@ßNW?9G/ İU7/ İU7/204UİU7/5.W@ßNWF/ÐWW/-4UİU7/5.W@ßNW/ÐWW İU7/ İU7/GEUİU7/.W@ßNWF/ɴ9Н?Tɴ9ʡH9?/T;9UİU7/.W@ßNW/ɴ9ܞ?ɴ99/T İU7/ İU7/  -NUİU7/.@K  -NUİU7/.@K İU7/ İU7/#!4UİU7/5.W@ßN#!4UİU7/5.W@ßN İU7/ İU7/86T14UİU7/5.:S9İUAWAT20T14UİU7/5.:S9İUAA İU7/ İU7/864UİU7/5.W@ßNWF/̝5̛FˎWBDIKT)ʪ/-KFEڶ>FˎWBDIK)ʪį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J&$Sį-K>JNTCTT#!Sį-K>JϞNCTTį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J кB6Sį-KIKT:KкB6Sį-KIK:Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J#!;Kʗ,/Sտ7PC@;B ;Kʗ,/Sտ7PC;Bį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J203BBDK6S9A@S@060T203BBDK6S9A@S@060Tį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J/-;Kʗ,/—PL>CBFRKAKB,*;Kʗ,/—PL>CBFRKAKį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J)';Kʗ,/SKD͙7IRN͙7T&$;Kʗ,/SKDIRN͙7Tį-KEˎWٟ@6֬4Jį-KEˎW5֬4J-K-Kį-KEˎWٟ@6֬4Jį-KEˎW5֬4J#!SKб J768T7U>SKб ќJ687U>R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;,BR/>47,BR/>47R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;;9ѹ67,BƸ=DJ7.K/B9A=B@@@&$չ6,BƸ=DJ*/BA@@R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;;9R/B,B.P԰'0VAUѹ6FG,*R/B,B.P0VA"Uݹ6GR/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;zxR/,BCMR/@BBR-P2KONJ768T7;2/ޟEŮß1QİL R/Ξ),BWβI3I@K/->ß1QİL R/Ξ),BWβI3I@R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;\Z(<7N6B=G;3>7K  #!<K  R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;)'N6@4,BHAR/D@Ԛ<&$N@4,BHAR/D@Ԛ<R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;/-R/,B@Hٟ@ʜ2IAN6@@@)'R/,B@Hٟ@ʜ2IAN@@R/,B;R/,B;)'NЃB;W$,BΞ)9"@@@&$NЃB;W$,BΞ)9"@@R/,B;R/,B;,*/>,BJ>,BJ>,BAB,*/>,BJ>,BJ>,BABB78;U B8;UB;U>C@KB;U>C@KB78;U B8;U,*ʡH9=7B;U>CEJCEJC7CC78N@>;GB B;>8N@>;GB78;U B8;U,*B7;>8N@Ɓ-67Ɓ-6HT#!B;>8N@ȁ-7ȁ-HTB78;U B8;U7B;U>C8,T7B;U>C,TB78;U B8;UB;ULC8,TB;ULC,TB78;U B8;UB;U>C@KB;U>C@KB78;U B8;U)'7B;U>CBU8JCBU8JC7CC7;UN8C.VI<7; B>;UN8C.I7FU/J.ʭB/ FJ.ϭBMKDVD:JTʭB/>ڶ>9ԚGJE@Ԛ<A?DVD:JTϭB>9ԚGJE@Ԛ<FU/J.ʭB/ FJ.ϭBJ.ʭB/@? J.ϭB@FU/J.ʭB/ FJ.ϭB#!J.ʭB/L FUO@KJ.ϭBL FO@KFU/J.ʭB/ FJ.ϭB)'J.ʭB/L F;F?8,T J.ϭBL F;F,TFU/J.ʭB/ FJ.ϭBMKDVD:JTʭB/>ڶ>9ԚGJE@Ԛ<A?DVD:JTϭB>9ԚGJE@Ԛ<FU/J.ʭB/ FJ.ϭB,*J.ʭB/L FUO'GNOC&$J.ϭBL FO'GNOCFU/J.ʭB/ FJ.ϭB#!J.ʭB/L FUO@KJ.ϭBL FO@KFU/J.ʭB/ FJ.ϭB,*J.ʭB/L FUOLBڶ>9ԚGJE@Ԛ<A?DVD:JTϭB>9ԚGJE@Ԛ<FU/J.ʭB/ FJ.ϭBJ.ʭB/>LJ.ϭB>LFU/J.ʭB/ FJ.ϭB#!J.ʭB/L FUO@KJ.ϭBL FO@KFU/J.ʭB/ FJ.ϭB53J.ʭB/8NJ.ʭB/G>98F>T,*J.ϭB8NJ.ϭBG>98F>FU/J.ʭB/ FJ.ϭBMKDVD:JTʭB/>ڶ>9ԚGJE@Ԛ<A?DVD:JTϭB>9ԚGJE@Ԛ<FU/J.ʭB/ FJ.ϭB/-J.ʭB/8IC¨03?;9<>TJ.ϭB8IϨ0-<>FU/J.ʭB/ FJ.ϭB#!J.ʭB/L FUO@KJ.ϭBL FO@KFU/J.ʭB/ FJ.ϭBJ.ʭB/;J6J.ϭB;J6G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=/-?;8WB=&;WɾS2SCI9)'?;8WB=&;W2SC9G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=/-H޽B;8AE0WB=щQUP.T,*H޽B;8AE0WB=щQUP.G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=JHWBRPI9=50׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=20PG,DNG806WB=C=S7,*PG,DNG85WB=CS7G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=GEW=D,?R;G0G8DN@WG7ӽDIECӽDI>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=20޽BR0WB>=M>I?;8щQ@Ԛ<20޽BR0WB>=M>I?;8щQ@Ԛ<G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=)';80WB=D>щQDSDA&$;80WB=ӗ>щQDSDAG8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=JHԓ459D0ԓ4B=SRJ>E;86ST!!";9ԓ45D0ԓ4B=SRJ>E;86STXG8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=#!;8>E6QWB=@N ;>E6QWB=@NG8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=R8G8>=>PR8G8>=>PG8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=VTԓ459D0ԓ4B=O׽RG6ST!!"DBԓ45D0ԓ4B=O׽RG6STXG8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=DBG׫;@2>H8GK0G8WB=F?HG,H,DBG׫;@2>H8GK0G8WB=F?HG,H,G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=\Z7WCȻ22HG/CNK08W=ߌ,3=GGև9>TYW7WCȻ22HG/CNK08W=ߌ,3=GGև9>G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=H$,GG88W-BGHHH$,GG88W-BGHHQH$,GG88W-BGHHDETLBL=,KH$,GG88W-BGHH$,GG88W-BGHQH$,GG88W-BGHDETLBL,KG8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=86G,DNG806WB=C=Pֈ;̛׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=DBTCRJG<8QG8O60G6U<8Gڶ>S=86CJG<8QG8O60G6<8GS=G8ԓ4BWC=G8ԓ4BWC=SQDŽPB;8>׽RG>G8;?Sԓ459D0ԓ4B=R/AEATMKDŽPB;8>׽RG>G8;?Sԓ45D0ԓ4B=R/AEAG8ԓ4BWC=G8ԓ4BWC=DBS9I/CD<8JGԓ4GWB-RN= -KF7DBS9I/CD<8JGԓ4GWB-RN= -KF7 ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ520 Ͳ4ʉ5/%DHGAAOC4ˉ5%DHAAOC ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5  ʉ5ޚTDG@K5D@K ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5&$ ۚKʉ5RG̛<"&ۚK݉5G̛<" ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5ʉ5 8,T ʉ5,T ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5207 ʉ5ޚT4L/ȈXʉ5B-AB#!H6=>ʉ5B-AB ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5;9 ۚK4ʉ5G8OE>έ;LSDʡH9;,*ۚK4ʉ5GOE>٭;SDʡH9; ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5#!@ >ʉ5DSDA@>ʉ5DSDA ʉ5  ʉ5ʉ5 @K ʉ5@K ʉ5  ʉ5&$$6 6ʉ5@Ԛ<$66ʉ5@Ԛ<,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A6 @K-; @K-;,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A653.HB@M64A6OI0щQUP.T/-.HB@M64A6I0щQUP.,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A6><.HB@M64A6OHAVTJD8DAP20.HB@M64A6HAVTD8A,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A6)'.49B3I6OFUPUT#!.49B3I6FUPU,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A6A?ڤ55D>.1B@D4A= @6OG;P20ܤ5D>.1B@4A= @6G;P,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A6GE.JS=HB@DH4ADAP;0T?6T)!,*.SHB@H4AA;T6T),*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A620.BKM4AHAVTJD8DAP)'.BKM4AHAVTD8A,*,BA@D64AE54A6O&$,BA@64AE54A6/-D4A,HB54A6OUP.T)'D4A,HB54A6UP.,*,BA@D64AE54A6O&$,BA@64AE54A6/-.HB@D4A=6OGUP9T#!.HB@4ASGUP9 @GMT  @GM>  BIɤU1.@GMTC3G9/-VN>BIɤU1.@GMC3G9 @GMT  @GMDB@G.MTA/B@G.MTQ8ޚTNGKTOT,*@G6A/@G6Q8+KTO @GMT  @GM;9@G.MT,;MT73;E=57TIַ;)'@G6,;M7;E57TI @GMT  @GM)'@G@MT/-56P9?ַ;#!@G@M/-56P9? @GMT  @GM@G.MTG@=@GMT.@MTC3G3G9ܞNTTOC3G98Iַ;@G.MTG@=@GMT.@MTki@G6G=@GM.@MC3G3G9NTC3G9I@G6G=@GM.@M @GMT  @GM86@G.4@ϚL4MT;M4߹-WHԓ6Iַ;&$@G.@4M6߹-WHԓ6I @GMT  @GM&$@G.MT߹-5TOOIַ;@G6߹-5TOI @GMT  @GM.@MT.MT@MTܞND>.MTE=.MT=.MTIϪJ1.M@G.@MTDC3G98Iַ;\Z.@M6@MN>6E=6=6IϪJ1.M@G.@MDC3G9I @GMT  @GM&$.M@GMTJ-U@ؙDT#!.M@GMJ-U@ؙDT @GMT  @GM)''=.@GMTIB.<.M@GM6.@M@MEM=6C3G9I @GMT  @GM/-@G=@G.MT=.MTIG@ @G=@G6=6IG @GMT  @GM>  BIɤU1.@GMTC3G9/-VN>BIɤU1.@GMC3G9 @GMT  @GMDBMU@G@MT@MTMTMU,HP5ѳBʈFP?53M@G@M@MMM,HP5ѳBʈFP? @GMT  @GM;9@G.MT,;MT73;E=57TIַ;)'@G6,;M7;E57TI @GMT  @GM53@G.MTַ;@G.MTD,BPַ;Υ6&$@G6ַ;@G6D,Pַ;Υ6cI6;0ڳQ  +0ڳQ I6;ٟ@9ٟ@0A@Ԛ<+90A@Ԛ<KI6;0ڳQ  +0ڳQI6;-N  +-NI6;0ڳQ  +0ڳQ20I6;0ʭBќ:-WI6;I6>S2&$+0ʭBќ:-WI6I6>SoI6;0ڳQ  +0ڳQ&$UII6;-N1D@@@UI+-N1ځD@I6;0ڳQ  +0ڳQSQI6;096WI-:PUPޜFTI—PRMTI6ޜF6JH+096WI-:PUPޜFTIRMTI6ޜF6I6;0ڳQ  +0ڳQA?Q2?EC=E@.=9QCB9QCͦ(!)'Q2?EC=@ƋQCBƋQCiI6;0ڳQ  +0ڳQ .IWI6;8TAB.IW+8TABI6;0ڳQ  +0ڳQ86I6;6U=9=>C<ʡH6IHC<ʡH6IHTI6>6;DPDA86>I6>6;DPDA<I6>6;DPDA86>I6>6;DPDA,*ä=FBNLI6>6;DPDA86>I6>6;DPDA3PϪJBE҄JJ9R>9ֈDCSW9ٟ@192D>9ED>9@S6;,DP>=/UP.T.M@D>3PϪJBEԄJ9R>DCSW@192D>BD>9@S6;,DP>=/UP.I6>6;DPDA86>I6>6;DPDAI6>6;DPDA86>I6>6;DPDA6E>6E>I6>6;DPDA86>I6>6;DPDAI6>6;DPDA86>I6>6;DPDASE>C=,B/7Ȼ;T=.LGENA=C,B/7Ȼ;T=LȥW> 1ڶ>SGȥW> 1SG,*A> Q5=Qڶ>SȥW@@@&$A> Q5=QSȥW@@ȥW> 1ڶ>SGȥW> 1SG53>W5CȥWG8E<=?N;†M8T)'>W5CȥWG8E<†M8ȥW> 1ڶ>SGȥW> 1SGDB> @GWC;9Q66BW4 ȥW@@@><> @GWC;9Q6BW4 ȥW@@ȥW> 1ڶ>SGȥW> 1SGhf;>>WȥW,:K>;=?N;7=?N;GTTT - !FD;>>WȥW,:K>;7GTTT+ȥW> 1ڶ>SGȥW> 1SG,*A> Q5=Qڶ>SȥW@@@&$A> Q5=QSȥW@@ȥW> 1ڶ>SGȥW> 1SGDB>W5CWȥWG8E<=?N;TTTG8̛<86>W5CWȥWG8E<TTTG8ȥW> 1ڶ>SGȥW> 1SGDB> @GWC;9Q66BW4 ȥW@@@><> @GWC;9Q6BW4 ȥW@@ȥW> 1ڶ>SGȥW> 1SG20ȥW>W2G/I֣.ŞG9/;7;20ȥW>W2G/I֣.ŞG9/;7;ȥW> 1ڶ>SGȥW> 1SG,*A> Q5=Qڶ>SȥW@@@&$A> Q5=QSȥW@@ȥW> 1ڶ>SGȥW> 1SG20> >QR@8S֗T7ȥW@@@/-> >QR@8S֗T7ȥW@@ȥW> 1ڶ>SGȥW> 1SGDB> @GWC;9Q66BW4 ȥW@@@><> @GWC;9Q6BW4 ȥW@@ȥW> 1ڶ>SGȥW> 1SG20> ȥWS8D0;T=?N;)'> ȥWS8D0;TȥW> 1ڶ>SGȥW> 1SG,*A> Q5=Qڶ>SȥW@@@&$A> Q5=QSȥW@@ȥW> 1ڶ>SGȥW> 1SG#!ȥWȥWKȥW,:ĝ ȥWȥWKȥW,:؝ȥW> 1ڶ>SGȥW> 1SGDB> @GWC;9Q66BW4 ȥW@@@><> @GWC;9Q6BW4 ȥW@@ȥW> 1ڶ>SGȥW> 1SG_]N9UL=>˾3ȥW> G/NIǡ6TTT=?N;T!DBNU=>˾3ȥW> G/NIǡ6TTTTK  ?J=  ?J=)'VHDJ>4=5D3Ȼ;>T VD>4=5D3Ȼ;>  ?J=  ?J=DJ>?=DJ>?=}  ?J=  ?J=?J=Uа.T ?J=*  ?J=  ?J= J?,= J?,=  ?J=  ?J=;9?EJ=׍Q7E70 NʡH -H064T53?EJ=׍Q,0 NʡH -H064T  ?J=  ?J=DJ>?=GĊA>TDJ>?=GĊA>  ?J=  ?J=/-D9DDG?>J>,NDSDA#!9G?>J>=DSDA  ?J=  ?J=?EJ׍QDG@K?EJ׍QD@K  ?J=  ?J= D/F;  DF;  ?J=  ?J=,*DJ>?=E?NKLF9@K)'DJ>?=E?NKLF9@  ?J=  ?J=?=EJ=׍QPB6?=EJ=׍QPB  ?J=  ?J=;?1KEJ>=׍QCPDCK9K>ٟ@9@9W>4R/ҾWB1.O>NB9KJK>N9͝,ڪ3.WȻBDEA¶7ģC:Q;?1KEJ>=׍QCPDC9>ٟ@9@9W>4R/ҾWB1.O>NB8J>N9Ν,.WȻBDENģC:Q  ?J=  ?J=20?>?J>,N166==@Ԛ<)'?>?J>=16=@Ԛ<  ?J=  ?J=DJ>?=@KDJ>?=@K  ?J=  ?J=>?=4FSCܞN/OJ-0E/-DJ>?=4FSNOJ7E  ?J=  ?J=?J=4Н?A3AT?J=4AA  ?J=  ?J=)'VHDJ>4=5D3Ȼ;>T VD>4=5D3Ȼ;>  ?J=  ?J= ?EJ=׍QFK AB ?EJ=׍QFK AB  ?J=  ?J=?J=Uа.T ?J=*  ?J=  ?J=)'J>?=ʡH۩RV-T.6.T&$J>?=ʡH۩RV-T.6.  ?J=  ?J=;9?EJ=׍Q7E70 NʡH -H064T53?EJ=׍Q,0 NʡH -H064T  ?J=  ?J=20UWX=6?KJJ=3WН?>AT,*UWX=6?KJJ=3W?A  ?J=  ?J=/-D9DDG?>J>,NDSDA#!9G?>J>=DSDA  ?J=  ?J=864?߸3ѝ6B5-0IJ?߸3==I̛=F>>@>T#!DJ7>=F>>@>IFET> IFT>_]IFE71UC56K7WE>VWA75SJS24.@7Uև9>TVTIF71UC6K7WE>VWA75SJS24.@7Uև9>IFET> IFT>&$1FEWK.WKC:ET1FWKWKC:EIFET> IFT>,*IFED6AS1F՟?>>@Ԛ<#!IFD6Aū1?>>@Ԛ<IFET> IFT>;9IFEAW̋?6FF1UK>626::@20IFA̋?6.1UK>626::@IFET> IFT>_]IFE71UC56K7WE>VWA75SJS24.@7Uև9>TVTIF71UC6K7WE>VWA75SJS24.@7Uև9>IFET> IFT>&$IKMFE->CϨHQRTIKMF-CΨQRTIFET> IFT>,*IFED6AS1F՟?>>@Ԛ<#!IFD6Aū1?>>@Ԛ<IFET> IFT>20IFED6AS1F՟?>>DSDA)'IFD6Aū1?>>DSDAIFET> IFT>_]IFE71UC56K7WE>VWA75SJS24.@7Uև9>TVTIF71UC6K7WE>VWA75SJS24.@7Uև9>IFET> IFT>53FE>>M*ɬI*I*55TH>M*ɬI*I*5THTIFET> IFT>,*IFED6AS1F՟?>>@Ԛ<#!IFD6Aū1?>>@Ԛ<IFET> IFT>53HFE>>@IU>J-F>TLP20HF>>@IU>J-F>TLPIFET> IFT>_]IFE71UC56K7WE>VWA75SJS24.@7Uև9>TVTIF71UC6K7WE>VWA75SJS24.@7Uև9>IFET> IFT>20I—P=E>>FEDH>QIB,ܔN)'I=E>>FDH>QIBG DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO;:O;4P@Ԛ<:;4P@Ԛ< DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO; -:O;WL/?T -:;W. DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO;O:4;DG@KO:;D@K DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO;:OD>;@K:D>;@K DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO; DO;2  D;2 DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO; :O;2,LDG@K:;2,D@K DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO; :O;2  :;2 DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO;:O;28,T:;2,T DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO;&$ :OƔ>;21ET!! :Ɣ>;21ET DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO;:O;28,T:;2,T DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO; P:O8;:I̺@:TP:8;:@ DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO;#!:O;J:O4974T:;J:474T DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO;DO;2:TD;2:T DEO; DEO;  N:O;BF8@K N:;BF8@K DEO; DEO;:O;2DG@K:;2D@K DEO; DEO;:O;J@Ԛ<:;J@Ԛ< DEO; DEO;:O;27Cͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?&$/IMTS;ͺ?ٟ@6A7BITS;ͺ?5+ (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?&$(TS;>6/IM@@@(TS;>6I@@ (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?86/IMTR;>>VBͺ?C7=V-AB)'ITR;>>Bͺ?C7VAB (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?(TS64ͺ?(TS64ͺ? (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ? /IMT;ͺ?DSDAIT;ͺ?DSDA (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?A?/IMPD;Fͺ?M7K/1I-I-@Ԛ<53IPD;Fͺ?MK/I-I-@Ԛ< (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ? /IMF̽>S6>NBIF̽>S6>NB (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?53;ͺ?9T./I/J@/TA/IMT,*;ͺ?9T.I/J@/TAIT (T;ͺ? (T;ͺ?(TR;>ͺ?@Ԛ<(TR;>ͺ?@Ԛ< (T;ͺ? (T;ͺ?86/IM̺ٟ@6ʔ7;Vͺ?2(/IMI@)'I̺5ʔ7;Vͺ?2(II@G>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8&$UJG>SIBEU3H8UG>SIB8H8G>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8zx7HܞNDG>SEU7HܞNDG>SEUQ7HܞNDG>SEUDET߹-8Lԓ6Iַ;C=.b`7HNG>S87HNG>S8Q7HNG>S8DET߹-8Lԓ6IC=G>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8ki/K@G>SEUSTSUQ=WBSEUSIBEU߹-=EMSIַ;BU1TPN/KG>S8SŘSEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8DB-ܞNDG>! )SEUQ-Q;ۓRTCG0/--NG>S8Q-Q;ۓRCG0G>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S886RNUG>SEUIBSEU) :/B#!NG>S8IBS8:/G>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8/-ܞNDG>SIBEU;SIBEU&$NG>SIB8;SIB8G>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8SQ-ܞNDG>SEUQD2VFȣ84XIUҔB<֗TI7Iַ;ŒATJH-NG>S8QD2VFȣ84XIUҔB<֗TI7IŒATG>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S8 P PG>SEU G>S8/-L44ȣ8G>EUSIBEU̍ L4ȣ8G>8SIB8G>SEU G>S820DGIBEUSEUV;EUBEU#!DGIB8S8V8B8 DBCDIٟ@964>DBR@54>,*BDCD94>,6ODPDA&$BDR94>,6DPDA DBCDIٟ@964>DBR@54>#!BCDO94>6O@Ԛ<BRO94>6@Ԛ< DBCDIٟ@964>DBR@54>DBDBD>CD.NA>%>R6Iٟ@97DSDA53B>R.NA>%>R6@4DSDA DBCDIٟ@964>DBR@54>20DCD97UDE4Oٟ@6AA7B DR5UDMOٟ@6+ DBCDIٟ@964>DBR@54>,*BDCD94>,6ODPDA&$BDR94>,6DPDA DBCDIٟ@964>DBR@54>PNDBCDIٟ@964E>йSDK9ٟ@9SM>BU-щQ@Ԛ<>޹S @9SM>BU-щQ@Ԛ< DBCDIٟ@964>DBR@54>DBDBD>CD.NA>%>R6Iٟ@97DSDA53B>R.NA>%>R6@4DSDA DBCDIٟ@964>DBR@54>PNOD6>D=7ADBDCD=9>DIٟ@OD2O@@@>D=7ABR9>D@OD2O@@ DBCDIٟ@964>DBR@54>,*BDCD94>,6ODPDA&$BDR94>,6DPDA DBCDIٟ@964>DBR@54>b`DBDCDCٟ@9ɤKE7>RɤK/ϪJ>H=Q996ɤKA>A910TDPNBR@9ɤKE7>RɤK/ϪJ>H=Q95ɤKA>A10TD DBCDIٟ@964>DBR@54>DBDBD>CD.NA>%>R6Iٟ@97DSDA53B>R.NA>%>R6@4DSDA DBCDIٟ@964>DBR@54>_]DBCDN59OH348BD4R4O@4WOŮPO4/TDOTDBDBRN5O38BD4MO@4WX޵+TOT DBCDIٟ@964>DBR@54>,*BDCD94>,6ODPDA&$BDR94>,6DPDA DBCDIٟ@964>DBR@54>#!BCD94>A6O@Ԛ<BR94>A6@Ԛ< DBCDIٟ@964>DBR@54>DBDBD>CD.NA>%>R6Iٟ@97DSDA53B>R.NA>%>R6@4DSDA DBCDIٟ@964>DBR@54> UCD94>A6?,UR94>A6?, BU06˩5FE91PBU06FE1PA?BTS6˩5؇9?˩5OMR9I1FUFFPJ86BS6؇9?˩5OMR9I1FUFPJ BU06˩5FE91PBU06FE1P/-B؇96˩5ֲR1FQ?ٟ@SPG3&$B؇96ֲR1FQ?ٟ@SG BU06˩5FE91PBU06FE1P20UC>B06˩5NR31SFщQ@Ԛ</-UC>B06NR31SFщQ@Ԛ< BU06˩5FE91PBU06FE1P#!BOFR6˩5֛7>3PJBOFR673PJ BU06˩5FE91PBU06FE1PA?BTS6˩5؇9?˩5OMR9I1FUFFPJ86BS6؇9?˩5OMR9I1FUFPJ BU06˩5FE91PBU06FE1PkiBTS6˩50QN?9H9RIJIН?TXLI/I/I/B=6I6B=-0YWBS60QN?9H9RIJIܞ?ɜXI/II/B=6I6B=0 BU06˩5FE91PBU06FE1P20UC>B06˩5NR31SFщQ@Ԛ</-UC>B06NR31SFщQ@Ԛ< BU06˩5FE91PBU06FE1PDB05OȨKFD9IVBTELȨKF9IV:TН?>/-05OӨKDIVBELӨKIV:? BU06˩5FE91PBU06FE1PA?BTS6˩5؇9?˩5OMR9I1FUFFPJ86BS6؇9?˩5OMR9I1FUFPJ BU06˩5FE91PBU06FE1P20BTS6˩50BT6˩51T7H;T#!BS60B617H;T BU06˩5FE91PBU06FE1P20UC>B06˩5NR31SFщQ@Ԛ</-UC>B06NR31SFщQ@Ԛ< BU06˩5FE91PBU06FE1P BT66˩50QGН?>B660QG? BU06˩5FE91PBU06FE1PA?BTS6˩5؇9?˩5OMR9I1FUFFPJ86BS6؇9?˩5OMR9I1FUFPJ BU06˩5FE91PBU06FE1PBR6˩51?FBTBR61?BT BU06˩5FE91PBU06FE1P20UC>B06˩5NR31SFщQ@Ԛ</-UC>B06NR31SFщQ@Ԛ< BU06˩5FE91PBU06FE1P BT؇96˩5M5RFFB؇96M5RFFCE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T.TGT6>?>P.TG6>?>PCE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T@N>PC @N>PCE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T;9G߹-.TGTޚT>9BKR9KϋIL‡KAB20G߹-.TGޚT>BR9KϋIL‡KABCE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T)'.TGT6>7KM?U>T .TG6>7KM?,CE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T20.T9Kʉ55>A>BK=U;Н?T)'.T9Kʉ55>A>BU;ܞ?CE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T/-.T9Kʉ55>A>BK=3RT&$.T9Kʉ55>A>B3ҔRCE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6TDBO<>TRIO.TRIOVTIOB<ȬTIQ>86O<>TRO.TROVTIOB<ЬTQCE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6TMK.TGT9.DS>9>AK@—PB@ A6O:@@@><.TG9.DS>IAK@B@ A6:@@CE.TGTK6TCE.TGK6T><.,9T>GTP6ڜ>KDTWAПCDSDA;9.,9T>GP6ڜ>KDTWAПCDSDACE.TGTK6TCE.TGK6T.TGT6>@Ԛ<.TG6>@Ԛ<̾-,AJT0ޡ8;-AJT0AJT?@KAJT?@K̾-,AJT0ޡ8;-AJT0;9̾-,4FE4AJT54T?1WQ̛<7T/-;-4E4AJT54T?WQ̛<7̾-,AJT0ޡ8;-AJT0\Z̾--,̾-,6.ΩWH8443AJT4Q۹/85?1WK40ޡ8>1MK̾--,;-6ΩWH8443AJT4Q۹/85?WåK0>1̾-,AJT0ޡ8;-AJT0PN̾--H,̾-,6.ΩW4L5/B4W5H/OTANя7>1GE̾--H,;-6ΩW4L5/B4W5H/OTAN>1̾-,AJT0ޡ8;-AJT0,*7H984AJT54>0ޡ87̾-,AJT0ޡ8;-AJT0 ̾-/XT ̾-/XT̾-,AJT0ޡ8;-AJT0  ̾-CT  ̾-CT̾-,AJT0ޡ8;-AJT0,*H84AJT540ޡ8>1@K)'H84AJT540>1@K̾-,AJT0ޡ8;-AJT0AJT?@KAJT?@K̾-,AJT0ޡ8;-AJT0\ZH,̾-,XΩW84ALT540ޡ8>11DD>7U ̾-X̾-X-TMKH,;-X84ALT540>11D>7U ̾-X̾-X-̾-,AJT0ޡ8;-AJT0\Z̾--,̾-,6.ΩWH8443AJT4Q۹/85?1WK40ޡ8>1MK̾--,;-6ΩWH8443AJT4Q۹/85?WåK0>1̾-,AJT0ޡ8;-AJT0JH8AJTOC6̾-,84L5/TS:-1QBU/;868AJTOC6;-84L5/S:-ڠ#/̾-,AJT0ޡ8;-AJT0,*7H984AJT54>0ޡ87̾-,AJT0ޡ8;-AJT0/-̾-//?84AJT5T;U/T&$̾-//84AJT5T;*̾-,AJT0ޡ8;-AJT0  ̾-CT  ̾-CT̾-,AJT0ޡ8;-AJT0;9̾-,EAJTTDɍPMA:7.U/T/-;-EAJTTDӍPA:7.*̾-,AJT0ޡ8;-AJT0AJT?@KAJT?@K̾-,AJT0ޡ8;-AJT0ILIL̾-,AJT0ޡ8;-AJT0\Z̾--,̾-,6.ΩWH8443AJT4Q۹/85?1WK40ޡ8>1MK̾--,;-6ΩWH8443AJT4Q۹/85?WåK0>1̾-,AJT0ޡ8;-AJT0&$̾-4AT95/?V/?T ̾-4AT95/@?T)'FBUQDND6S?F: FBUQND6S?,*7F:BP1ND?F:@Ԛ<#!7:BP1ND?@Ԛ<)'FBUQDND6S?F: FBUQND6S?R6!8,TR6,T)'FBUQDND6S?F: FBUQND6S?207F:BP1ND?F:6S@Ԛ<)'7:BP1ND?6S@Ԛ<)'FBUQDND6S?F: FBUQND6S?531K>QP?F:Bб 4D=3-AB,*1K>QP?Bб 4D=-AB)'FBUQDND6S?F: FBUQND6S?,*7F:BP1ND?F:@Ԛ<#!7:BP1ND?@Ԛ<)'FBUQDND6S?F: FBUQND6S?;94F:̔6BUPV715CS?F:@Ԛ<204:̔6BUPV715CS?@Ԛ<)'FBUQDND6S?F: FBUQND6S?207F:BP1ND?F:6S@Ԛ<)'7:BP1ND?6S@Ԛ<)'FBUQDND6S?F: FBUQND6S?&$̔6ַ;IBUVԋ/CS?F:1IBUVԋ/CS?)'FBUQDND6S?F: FBUQND6S?,*7F:BP1ND?F:@Ԛ<#!7:BP1ND?@Ԛ<)'FBUQDND6S?F: FBUQND6S?GEF:̔6BU>ȣ89071KK6S?F:DSDA><:̔6BU>ȣ89071KK6S?DSDA)'FBUQDND6S?F: FBUQND6S?207F:BP1ND?F:6S@Ԛ<)'7:BP1ND?6S@Ԛ<)'FBUQDND6S?F: FBUQND6S? DA7O=—PRߑ4PTDA7=Rߑ4PT)'FBUQDND6S?F: FBUQND6S?,*7F:BP1ND?F:@Ԛ<#!7:BP1ND?@Ԛ<)'FBUQDND6S?F: FBUQND6S? ?F:6S>JK2@Ԛ<JHU2QNDHF/@SKDND SC>K2@Ԛ<UNDHF/UDHF/#!0-0:Nٟ@HFVFT0-:ٟ@HFFTUNDHF/UDHF/GEM:5UND8F/?PS6 1B>UDF?PS6UNDHF/UDHF/PNU2QN5DHF/Bٟ@SKDND SC>K2@Ԛ<JHU2QNDHF/@SKDND SC>K2@Ԛ<UNDHF/UDHF//-ӟ;N@R>8FS/"ҥ3!@;6&$ӟ;NR>8FS"ҥ3!@6UNDHF/UDHF/GEM:5K2@Ԛ<JHU2QNDHF/@SKDND SC>K2@Ԛ<UNDHF/UDHF/&$ FS5/ FS/UNDHF/UDHF/GEM:5K2@Ԛ<JHU2QNDHF/@SKDND SC>K2@Ԛ<UNDHF/UDHF/>/@K8FENܜ>@K  ,ݠ.A,A_]O70CT,ݠ.7>DGܤKP04TVAV07>?Q;GEO߫B>GK04TVAV07>?Q;  ,ݠ.A,A86,ݠ.>O/19O616ABTGA7B/-,>O/19O616ABTG+  ,ݠ.A,A,ݠ.ݠ.O ,ݠ.O  ,ݠ.A,AO,ݠ.B:DG@KO,BD@K  ,ݠ.A,A_]O70CT,ݠ.7>DGܤKP04TVAV07>?Q;GEO߫B>GK04TVAV07>?Q;  ,ݠ.A,A CN,ݠ.QADPDACN,QADPDA  ,ݠ.A,A,ݠ.ݠ.O ,ݠ.O  ,ݠ.A,A)'Iַ;DN0CT,ݠ.AщQ@Ԛ<Iַ;DNAщQ@Ԛ<  ,ݠ.A,A_]O70CT,ݠ.7>DGܤKP04TVAV07>?Q;GEO߫B>GK04TVAV07>?Q;  ,ݠ.A,A,*>T,ݠ.9ABAA4˛5DA4>,9ABA˛5DAn  ,ݠ.A,A,ݠ.ݠ.O ,ݠ.O  ,ݠ.A,A NT,ݠ.Nĵ*  ,ݠ.A,A_]O70CT,ݠ.7>DGܤKP04TVAV07>?Q;GEO߫B>GK04TVAV07>?Q;  ,ݠ.A,APNX>T9;;>X>QA7AO7RN;X7:U>E8DBX>;>X>QA7AO7N;X7:U>E8  ,ݠ.A,A,ݠ.ݠ.O ,ݠ.O  ,ݠ.A,AA?O߹-5,ݠ.߹-,ݠ.:߹-HİUMANC)O8,T53O߹-5,߹-,:߹-HMANC)O,TFIֈD:0DFI:0DIֈD:0@? I:0@FIֈD:0DFI:0D IֈDN0D:DG@KIN0DD@KFIֈD:0DFI:0D20D3ԚIֈD0>D:DSDA#!3IF>I>DDSDAFIֈD:0DFI:0DIֈDGC?DIGC?DFIֈD:0DFI:0DIֈD:0@? I:0@FIֈD:0DFI:0DD:IֈD14  DI1FIֈD:0DFI:0D20D3ԚIֈD0>D:DSDA#!3IF>I>DDSDAFIֈD:0DFI:0D20D3ԚIֈD0>D:DSDA#!3IF>I>DDSDAFIֈD:0DFI:0DIֈD:0@? I:0@FIֈD:0DFI:0D><يRIֈD:0DيR4IֈD:0BIيR4TC,>)'يRI:0DRI:0BIRCFIֈD:0DFI:0D20D3ԚIֈD0>D:DSDA#!3IF>I>DDSDAFIֈD:0DFI:0D;9IֈD>0EFR4:0>ğCѭDӮD:ٟ@H@Ԛ</-I>0EFM:0>ɟCܮDٟ@H@Ԛ<FIֈD:0DFI:0DIֈD:0@? I:0@FIֈD:0DFI:0D,*IֈD:0ߢ?DT7N79UAT#!I:0ߢ?D7N79UAFIֈD:0DFI:0D20D3ԚIֈD0>D:DSDA#!3IF>I>DDSDAFIֈD:0DFI:0D><0IֈD:0D04IֈD:0BI04TC,>/-0I:0D04I:0BI04CC@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>;9E87CC@N.H˱U=FCסE@@@2087CC@N.H˱U=FC@@C@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>GE= ->C@N7U0>ٟ@6MVIW>EDSDA><= ->C@NU0>ٟ@6V=>EDSDAC@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>A?C@N= -F>EMӛ?ߤ8>4FC@N@@@;9C@N= -F>EMӛ?ߤ8>4C@N@@C@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>DB= ->CסEC@NDE0**ԑ49A*/@@@;9= ->CC@ND0**ԑ49A*/@@C@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>b`B˩55=>O*7C@N7C@ĕ6TFR/HFH4ĕ6TPNB˩55=>O*C@ĕ6TFR/HF4ĕ6TC@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>_]6ɵO=>C@Nð.A ->>ٟ@9ٟ@DDܢESܤKA@CסESܤKA@Ԛ<SQ6ɵO=>C@Nð.A ->>9DܢESܤKA@CSܤKA@Ԛ<C@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>/-7C@N7 -=FSÐW7#! -=FSÐW7C@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>><= -F>C@NPEMӛ?M>>Fٟ@6@Ԛ<;9= -F>C@NPEMӛ?M>>F5@Ԛ<C@N= ->>C@N= ->>DB - -HE07!7LJ677/-H0LJ677C@N= ->>C@N= ->>)'C@N= -F(D>M@Ԛ<)'C@N= -F(D>M@Ԛ<HS/ON4/:HSON4/VT7H>S/OB4ʡH9ɰ5ȥ7/:į?I EL/ EHS/ON4/:HSON4/JHùBLW¶7/J7H>/B/WȥOB4784/:ĹBN/J84/HS/ON4/:HSON4/,*HS/ON4/:ҁX?L-T#!HSON4/ҁX?L-HS/ON4/:HSON4/&$HW>S/94/:AƭIHW>S94/AHS/ON4/:HSON4/)'7H/B/>ON47/:/HS/ON4/:HSON4/;9H>S/>4NO/://9¶7JùBL/#!>4NO//NJĹB/HS/ON4/:HSON4/;97H>S/>OB47BR:0ABBR:0ABHS/ON4/:HSON4/20/:7H>S/OB47/://HS/ON4/:HSON4/VT7H>S/OB4ʡH9ɰ5ȥ7/:į?I EL/ EHS/ON4/:HSON4/&$H>S/OB4">OB4>HS/ON4/:HSON4/,*HS/ON4/:ҁX?L-T#!HSON4/ҁX?L-HS/ON4/:HSON4/PN7HS/47/: ȥǶ,W¶7/>;GB20/ ȥǶ,N/>;GHS/ON4/:HSON4/)'7H/B/>ON47/:/HS/ON4/:HSON4/kiùBL9¶7/J7HS/9ȥ4NO7/:9¶7//:66ȈX4&20ĹBN/J/N//66ȈX4HS/ON4/:HSON4/;97H>S/>OB47BR:0ABBR:0ABHS/ON4/:HSON4/><ʡHU٨I7HS/47/::,AF> ʡHU٨I/:,>HS/ON4/:HSON4/VT7H>S/OB4ʡH9ɰ5ȥ7/:į?I EL/ EHS/ON4/:HSON4/HN1,;TLH1,;TLX:86˩5 X:86&$X:86˩5>X:86˩57 X:86>X:867X:86˩5 X:8686X:86˩54X:864VDT(!)'X:864X:86VDTX:86˩5 X:86&$X:86˩5>X:86˩57 X:86>X:867X:86˩5 X:86)'E8:X66˩5H38@@@#!E8:X66H38@@X:86˩5 X:86&$X:86˩5>X:86˩57 X:86>X:867X:86˩5 X:8620X:86ӻBOX:86˩5Q464T#!X:80X:86Q6TX:86˩5 X:86&$X:86˩5>X:86˩57 X:86>X:867X:86˩5 X:86GEX:86ӻBOX:86ӻBOX:86H6T$!20X:80X:86OX:866TX:86˩5 X:86&$X:86˩5>X:86˩57 X:86>X:867X:86˩5 X:86#!5X:8>6˩56R@Ԛ< 5X:8>66R@Ԛ<߹-U.:D>߹-U.:D>_]AU.?:D>>Rٟ@6U>G1@:?;0@W;0:GȻ;T=.LYWAU.?:D>>Rٟ@6>G1@:?;0@W;0:GȻ;T=L߹-U.:D>߹-U.:D> ߹-U.8 ߹-U.8߹-U.:D>߹-U.:D>#!߹-U.:DM=6S@Ԛ<#!߹-U.:DM=6S@Ԛ<߹-U.:D>߹-U.:D>/-U.>DP?14:щQȻ;T=.L)'U.>D?14:щQȻ;T=L߹-U.:D>߹-U.:D>ec1O .9.RU.,D6/EDCD>ڤ55J@C;ϵ>͵ATJ.PTVT1O.9.RU.,D6/EDCD>ܤ5J@Aϵ>͵ATJ.P߹-U.:D>߹-U.:D>)'кBU.6:DP߇;Ȼ;T=.L#!кBU.6:D߇;Ȼ;T=L߹-U.:D>߹-U.:D>_]AU.?:D>>Rٟ@6U>G1@:?;0@W;0:GȻ;T=.LYWAU.?:D>>Rٟ@6>G1@:?;0@W;0:GȻ;T=L߹-U.:D>߹-U.:D> U.8߹-U..ʺBPT U.8߹-U..ʺBPT߹-U.:D>߹-U.:D>#!߹-U.:DM=6S@Ԛ<#!߹-U.:DM=6S@Ԛ<߹-U.:D>߹-U.:D> ߹-U.:/0EFT6 ߹-U.:/0EFT6߹-U.:D>߹-U.:D>ec1O .9.RU.,D6/EDCD>ڤ55J@C;ϵ>͵ATJ.PTVT1O.9.RU.,D6/EDCD>ܤ5J@Aϵ>͵ATJ.P߹-U.:D>߹-U.:D>&$U.VP1B,, 5&$U.VP1B,, 5߹-U.:D>߹-U.:D>_]AU.?:D>>Rٟ@6U>G1@:?;0@W;0:GȻ;T=.LYWAU.?:D>>Rٟ@6>G1@:?;0@W;0:GȻ;T=L߹-U.:D>߹-U.:D>߹-V;T64߹-V;T6߹-U.:D>߹-U.:D>#!߹-U.:DM=6S@Ԛ<#!߹-U.:DM=6S@Ԛ<߹-U.:D>߹-U.:D>MK߹-U.6>P5,A߹-U.˭V6,3T߹-˭V6܈IU?90GE߹-U.6>P59߹-U.˭V6,3T߹-˭V6߈I?90߹-U.:D>߹-U.:D>ec1O .9.RU.,D6/EDCD>ڤ55J@C;ϵ>͵ATJ.PTVT1O.9.RU.,D6/EDCD>ܤ5J@Aϵ>͵ATJ.P߹-U.:D>߹-U.:D>MK N =.H= F0BU.$D: N =MPMPJH N =U= F0BU.$D: N =MPMP߹-U.:D>߹-U.:D>_]AU.?:D>>Rٟ@6U>G1@:?;0@W;0:GȻ;T=.LYWAU.?:D>>Rٟ@6>G1@:?;0@W;0:GȻ;T=L߹-U.:D>߹-U.:D>/-߹-U.DI429-DIV=RJ#!߹-U.D429-+RJ߹-U.:D>߹-U.:D>#!߹-U.:DM=6S@Ԛ<#!߹-U.:DM=6S@Ԛ<߹-U.:D>߹-U.:D>53߹-U./0Bб DD7=EUT۹/UD,*߹-U./0Bб DD7,U/D߹-U.:D>߹-U.:D>ec1O .9.RU.,D6/EDCD>ڤ55J@C;ϵ>͵ATJ.PTVT1O.9.RU.,D6/EDCD>ܤ5J@Aϵ>͵ATJ.P߹-U.:D>߹-U.:D>;9߹-U.0NUOބ2E   =ĪC'AB&$߹-U.0NUOǷ. =AB߹-U.:D>߹-U.:D>_]AU.?:D>>Rٟ@6U>G1@:?;0@W;0:GȻ;T=.LYWAU.?:D>>Rٟ@6>G1@:?;0@W;0:GȻ;T=L߹-U.:D>߹-U.:D>SQ:Aб =>U.=9V>D>9ԚU.V>D>1OISÄN989FT6߹-U.:D>߹-U.:D>#!߹-U.:DM=6S@Ԛ<#!߹-U.:DM=6S@Ԛ<߹-U.:D>߹-U.:D>)'U.>DP6:,Ȼ;T=.L#!U.>D6:,Ȼ;T=L߹-U.:D>߹-U.:D>ec1O .9.RU.,D6/EDCD>ڤ55J@C;ϵ>͵ATJ.PTVT1O.9.RU.,D6/EDCD>ܤ5J@Aϵ>͵ATJ.P߹-U.:D>߹-U.:D> ߹-1U.9TDSDA ߹-1U.9TDSDA߹-U.:D>߹-U.:D>_]AU.?:D>>Rٟ@6U>G1@:?;0@W;0:GȻ;T=.LYWAU.?:D>>Rٟ@6>G1@:?;0@W;0:GȻ;T=L߹-U.:D>߹-U.:D>GE3Ԛ9VC=6RMK -PT61TP6531U.D9VC6RK -PT61TڀP߹-U.:D>߹-U.:D>#!߹-U.:DM=6S@Ԛ<#!߹-U.:DM=6S@Ԛ<߹-U.:D>߹-U.:D>;9߹-U.:SM?B;BɵOMSB#**.T53߹-U.:SM?B;BֵOSB#**.  U8SUS/-1۠N -FɹKU=S5ۓR:ϡSFAT#!ޠN -FU=S5ۓR:ݡSA  U8SUS&$FMGMM>.3ˠS87TFMGMM>.87  U8SUSA?N,ˏR0#>ˌD3U=SNۥN&7><N,ˏR0#>ьDU=SNۥN&7  U8SUS)'VX?AM—PS>SM8GJ#!VX?AM—PS>SٶM1  U8SUS/-1۠N -FɹKU=S5ۓR:ϡSFAT#!ޠN -FU=S5ۓR:ݡSA  U8SUS/-U=Sб .65J?O4ʄ/&87&$U=Sб .6JO4ʄ/&8  U8SUSA?N,ˏR0#>ˌD3U=SNۥN&7><N,ˏR0#>ьDU=SNۥN&7  U8SUSVT70:7KU6A8>C¾98T—P7—PX>¾987;>C¾98—P7—PX>¾98;  U8SUS/-1۠N -FɹKU=S5ۓR:ϡSFAT#!ޠN -FU=S5ۓR:ݡSA  U8SUSA?U=Sб 7̛<87E7C77̛<(!53U=Sб 7̛<87E7C77̛<  U8SUSA?N,ˏR0#>ˌD3U=SNۥN&7><N,ˏR0#>ьDU=SNۥN&7  U8SUS86W7IU8>SESٟ@M߫U@U'@@@20W7IU>SESٟ@M߫U@U'@@  U8SUS/-1۠N -FɹKU=S5ۓR:ϡSFAT#!ޠN -FU=S5ۓR:ݡSA  U8SUS>< -4J6 NLF;8T786 -4J6 NLF87 N/,QEO. N/+O.53 N/,QI/E4OL/.DQET)' N/Q@E4OL/.T N/,QEO. N/+O.;9/,<7F NCN:QEI/4O5.L,*/<7F NN:+@4O5L N/,QEO. N/+O.53 N/,QI/E4OL/.DQET)' N/Q@E4OL/.T N/,QEO. N/+O.b`/QET N/QEVK/QEL9O/Լ=ET/QE/4/VQE1WJH/+T N/+V/+L9O/=T/+//V71W N/,QEO. N/+O.53 N/,QI/E4OL/.DQET)' N/Q@E4OL/.T N/,QEO. N/+O.&$ NCN/QEL)5E6>  )@?)@)ٟ@6E6>)5E6>)'UC9S;ٟ@>6E6>@Ԛ<)'UC9S;ٟ@>6E6>@Ԛ<)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>&$VX,)EBٟ@&EϜVQTVX)E@&EϜVQ)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>DBDԚ<(!ٟ@6ES>)%"6"&)'D5ES>)%"6")ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>#!;ښL)E6??OKT;ښL)E6?AT)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>&$8V1)ٟ@>6E6>@Ԛ<#!81)ٟ@>6E6>@Ԛ<)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>53K:S;ٟ@Sٟ@>6E66>GA7B/-K:S;ٟ@Sٟ@>6E66>G+)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>\Z$B)KFE6>RDI6PGH>R5K9>66;NDSPԮK߀3VT$B)KFE6>RDI6PGH>RK9>66;NDSPٮK)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>/-R—Pٟ@)%ٟ@6E6>DPDA&$R—Pٟ@)5E6>DPDA)ٟ@6E6>)5E6>  )@?)@)ٟ@6E6>)5E6>GE!Rٟ@6E6>ٟ@щQKB)B$&9U>щQ@Ԛ<;9!R5E6>ٟ@щQKB)BU>щQ@Ԛ< ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$hf$>I?9TWO$8$>I?9TWO$8Q$>I?9TWO$8,9PMK$>ɞ9WO$8$>ɞ9WO$8Q$>ɞ9WO$8,9P ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$)'֥>$8?9TW8QH.T#!֥>$8ɞ9W8QH.T ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$)'U"҈$4T޲F?9TU"4T޲Fɞ9 ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$trL:V1T>B;W8׫B!UH?I?9T$8CWO?98W8ɳQWQBHO_]L:V1T>;W8׫B!UH?Iɞ9$8CWOǞ9W8ɳQWвQH ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$;9µ$?9Tµ$?9T@M@>KT@/Bɞ9ɞ9@ܱM>K@/ ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$86<4T޲Fɞ9"A/4T޲Fɞ9"Q8+KTO ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$&$?9TQ0"lj:?9TɳQQɞ9Q0"lj:ɞ9ɳQQ ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$ec?9T88I?9T$8WO888O?98QD2CI0C98>ŒATSQɞ988Iɞ9$8WO888O?98QD2C0C9>ŒAT ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$20?9T$8ܞND֥>W8ݶ;UW89T#!ɞ9$8N֥>W΀8U89T ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$,*"҈$4T޲F?9Tlj:""4T޲Fɞ9lj:" ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$53"҈$4T޲F?9TQD2DT#!"4T޲Fɞ9QD2DT ?9T$ɞ9$A?W6J/?9T$8:W6J,HPHCI9I20WF/ɞ9$8:WF,HPHI9I ?9T$ɞ9$A?µ$?9TA=ULEQ?DZ.߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աOMK/N/4ʅ>߰4>N.OX,FJO:9/N/4@@@,*N4NOX,FO:9N@@/N/40O;  N0աO/N/4ʅ>߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աO>߰4>N.Xҥ3߫UBWOFJUQJ&$N4NXUBWOFUQJ/N/40O;  N0աO/N/4ʅ>߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աO6Mӛ?6Mӛ?O;O/N/47>6744B9HS1HŞ1Kį?Dߋ5 Gބ24PK ۥNɿCR S2ބ2B@Bބ2ͩ- ۥN BDBM/N7߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աO86/N/4ʅ>߰4>N.XWBOFJUQJ#!N4NXWBOFUQJ/N/40O;  N0աO/N/4ʅ>߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աOSQ4/N/45F>JFJIݩ5ORܠ94/N/45F>JFJ,*4N54FIݩ5OM4N54F/N/40O;  N0աO/N/4ʅ>߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աO 00,B4.Iַ;@?0,B4.I@/N/40O;  N0աO/N/4ʅ>߰4>N.X8BLFJO;5ORܠ9ݩ5N.FJO:ݩ5ʅ>߰4>OXFJORܠ9љ55KUS̛SQT07>S7S˩5W˩5U˩5ORܠ9ݩ5JUS̛<N4NX8BLFաO5OMݩ5NFO:ݩ54OXFOMљ55US̛<աO3US̛SQT07>S7S˩5W˩5UOMߩ5US̛</N/40O;  N0աO&$9:9;2—PX>9:;#!9:9;—PX>9:;,6BJ>P7BJ>P><76NJF3P;7N@N;JT;JQ;J/-7NJF3P;7NN;T;Q;,6BJ>P7BJ>P)'76BJPT;<̖@@TML&$7BJPT;<̖@@TML,6BJ>P7BJ>P/-176NJǭ;J2=>PQ@@@)'17NJǭ;J2=>PQ@@,6BJ>P7BJ>P,*CF76BJԿ7;˨OO/JIַ;&$CF7BJԿ7;˨OO/JI,6BJ>P7BJ>PUFJB76͎?/UFJB7͎?/,6BJ>P7BJ>P—PHIL2COJ—PHIL2COJسSB6BJDʿ7E>P—PHIL2COJ—PHIL2COJQ0N>>KJNBIL2COJBIL2COJ۳S6BJϿ7E>PBIL2COJBIL2COJQ0N׎>KɏJ,6BJ>P7BJ>P&$76BJ>PP/MGQT 7BJ>PPMGQT,6BJ>P7BJ>P,*JRJCJD6PV.6;JT)'JRJCJD6PV.6;T,6BJ>P7BJ>P 6BJDʿ7E>P@Ԛ<6BJϿ7E>P@Ԛ<,6BJ>P7BJ>P)'ARJBJD6PщQU;7P&$ARJBJD6PщQU;7,6BJ>P7BJ>P/-76BJF6F,QVMG.D6,*7BJF6F,QVMG.D6,6BJ>P7BJ>P53BܥNFCS7B76BR6HJ>AP/-BܥNFCS7B7B7HJ>AP,6BJ>P7BJ>P><76NJF3P;7N@N;JT;JQ;J/-7NJF3P;7NN;T;Q;,6BJ>P7BJ>PYWJǭ;N,6>PJǭ;DƂGщQJǭ;D@щQ,6>G3.ٟ@DƂGщQ@Ԛ<SQJǭ;N7>PJǭ;DƂGщQJǭ;D@щQ7>G3.ٟ@DƂGщQ@Ԛ<,6BJ>P7BJ>P/-176NJǭ;J2=>PQ@@@)'17NJǭ;J2=>PQ@@,6BJ>P7BJ>P/-FJō/NJD0PL36>;GB&$JNJD0PL36>;G,6BJ>P7BJ>PUFJB76͎?/UFJB7͎?/,6BJ>P7BJ>P#!6ǭ;>Q6NJ>P;7 6>Q6NJ>P;7,6BJ>P7BJ>P&$76BJ>PP/MGQT 7BJ>PPMGQT,6BJ>P7BJ>PCFJB6ǭ;@Ԛ<CFJB6@Ԛ<,6BJ>P7BJ>P 6BJDʿ7E>P@Ԛ<6BJϿ7E>P@Ԛ<,6BJ>P7BJ>P 6BJD6E>P@Ԛ< 6BJD6E>P@Ԛ<,6BJ>P7BJ>P/-76BJF6F,QVMG.D6,*7BJF6F,QVMG.D6,6BJ>P7BJ>P 76BJ>PHڶ>@Ԛ<7BJ>PHڶ>@Ԛ<,6BJ>P7BJ>P><76NJF3P;7N@N;JT;JQ;J/-7NJF3P;7NN;T;Q;,6BJ>P7BJ>P Lǭ;BϨHJ>PA7BLBϨHJ>P+,6BJ>P7BJ>P/-176NJǭ;J2=>PQ@@@)'17NJǭ;J2=>PQ@@,6BJ>P7BJ>P#!Lǭ;BϨHJ>PDG@KLBϨHJ>PD@K,6BJ>P7BJ>PUFJB76͎?/UFJB7͎?/,6BJ>P7BJ>P ;-M= ;-M=,6BJ>P7BJ>P&$76BJ>PP/MGQT 7BJ>PPMGQT,6BJ>P7BJ>P ;-M= ;-M=,6BJ>P7BJ>P 6BJDʿ7E>P@Ԛ<6BJϿ7E>P@Ԛ<,6BJ>P7BJ>P/-ϨHJō/BJ>PϨHJō/BJڶ>F=/-ϨHJō/BJ>PϨHJō/BJڶ>F=,6BJ>P7BJ>P/-76BJF6F,QVMG.D6,*7BJF6F,QVMG.D6,6BJ>P7BJ>P2076BCJ>P/G=Q>BD>ÐW,*7BCJ>PG=Q>BD>ÐW,6BJ>P7BJ>P><76NJF3P;7N@N;JT;JQ;J/-7NJF3P;7NN;T;Q;,6BJ>P7BJ>P Lǭ;BϨHJ>P:ÐW4LBϨHJ>P:ÐW44T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<> <6>7T<@9:T<6>7T?9:4T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<> M4TCT7@<@Ԛ<M4CT7@@Ԛ<4T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>539TB@>TK7<:7@<ǭ;?AB,*9TB>TK7<:7@ՄNAB4T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>A?>T<@>/26SCSET<@>-/7B6;9>T?>/26SCSET?>-/7B64T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>/-T@47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>,*4T<@HAVTJD8DAP4?HAVTD8A4T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>4T47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>;94TRF7@<5@2D0O6P6T,*4RI@5@20O6P64T7@<<>47@<>JH>CT<7@<6R>16R>7,O9ϪJ1<>@Ԛ<;9>CT<7@6>16>7,91<>@Ԛ<4T7@<<>47@<>,*C>8T<7@<1>DPDA&$C>8<7@1>DPDA6NBUC6O  @U66NBV1UC6O@V1U66NBUC6O  @U6866NBUC-9ԚETBET&$@6OCN3>ETBET,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9/-DHLKD‡?OAO6:,A7B)'DHLKD‡?OAO6:,+,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O986H:!DƇ>O-8WHOWK-4=RJ53H:!DƇ>O-8WHOWK-4RJ,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O986NA9=H5D‡?OJٟ@6:G2@@@,*N9H5D‡?OJ5:G2@@,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9SQDǭ;DQDUH:DO>&DB7DOOJDIPAFE>6MKDǭ;DQDUH:DO>&DB7DOOJDPAF>6,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9&$H=Dć?O=9=ϷAH@H=Dć?O9A@,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9A?,O-HDBٟ@;?=1PK@‡?O=9=@Ԛ<53,O-HD@?=1PK@‡?O9@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9JHH=WK=:B:D‡?O:D1=@9=D9D5@Ԛ<>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9&$H=Dć?O=9=DSDA H=Dć?O9DSDA,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9 VHLć?OD6L@Ԛ< VHLć?OD6L@Ԛ<,*@CӽD=HK:=-Ƈ>O=9=&$@CӽD=HK:=-Ƈ>O9>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>120H,82,ֈ;04VC7G/T>1)'H,82ڈ;4VCî7/T>1)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>1,*V@,1V2,7C7G.V@ M,1V27Cî7.M)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>153H82,7C7G/T>1?TJQ>,*H827Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>1/-W?A;OV2,7C7GA.T#!W?ҞMOV27Cî7A.)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>12,>B-45J2>B-5)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>1hfH8 -N2,ԓ4DC7G77BK;9/T>1KL/U5 -5>2,WFVTH8 -N24Cî77BK;9/T>1KL/U5 -5>2W)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>186H,7H82,RNVC7G/T7>1 H,>1)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>12C2C)'H8V2,7C7G/T>1#!H8V27Cî7/T>1GE2,߀3՟?4H8V2,7C7G/T>1?TJQ>;92߀3՟?4H8V27Cî7/T>1?TJQ)'H8V2,7C7G/T>1#!H8V27Cî7/T>1V2,7C7G¶;V27Cî7¶;BRADK BRADDBRADKC5—P=—Pބ2RADKC58:-9ET86RADC5=܉2RADC58:-9ETBRADK BRAD><@GD5ՂPRA5HDKϲLK2!QH9T;9@GD5ՂPRA5HDϲLK2!QH9TBRADK BRADJHH 5ՂP2CDKLARAK3DKMK5DKև9>TA?H 5ՂP2RKLARAK3DMK5DKև9>BRADK BRAD,*F7CPL߫WA=RADKS7)'F7CPL߫WA=RADS7BRADK BRADPNՂPLE;ߏGKCRADKCBAMKCK?KCCPD7LRABADK><,9;DR؇9U8ȴS>CPD7LRABADBRADK BRADDBRADKC5—P=—Pބ2RADKC58:-9ET86RADC5=܉2RADC58:-9ETBRADK BRAD\Z:DKCLCBCL5?LFL>HDKCRAK?MKߏGKCBùFPN:DCLCBC5?LL>HDCRAK?MKߏGKCBùFBRADK BRADJHH 5ՂP2CDKLARAK3DKMK5DKև9>TA?H 5ՂP2RKLARAK3DMK5DKև9>BRADK BRADqo -2CDKՂPLARAK3DKMKߏGK HӒC,NDK5=TUߋ5,,=>:J_] -2RKՂPLARAK3DMKߏGK HӒC,D5=TUߋ5,=>: 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI;ѤI;;KFABѤI;;KFAB 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI; ѤI;S>>ٟ@6;@Ԛ<ѤI;S>>5;@Ԛ< 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI;JH3Ԛ<ѤI;>6;6SF;.TTD6;6SF;.TDTMSѤI22E7>>2OD@TDBѤI;A @69>TMSѤI22E7>>2OD@T 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI;><ѤI;B2ѤI;2ѤI;0ѤI;SNѤI;NOFT><ѤI;B2ѤI;2ѤI;0ѤI;SNѤI;NOFT 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI;53ѤI;>>;U0>;D6PGDSDA53ѤI;>>;U0>;D6PGDSDA 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI;ѤI;M@KѤI;M@K 6ѤI; 6ѤI; ѤI;@?  ѤI;@ 6ѤI; 6ѤI; ѤI;8ٟ@>6CA7BѤI;8ٟ@>6C+&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG,*9EN39>ڹ3T21M1T)'9EN39>ڹ3T2M1T&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG,*:B7>B31CTCCԃP-C#!:B7>B31CCƠB7Dڹ32:TCG#!5D>B7Dڹ32:CG531TН?>/3>ND3>2HTC.:)'1?/3ND3>2HC.:&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG/-Ԋ/BNP92K1W>2Ԋ/Lؒ.=#!Ԋ/N9K1W>2Lؒ.=&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG,*V>NDͯ?ڹ3F1ȇN;9FGB)'V>NDͯ?ڹ3F1ׇN9FGB&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGDBNW>ڹ321%K9E?AFF?DJEʡH9?/86NW>ڹ32%K9E?AFFDJE9/&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG20;@7:TCUDTڹ3>NщQA7B#!;7:CUDڹ3>NщQ+&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG)'AFF?9E1ڹ321KW(#!AFF9E1ڹ32KW(&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG53EEO35B5Iٟ@7A:5Gς16T&$E>3B5@7A:5G+&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG86>>8RVGBڹ3;2F5>HK7<653>>8RVGBڹ3;2F5>H7<6&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGA?5BJH:ɚK73GHAVTJD8DAP865BJH:ɚK73GHAVTD8A&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGDBDNADV93>R9B>:D:TCS-@@@86DADV93>R9B>:D:CS@@&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGJB7>J3/:J7>J3/:&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG20AF?9C1NWڹ321K:&87)'AF9C1NWڹ32K:&8&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG,*9EN39>ڹ3T21M1T)'9EN39>ڹ3T2M1T&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGSQ9DBB3ҾW19659D:QTC2ʶU>3.ٟ@6ǽ=G@Ԛ<A?DBB3ҾW1965ՔDQC2ʶU>3ٟ@6G@Ԛ<&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG531TН?>/3>ND3>2HTC.:)'1?/3ND3>2HC.:&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGDB53>RD>B7HLTD>B7:LGDSDADB53>RD>B7HLTD>B7:LGDSDA&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG,*V>NDͯ?ڹ3F1ȇN;9FGB)'V>NDͯ?ڹ3F1ׇN9FGB&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG,*5DBڹ3G><97>?LS:)'5DBڹ3G><97>FS:&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG20;@7:TCUDTڹ3>NщQA7B#!;7:CUDڹ3>NщQ+&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGkiRV>NDڹ32į?1T9Fܫ7MN6K9D,K69.1R3RFBOBTec/>NDڹ32į?1T9Fܫ7MN6K9D,K69.1R3RFBOB&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG53EEO35B5Iٟ@7A:5Gς16T&$E>3B5@7A:5G+&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGSQ63P7S4DT9I871Dڹ32:TCRٍBKЅJCG>6DB63P7SCT871Dڹ32:CRٍBKЅJC>6&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CGA?5BJH:ɚK73GHAVTJD8DAP865BJH:ɚK73GHAVTD8A&$5D>B7Dڹ32:TCG#!5D>B7Dڹ32:CG&$>>8RVGBڹ3;2F5&$>>8RVGBڹ3;2F5يR28يR28DBOHD-6J=FHيR28>DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR2886يR2A8>يR2A8>8J-IN=JT)'يR2ŞيR2Ş8J-I=JTيR28يR28DBOHD-6J=FHيR28>DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR28_]>7JЁH?ʡHWOUA7J1HN=FFHيR28>G@K\Z>7JЁH?ʡHWOUA7J1HN=FFHيR28G@KيR28يR28DBOHD-6J=FHيR28>DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR28zx(" -UA7J1H -N= FFHيR28>DA7JUA7DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR28><يRJTيR8T يRDН?>QTيR453OD6J=FHيR28D?QيR4يR28يR2886يR2A8>يR2A8>8J-WN8T,*يR2ŞيR2Ş8J-WN8TيR28يR28DBOHD-6J=FHيR28>DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR28VT -UA7J1H= FFHيR28>107 A@H۰M3AMK -UA7J1H= FFHيR28107 @H3AيR28يR28DBOHD-6J=FHيR28>DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR28;9يR28>9KA8D6P>JщQN.6@Ԛ<53يR289KA8DP>JщQN.6@Ԛ<يR28يR28DBOHD-6J=FHيR28>DН?>QTيR453OD6J=FHيR28D?QيR4يR28يR28b` UA7J1H N=б FFHيR28>DA7JUA7D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕD/UPG,NKQM/UPG,KQMUP/ڶ>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕD /JPC98?UPT/JP98?UPTUP/ڶ>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕDA?//P//PO—P=-//PC?KP//Pĩ8>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕD>9S9Ԛ9S1/9RK@Ԛ<UP/ڶ>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕD2059P/ַ;/P/PA/P?PF7,*59P/ַ;/P/P/P?PFUP/ڶ>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕDPNUP?İUHP.F-S51SSAPK85G6)ʪJHUP?İUH1F-S51SSAPK8G6)ʪUP/ڶ>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕD UPʡH98CCH/TUP9CCH/TUP/ڶ>D UP/ŕD/PPQAP,9P/PPAP,9PUP/ڶ>D UP/ŕDPʰD/Fַ; PʰD/1 ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8WGE>ܷT18W>/26SCSEܷT18W>-/7B6A?>U8W>/26SCSEU8W>-/7B6 ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8WܷT1W>/>/CSܷT1W>/USܷT1W>/ܷT1W>/>8M6@66>ќ:0F6267(%!"~UW>/>/CSUW>/USUW>/UW>/>8M6@66>ќ:0F6267 ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8WGE>ܷT18W>/26SCSEܷT18W>-/7B6A?>U8W>/26SCSEU8W>-/7B6 ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8WA?6DQ66NیVOH2ܷT18W/Q66;6=;96Q66NیVOH2U8W/Q66;6= ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8W8ܷT1OW=;8UOW=; ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8W ܷT1WFM>Л6;@KUWF>Л6;@ ܷT18W  U8WܷT18W@? U8W@ ܷT18W  U8W20A89QEܷT1G4WE>FWAB)'A8ƋQEUG4WE>FAB:?9WΚI=X:?9WΚI=X86:?9>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=XMK:?:?LIMW#DE=XWΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=X WR:?9Iʉ5X@Ԛ<WR:?Iʉ5X@Ԛ<:?9WΚI=X:?9WΚI=X86:?9>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=XVT9WI>:?щQV46V6#6#%6# 6$ 6#88GE9WI>:?щQV6V6#6#6#66#88:?9WΚI=X:?9WΚI=X86:?9>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=X9Wʉ5X@N9Wʉ5X@N:?9WΚI=X:?9WΚI=X86:?9>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=X>ܤKV#%ѾCHTL6LT53WR:?IG>ܤKV#%5L6LT:?9WΚI=X:?9WΚI=X86:?9>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=X;9:?б 9WڶU5PRT53:?9WڶU5PRT:?9WΚI=X:?9WΚI=X86:?9>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=X53EWN6=A9S:?9I>WΚI5DXK8WщQ# @@@/-:?̖>WΚI5DXK8WщQ#@@:?9WΚI=X:?9WΚI=X86:?Gʉ5BW/UX7AE3D#&&$:?GBW/UXAED,?RFD,?RF;9,?RFQUBDAP;0T?6T)!&$,?RFQUA;T6T)D,?RFD,?RF/-?,FR>,62ɀ?EBP22>)'8FR>,62ɀ?EBP22D,?RFD,?RFMKD=D3Dٟ@FR?,1@?>19Kٟ@9ٟ@-4,@Ԛ<>19K9-4,@Ԛ<D,?RFD,?RF,*RF>BϨH,@?,6DPDA&$RF>B؋8@86DPDAD,?RFD,?RF>EщQ@Ԛ<53D,.F?H.JV9S6>EщQ@Ԛ<D,?RFD,?RF,*RF,?HAVTJD8DAP R8?HAVTD8AD,?RFD,?RFGED3DRIF,2?.@PیVDHAVDAPT>T0N6Q20S9Xֈ?NXIWN,ڶ>T0N6Q9CXֈ?NXIַ;9Xֈ?NXI,*9CXֈ?NXIַ;M/TۓR7K 9Xֈ?NXIMTۓR79CXֈ?NXIַ;9Xֈ?NXI)'9CXֈ?NXIַ;B<ނBB@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BOPNDD7OC-SO֊2>SDɵO689HAVTJD8DAPA?DD7C-SO>SDɵO689HAVTD8A 7BO֊2  7BO)'17>B@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BOA?78BBر/D2ѺKٟ@6TCMUB@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BO 57:CDO֊2ѺK@Ԛ<57:CDOѺK@Ԛ< 7BO֊2  7BO)'17>B@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BODBкB9N7:CO֊2>ٟ@6߇;1G3F7;Q67;QT><кB9N7:CO>5߇;1G3F7;Q67;QT 7BO֊2  7BO)'17>B@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BODB7OB62>R@2A57;QԚ<7;QT7N3>M><7OB62>R@2A57;QԚ<7;QT7N> 7BO֊2  7BO)'17>B@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BO)'VDD7BC92>/67T)'VDD7BC92>/67T 7BO֊2  7BO)'17>B@2A6ODPDA&$17>B@2A6DPDA 7BO֊2  7BO&$V7JR1:2R<@@@#!V7JR1:2R<@@28>1S6Mſ2>15M;9KS6MԚ<28DAP;0T?6T)!#!K5MԚ<ſ2A;T6T)28>1S6Mſ2>15M;928=S0M28GN06W,646T20ſ2=S0Mſ2GN06W,64628>1S6Mſ2>15M><28ٟ@6Q>D1.S6M>=6P6T20ſ25Q>D1.5M>=6P628>1S6Mſ2>15M/-28>M281S6MۓR9T,K&$ƿ2>Mƿ215MۓR9T,K28>1S6Mſ2>15M><281S6M>1H3PیVDHAVDAPT20ſ215M>1H3PیVDHAVAT28>1S6Mſ2>15M53A28=χ71S6MN1SщQχ7=RJ,*Aſ2=χ715MN1SщQχ7RJ28>1S6Mſ2>15M;9KS6MԚ<28DAP;0T?6T)!#!K5MԚ<ſ2A;T6T)28>1S6Mſ2>15MS6MES28@Ԛ<5MESſ2@Ԛ<28>1S6Mſ2>15M><28ٟ@6Q>D1.S6M>=6P6T20ſ25Q>D1.5M>=6P628>1S6Mſ2>15MYWDNԚ<281S6M1H3Vٟ@281DAP;0T?6T)!86Nſ215M1H3Vٟ@ſ21A;T6T)28>1S6Mſ2>15M><281S6M>1H3PیVDHAVDAPT20ſ215M>1H3PیVDHAVAT28>1S6Mſ2>15M53MVٟ@28DAP;0T?6T)!Mٟ@ſ2A;T6T)28>1S6Mſ2>15M;9KS6MԚ<28DAP;0T?6T)!#!K5MԚ<ſ2A;T6T)28>1S6Mſ2>15M#!A281S6MDSDAAƿ215MDSDA —PJ>RJЍ—PJ>RJ,*M: D>J6߻WDSDA,*M: D>J6߻WDSDA —PJ>RJЍ—PJ>RJA?VNN,̥6:D9SJ6OQNέ;LSDʡH9;86VN,̥6:D9SJ6QN٭;SDʡH9; —PJ>RJЍ—PJ>RJ/-R߻W—PۃJ>JR6߻W,ƛK9@Ԛ</-R߻W—PۃJ>JR6߻W,ƛK9@Ԛ< —PJ>RJЍ—PJ>RJ53DȂ3@>Q—PJ—PJ>RCRA7B/-DȂ3@>Q—PJ—PJ>RCR+ —PJ>RJЍ—PJ>RJqo=>QH,<5Wį?;>—PJB—PۃJD9SIF>J6RN7>809DSPԮK߀3hf=>QH,5Wį?;>—PJB—PۃJD9SIF>J6R7>809DSPٮK —PJ>RJЍ—PJ>RJ;95$,U, 6D>:5JЂJDJA7B/-5$,,Ѝ6D>:5JЂJDJ+ —PJ>RJЍ—PJ>RJPN P;>LCD9J9KBDL=Ė16ǽ=EX>PGEЍP;>LCD9J9KBDL=Ė16E>P —PJ>RJЍ—PJ>RJG7TQ-G7TQ- —PJ>RJЍ—PJ>RJ)'Q—PJR69:ADSDA#!Q—PJR9ADSDA —PJ>RJЍ—PJ>RJDBA2ûR9?A>;BTûR9?A>5653TA2ûR9?A>;BûR9?A>56 —PJ>RJЍ—PJ>RJ,*M: D>J6߻WDSDA,*M: D>J6߻WDSDA —PJ>RJЍ—PJ>RJ><6JD9SJ6ȻW̑-9ٟ@—PJ>RJ@Ԛ<866JD9SJ6ȻW̑- @—PJ>RJ@Ԛ< —PJ>RJЍ—PJ>RJ/-R߻W—PۃJ>JR6߻W,ƛK9@Ԛ</-R߻W—PۃJ>JR6߻W,ƛK9@Ԛ< —PJ>RJЍ—PJ>RJMK66DJQ—PL>JRJJQ—PL@BJ9Uڤ55@Ԛ<GE66DJQ—PL>JRJJQ—PL@BJUܤ5@Ԛ< —PJ>RJЍ—PJ>RJqo=>QH,<5Wį?;>—PJB—PۃJD9SIF>J6RN7>809DSPԮK߀3hf=>QH,5Wį?;>—PJB—PۃJD9SIF>J6R7>809DSPٮK —PJ>RJЍ—PJ>RJ4B4յGWGXF4B4WX —PJ>RJЍ—PJ>RJPN P;>LCD9J9KBDL=Ė16ǽ=EX>PGEЍP;>LCD9J9KBDL=Ė16E>P —PJ>RJЍ—PJ>RJ,* R:D>ڝJRK2DG@K&$ЍR:D>ڝJRK2D@K —PJ>RJЍ—PJ>RJ)'Q—PJR69:ADSDA#!Q—PJR9ADSDA —PJ>RJЍ—PJ>RJ;95$,U, 6D>:5JЂJ9WWC/-5$,,Ѝ6D>:5JЂJ9āRA —PJ>RJЍ—PJ>RJ,*M: D>J6߻WDSDA,*M: D>J6߻WDSDA —PJ>RJЍ—PJ>RJ AF8,TЍAF,T —PJ>RJЍ—PJ>RJ/-R߻W—PۃJ>JR6߻W,ƛK9@Ԛ</-R߻W—PۃJ>JR6߻W,ƛK9@Ԛ< —PJ>RJЍ—PJ>RJ  4B  4B —PJ>RJЍ—PJ>RJqo=>QH,<5Wį?;>—PJB—PۃJD9SIF>J6RN7>809DSPԮK߀3hf=>QH,5Wį?;>—PJB—PۃJD9SIF>J6R7>809DSPٮK —PJ>RJЍ—PJ>RJ/-UCUTʡH>/X>>A2  UUʡH>/X>2Ѝ —PJ>RJЍ—PJ>RJPN P;>LCD9J9KBDL=Ė16ǽ=EX>PGEЍP;>LCD9J9KBDL=Ė16E>P —PJ>RJЍ—PJ>RJ AFPCRJЍ—PJ>RJ)'Q—PJR69:ADSDA#!Q—PJR9ADSDA —PJ>RJЍ—PJ>RJ&$UR:D>JB/T&$UR:D>JB/TPC11ȯBPC1ȯB&$&DC1ȯBI91PI@Ԛ<#!&DC1ȯBI1PI@Ԛ<PC11ȯBPC1ȯB 2<;> 2<;>PC11ȯBPC1ȯB&$&DC1ȯBI91PI@Ԛ<#!&DC1ȯBI1PI@Ԛ<PC11ȯBPC1ȯB)'V2PKC4EȯB-;J6&$V2PKC4EȯB-;ϜJPC11ȯBPC1ȯB&$&DC1ȯBI91PI@Ԛ<#!&DC1ȯBI1PI@Ԛ<PC11ȯBPC1ȯB20T3=C;D9>:CO-֛7:CO-2P:ȯBK6NKDSDA,*>2P:ȯBK6NKDSDAPC11ȯBPC1ȯB&$&DC1ȯBI91PI@Ԛ<#!&DC1ȯBI1PI@Ԛ<PC11ȯBPC1ȯB#!PNȯB>9H-BV6#!PNȯB>9H-BV6PC11ȯBPC1ȯB&$&DC1ȯBI91PI@Ԛ<#!&DC1ȯBI1PI@Ԛ<PC11ȯBPC1ȯBJHDPRȯBIH,56:LIB,I;9V;KXܤK$GEDPRȯBIH,56:LIB,I;V;KXܤK$ X˩5R9: X˩5ֲ9/-' ڲ߹-:X>˩56I:,@Ԛ<,* ڲ߹-:X>˩56I:,@Ԛ< X˩5R9: X˩5ֲ9,*ȏBҲU>Rɸ˩5G@Ԛ<DBݩ5C B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ< X˩5R9: X˩5ֲ9GE B߹-;:XܷT6˩5J˩54 B߹-;:XܷT6˩5/7;9 B-:XܷT6J˩54 B-:XܷT6/7 X˩5R9: X˩5ֲ9DB: >˩5AKB: >˩5AK X˩5R9: X˩5ֲ96T' ߹-X6˩56T ߹-X6 X˩5R9: X˩5ֲ9/-' ڲ߹-:X>˩56I:,@Ԛ<,* ڲ߹-:X>˩56I:,@Ԛ< X˩5R9: X˩5ֲ9A?Uٟ@5߹-:XD˩5I:XB9D˩5ƛK6@@@;9Uٟ@5߹-:XD˩5I:XBD˩5ƛK6@@ X˩5R9: X˩5ֲ9GEݩ5TC B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ<DBݩ5C B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ< X˩5R9: X˩5ֲ9GE BI;:XܷT6˩5J˩54 BI;:XܷT6˩5/7A? BI;:XܷT6J˩54 BI;:XܷT6/7 X˩5R9: X˩5ֲ9DB: >˩5AKB: >˩5AK X˩5R9: X˩5ֲ9;95˱U̾-C3CIQ:> :X>6˩5,;865˱U̾-C3CIQ:> :X>6,; X˩5R9: X˩5ֲ9/-' ڲ߹-:X>˩56I:,@Ԛ<,* ڲ߹-:X>˩56I:,@Ԛ< X˩5R9: X˩5ֲ9\Z BH 6ӻBO ߹-:XܷTBH ߹-XܷT;W; N= FJ˩54YW BH 6O ߹-:XܷTBH ߹-XܷT;W; N= FJ˩54 X˩5R9: X˩5ֲ9GEݩ5TC B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ<DBݩ5C B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ< X˩5R9: X˩5ֲ9)' >6˩55WR9:DSDA  >65Wֲ9DSDA X˩5R9: X˩5ֲ9DB: >˩5AKB: >˩5AK X˩5R9: X˩5ֲ9A?б = F߹-=X B˩5HFST:TʡH?CگD/86б = F߹-=X BHFSTT9CگD/ X˩5R9: X˩5ֲ9/-' ڲ߹-:X>˩56I:,@Ԛ<,* ڲ߹-:X>˩56I:,@Ԛ< X˩5R9: X˩5ֲ9nlRA߹-:X> B6˩51D0;Hٟ@R9:KBB>5IBEKRFTD>6@Ԛ<_]RA߹-:X> B610Hٟ@ֲ9KBB>5IBEKRFTD>6@Ԛ< X˩5R9: X˩5ֲ9GEݩ5TC B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ<DBݩ5C B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ< X˩5R9: X˩5ֲ9GE KB6NEI:X5R9:˩5UIR>:DSDAA? KB6NEI:X5ֲ9˩5UIR>:DSDA X˩5R9: X˩5ֲ9DB: >˩5AKB: >˩5AK X˩5R9: X˩5ֲ9DB߹-:XܷT6H߹-:XܷT6˩5Q' ѲB6ӻBO453߹-:XܷT6߹-:XܷT6Q ѲB04 X˩5R9: X˩5ֲ9/-' ڲ߹-:X>˩56I:,@Ԛ<,* ڲ߹-:X>˩56I:,@Ԛ< X˩5R9: X˩5ֲ9/-0:X6˩50:X6˩5>464T&$0:X60:X6>46T X˩5R9: X˩5ֲ9GEݩ5TC B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ<DBݩ5C B6:X/ݩ5ٟ@5UI: .X>˩5G@Ԛ< X˩5R9: X˩5ֲ9A?б = F߹-=X B˩5HFST:TʡH?CگD/86б = F߹-=X BHFSTT9CگD/ X˩5R9: X˩5ֲ9DB: >˩5AKB: >˩5AK X˩5R9: X˩5ֲ9\ZDRA9į?߹-=X>6˩5H0-DE06EщQI.6щQ22DSDAYWDRA9į?߹-=X>6H0-DE06EщQI.6щQ22DSDA NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK#! N;WHEK9ݠ.ET N;WHE9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK&$ N;WHEKK9ݠ.ET N;WHE9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK)' N9ݠ.;WKE 9ݠ.ET  N9;WK 9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK#! N;WKEK9ݠ.ET N;WKK9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK#! NF;WEK9ݠ.ET  NF;WEK9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK)' NF;W2T9K9ݠ.ET&$ NF;W2T9K9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK#! NF;WHK9ݠ.ET  NF;WHK9ET NF;WH4EK NF;WH4EK&$ NF;WHEK9ݠ.ET#! NF;WHEK9ET NF;WH4EK NF;WH4EK)' N9ݠ.;WHE 9ݠ.ET#! N9;WHE 9ET:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I/-:T678:T67Iַ;—P=8-)':T678:T67I=8-:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I T18:CT67Iַ;T18:T67I:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67Iܥ60T67ȣ8Iַ;ܥ60T67I:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I,*:0EUPU,I:T67Iַ;#!:08P,I:T67I:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I0T67ȣ8Iַ;0T67I:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I:CT67Iַ;@?:T67I@:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I/-:CT67Iַ;:CT67Iַ;;#!:T67I:T67I;:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I,*0EU4J8:CT67Iַ;ܥ6 084J:T67Iܥ6:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I)':CT67Iַ;M/TۓR7K:T67IMTۓR7:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I)':CT67Iַ;M/TۓR7K:T67IMTۓR7:CT67Iַ;:T67I 7EU:CT67Iַ;78:T67I:CT67Iַ;:T67I P P4X>E1; 4X>B E1;>XHMʭBWTB>XHMʭBW4X>E1; 4X>B:D>75.T:D>75.T4X>E1; 4X>B/--XE1;7߹-WD7ՕNծH-XBՕNծH4X>E1; 4X>BMK%X6Xޡ8XSX8XNX.XCXCXFX2X4XCA?%X6Xޡ8XX8XNXXXXFX2X4XC4X>E1; 4X>B E1;/64DG@KB/6D@K4X>E1; 4X>BSQİFE1;/64X۹/>OX۹/>TʭBS>OʭBS>TU>6K53İFB/6X>OX>B>OB>U>64X>E1; 4X>BkiE1;MIB>ю2/4AT23WS;XIю2Xю2>ю2UATXIX)PNBMI>ю2/4F23WS;X22ю2UFXI)4X>E1; 4X>B/4?BOBT/4?BOB4X>E1; 4X>B E1;>XHMʭBWTB>XHMʭBW4X>E1; 4X>B)'/43>L??HF? FT#!/43>L?HF FT4X>E1; 4X>B/--XE1;7߹-WD7ՕNծH-XBՕNծH4X>E1; 4X>BGE˛59/=T4>X?ޡ8RV4>E1;6T44K2,*-4>/4>B6T5K24X>E1; 4X>B E1;/64DG@KB/6D@K4X>E1; 4X>BVTDG:/4X>3?X?FBTF?ޡ8H?.:FʭB.4?F6>3?XFBTFޡ8H.FF64X>E1; 4X>BkiE1;MIB>ю2/4AT23WS;XIю2Xю2>ю2UATXIX)PNBMI>ю2/4F23WS;X22ю2UFXI)4X>E1; 4X>BVTE1;>C6PKH,-X?71E70NʡHH064TDBB>C6PKH,-X0NʡHH064T4X>E1; 4X>B E1;>XHMʭBWTB>XHMʭBW4X>E1; 4X>B#!U/4X>3B?8,T U/4X>3B?,T4X>E1; 4X>B/--XE1;7߹-WD7ՕNծH-XBՕNծH4X>E1; 4X>B/--446M; ->>@W>W>)'-56M; ->>@>W>4X>E1; 4X>B E1;/64DG@KB/6D@K4X>E1; 4X>B53?41K>F7>>D<(6հL3T53?41K>F7>>D<(6հL3T4X>E1; 4X>BkiE1;MIB>ю2/4AT23WS;XIю2Xю2>ю2UATXIX)PNBMI>ю2/4F23WS;X22ю2UFXI)4X>E1; 4X>B_]E1;1-X?P@4BS?H-M>ԁ:FT,;J8L0(!MKB1-X?P@4BS?H-M>ԁ:FT,;J8L0X4X>E1; 4X>B E1;>XHMʭBWTB>XHMʭBW4X>E1; 4X>B)'/43?>LHJX/ENB#!/43?>LHJXEN4X>E1; 4X>B/--XE1;7߹-WD7ՕNծH-XBՕNծH4X>E1; 4X>B3H24CM΄/ǟ9=Tޡ8?$ڻ($!3H24CMτ/-8$ڻ($!4X>E1; 4X>B E1;/64DG@KB/6D@K4X>E1; 4X>B,*/43?>LHJX/E1ʞ:-)'/43?>LHJXE1ʞ:-4X>E1; 4X>BkiE1;MIB>ю2/4AT23WS;XIю2Xю2>ю2UATXIX)PNBMI>ю2/4F23WS;X22ю2UFXI)4X>E1; 4X>B,*HE1;>/4H?LB/4HLBBQT2>FWA>BL>F)'4>BWAQT2>JF@Ԛ< 4>BWAL>S@Ԛ<WA>BQT2>FWA>BL>F869GOBQT2>PVP.5AJ>P)'9GOBL>PP.J>PWA>BQT2>FWA>BL>F20R0W6>BйSQT2>FDSDA,*R0W6>BйSL>FDSDAWA>BQT2>FWA>BL>FA?COW>M>BWAQT296O8GDSDA;9COW>M>BWAL96O8GDSDAWA>BQT2>FWA>BL>FMKR9GMWWAI>BN==9=A>MNS9=A7B86RךGWWAI>BN==>MNS9=+WA>BQT2>FWA>BL>F209G>BQT2>VJ768T7=&$9G>BL>VќJ687=WA>BQT2>FWA>BL>F;9D9DI>BWRQT2>SQU>V@Ԛ<&$9I>BWRLS>V@Ԛ<WA>BQT2>FWA>BL>F/- -FWLSJ$2AB/- -FWLSJ$2ABWA>BQT2>FWA>BL>F86DS8G>BWAQT2>M@?@@@/-DS8G>BWAL>M@?@@WA>BQT2>FWA>BL>F)'NFHFOFOVVA4@K&$NFHFOFOVVA4@WA>BQT2>FWA>BL>F&$W>V>BWAQT2@Ԛ< W>V>BWAL@Ԛ<WA>BQT2>FWA>BL>F86>BQT2>΂PF;/U N5LUٶ,*>BL>΂PF;/U N5LUWA>BQT2>FWA>BL>F)'4>BWAQT2>JF@Ԛ< 4>BWAL>S@Ԛ<WA>BQT2>FWA>BL>F#!>BWBQT2>F@Ԛ<>BWBL>F@Ԛ<WA>BQT2>FWA>BL>F20R0W6>BйSQT2>FDSDA,*R0W6>BйSL>FDSDAWA>BQT2>FWA>BL>F#!W2E>DQT2ϩNFBWE>DLϩNFWA>BQT2>FWA>BL>FMKR9GMWWAI>BN==9=A>MNS9=A7B86RךGWWAI>BN==>MNS9=+WA>BQT2>FWA>BL>F8F5R.UES28F=.UESWA>BQT2>FWA>BL>F;9D9DI>BWRQT2>SQU>V@Ԛ<&$9I>BWRLS>V@Ԛ<WA>BQT2>FWA>BL>F)'J>R8"FK%FJ>RF%FWA>BQT2>FWA>BL>F86DS8G>BWAQT2>M@?@@@/-DS8G>BWAL>M@?@@WA>BQT2>FWA>BL>F53D96MEK>BQT2>VD@@@&$D96EK>BL>V@@WA>BQT2>FWA>BL>F&$W>V>BWAQT2@Ԛ< W>V>BWAL@Ԛ<WA>BQT2>FWA>BL>F ֖F>PMމ6J6J7+Mމ6J6J7WA>BQT2>FWA>BL>F)'4>BWAQT2>JF@Ԛ< 4>BWAL>S@Ԛ<WA>BQT2>FWA>BL>F/-UMӛ?1?7F,7MRQ#!UMӛ?1?MRQWA>BQT2>FWA>BL>F20R0W6>BйSQT2>FDSDA,*R0W6>BйSL>FDSDAWA>BQT2>FWA>BL>F/-D96M>BWAQT2DSDA&$D96>BWALDSDAIDT0I ID0IDBDT30IDT30I4DT30IDZ.>4I?86D30ID30I4D30IDZ.>4I2IDT0I ID0I,*IDT01,ICDT0IDT#!IDT01ID0IDTIDT0I ID0I&$RIDTN0I0I4@Ԛ<RID00I4@Ԛ<IDT0I ID0I/-0I9Q6S=KI8KI:@Ԛ</-0I9Q6S=KI8KI:@Ԛ<IDT0I ID0I0I5I?0I5I2IDT0I ID0IA?DT3I58DT38I5DT3X58I?53D3I58D38I5D3X58I2IDT0I ID0I0IIַ;4DG@K0II4D@KIDT0I ID0I&$IDTPDN0I0I@Ԛ<IDPD00I@Ԛ<IDT0I ID0I#!I0IػKI0I4I0I#!I0IػKI0I4I0IIDT0I ID0I/-0IDTFַ;8- 8T !0D18- 8IDT0I ID0I0I4@Ԛ<0I4@Ԛ<IDT0I ID0I20D0ID0I4D0IDZ.>4I?/-D0ID0I4D0IDZ.>4I2IDT0I ID0I0IDZ.>4@Ԛ<0IDZ.>4@Ԛ<IDT0I ID0I20IDT,;01,ICDT0IDT)'IDT,;01ID0IDTIDT0I ID0IDBDT30IDT30I4DT30IDZ.>4I?86D30ID30I4D30IDZ.>4I2IDT0I ID0IDB,TܷT0I,TܷT0I4,TܷT0IDZ.>4I?86,ܷT0I,ܷT0I4,ܷT0IDZ.>4I2IDT0I ID0I&$RIDTN0I0I4@Ԛ<RID00I4@Ԛ<IDT0I ID0IMKDT3N0IDT3N0I4DT3N0IDZ.>4I?/-D30D304D30DZ.>4I2IDT0I ID0I0I5I?0I5I2IDT0I ID0I0IػK4@K0IػK4@KIDT0I ID0I0IIַ;4DG@K0II4D@KIDT0I ID0I0IDG@K0ID@KIDT0I ID0I#!I0IػKI0I4I0I#!I0IػKI0I4I0IIDT0I ID0I53DT3IDT3Iַ;DT3OII?#!D3ID3ID3OI2L7ٟ@8 Lٟ@8 L7@?L@L7ٟ@8 Lٟ@8 L7B6  LB6L7ٟ@8 Lٟ@8,*6—P,L7ٟ@8H7@K7@Ԛ<#!6ٟ@8H7@K7@Ԛ<L7ٟ@8 Lٟ@8 L7@K  L@KL7ٟ@8 Lٟ@8 L7@?L@L7ٟ@8 Lٟ@8 L76?  L6?L7ٟ@8 Lٟ@8,*6—P,L7ٟ@8H7@K7@Ԛ<#!6ٟ@8H7@K7@Ԛ<L7ٟ@8 Lٟ@8,*6L78>ٟ@HF@F76>P)'6L8>ٟ@HF@F76>PL7ٟ@8 Lٟ@8 L7@?L@L7ٟ@8 Lٟ@820A7L7Hٟ@8EP;:PO@@@,*A7LHٟ@8EP;:PO@@L7ٟ@8 Lٟ@8,*6—P,L7ٟ@8H7@K7@Ԛ<#!6ٟ@8H7@K7@Ԛ<L7ٟ@8 Lٟ@8—P,L7?60 ?60L7ٟ@8 Lٟ@8 L7@?L@L7ٟ@8 Lٟ@8/-L7ٟ@8AR>:6>NDSDA,*Lٟ@8AR>:6>NDSDAL7ٟ@8 Lٟ@8,*6—P,L7ٟ@8H7@K7@Ԛ<#!6ٟ@8H7@K7@Ԛ<L7ٟ@8 Lٟ@8wuL7DF6L7B7L76<6—P,L7 -Gٟ@867@75L78>ٟ@;FJ>N1S_]LDF6LB7L6Ǥ< -Gٟ@867@75L8>ٟ@;FJ>N1S¨0A=Tɾ=S0=Tɾ=S20¨0A=Tɾ=SN.W0AT("0=T̗<.0AT¨0A=Tɾ=S0=Tɾ=S,*¨0A=Tɾ=CPI/C/9?T#!0=PI/C/9?T¨0A=Tɾ=S0=Tɾ=S¨0ʽ=>=Tɾ=R@Ԛ<0>=Tɾ=R@Ԛ<¨0A=Tɾ=S0=Tɾ=S86¨0ʽ==Tɾ=C6=Tɾ=C6AANTAT#!0=T6=T6AATA¨0A=Tɾ=S0=Tɾ=S20¨0ʽ=Dٟ@ޢ7C7C=Tɾ=CѲ/DT("0D>=Ѳ/DT¨0A=Tɾ=S0=Tɾ=S/-=Tɾ=>¨0ʽ=ʇXQޢ0ʇXQޢΉX˛5¨0A/TD¨0A/A4J53-ʇXDQ=>ΉX˛50/TD0/AJ¨0A=Tɾ=S0=Tɾ=S¨0ʽ=>=Tɾ=R@Ԛ<0>=Tɾ=R@Ԛ<¨0A=Tɾ=S0=Tɾ=S)'¨0A=Tɾ=W9L/͒A4T0=Tɾ=W/͒A4¨0A=Tɾ=S0=Tɾ=S20¨0ʽ=Dٟ@ޢ=Tɾ=CѲ/DT("0D>=Ѳ/DT¨0A=Tɾ=S0=Tɾ=S,*¨0A=Tɾ=6=T3OTDA4&$0=Tɾ=6=T3OTDA¨0A=Tɾ=S0=Tɾ=S53¨0A=Tɾ=CDA4AATUʡH9A/,*0=T˾=DAAATUʡH9A/¨0A=Tɾ=S0=Tɾ=S ¨0A=Tɾ=68,T0=Tɾ=6,T¨0A=Tɾ=S0=Tɾ=S20¨0A=Tɾ=SN.W0AT("0=T̗<.0AT¨0A=Tɾ=S0=Tɾ=SDBS48¨0ʽ=P=Tɾ=Cϛ)ϛ)))QTɾ=C98KT/-ФO8-=ϛ)ϛ)))Q98KT¨0A=Tɾ=S0=Tɾ=S¨0ʽ=>=Tɾ=R@Ԛ<0>=Tɾ=R@Ԛ<¨0A=Tɾ=S0=Tɾ=S#!E=¨0ʽ==Tɾ=.8?̛<=0=Tɾ=.?¨0A=Tɾ=S0=Tɾ=S20¨0ʽ=Dٟ@ޢ=Tɾ=CѲ/DT("0D>=Ѳ/DT¨0A=Tɾ=S0=Tɾ=S ¨0AD>=Tɾ=C@K0D>=@K¨0A=Tɾ=S0=Tɾ=S53¨0A=Tɾ=CDA4AATUʡH9A/,*0=T˾=DAAATUʡH9A/¨0A=Tɾ=S0=Tɾ=S=Tɾ=C6¨0AT=T60T  S1/W/߹-CʡH97Qן9ں-ʡH97Qן9  S1/W/)'//Æ.J:NLJS1/GB //Æ.J:NLW/G  S1/W/S1/B;AATW/BAAT  S1/W/ S1D?  WD?  S1/W/߹-CʡH97Qן9ں-ʡH97Qן9  S1/W/53S1/E70C/77S1/AB#!W/E7W/ABw  S1/W/S1/B;AATW/BAAT  S1/W/ABAB  S1/W/߹-CʡH97Qן9ں-ʡH97Qן9  S1/W/209J/?ſQ5ߕJCMCRURН?QT)'9J/?ſQ5ߕJCMCRQ  S1/W/S1/B;AATW/BAAT  S1/W/>9@VWF?Wַ;;E-S1Н?>AT20B>9@VWF?Wַ;;E-W?A  S1/W/߹-CʡH97Qן9ں-ʡH97Qן9  S1/W/S1/IA6W/IA6EG?>-EG?>-/-G?R142TN5=7@P:J#!G?142T5=@PJEG?>-EG?>-)'G?>-PL΅/Bڶ>SJ@Ԛ< G?>-΅/BSJ@Ԛ<EG?>-EG?>- G?T4 G?T4EG?>-EG?>- GW-TG*EG?>-EG?>-E,G?/-"D:EG?/-"DEG?>-EG?>-20G?>-G64?9ʉ5;˫N¶;PNT,*G?>-G4?9ʉ5;ΫNPNTEG?>-EG?>-86G?>-22΅/8B?¶7ģCCщQDPDA,*G?>-΅/8BNCщQDPDAEG?>-EG?>-20NE,G?>-?¶7ʡHWB:ģCO#!NEG?>-NW:CEG?>-EG?>-&$G?>-8G?>-4-2#!G?>-G?>-4-2EG?>-EG?>- G?>-/.BʭBѡ8¶;G?>-/BʭBѡ8¶;>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>,*K=9:ׄ9?DϪJP>؞C@@@ =:ׄ9?DϪJPρ>@@>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>>؞C19Tׄ9?@Ԛ<ρ>19Tׄ9?@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>Ư8Hׄ9?>؞C@@@Ư8Hׄ9?ρ>@@>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1> ׄ9?=7 ׄ9?=7>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>&$>؞Cׄ9?6R1TDPDA ρ>ׄ9?61TDPDA>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>DB=>19Tׄ9?ׄ9B9>>Ư8I>؞Cб †M86><=>19Tׄ9?ׄ9B9>>Ư8Iρ>б †M8>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>#!U—P۴2>MN,BMСGTUP>MNBMСGT>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>hf1 TSׄ9?AJ9JOT,Q SF>T9P,1R>؞Cб :6)ʪ_]1 TSׄ9?A˱9OT,Q SF>T9P,1Rρ>б :6)ʪ>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>;9>؞Cׄ9?B:9ڶ>ST=O>I,TJ@Ԛ<53ρ>ׄ9?B:9ST=O>I,TJ@Ԛ<>ׄ9?ϪJJ1>>ׄ9?ϪJJ1>A?:91STׄ9?9M,.T>BϪJ9>؞C@@@;9:91STׄ9?9M,.T>BϪJ9ρ>@@#!&6D>49@P>2#!&6D>49@P>26942A7B694+#!&6D>49@P>2#!&6D>49@P>2#!C14>@D2>@Ԛ<#!C14>@D2>@Ԛ<#!&6D>49@P>2#!&6D>49@P>2;9&FD6D249@D2>1XJVV53&FD6D249@D2>1JV#!&6D>49@P>2#!&6D>49@P>2SQ&L492IщQ—P=&1X4BD71XG:&T6GEީ L492IщQ=&14BD71G:&T6#!&6D>49@P>2#!&6D>49@P>26942A7B694+#!&6D>49@P>2#!&6D>49@P>2 Cڜ>42K.B@KCڜ>4K.@K#!&6D>49@P>2#!&6D>49@P>2;9&FD6D249@D2>1XJVV53&FD6D249@D2>1JV#!&6D>49@P>2#!&6D>49@P>2;961&6P>429Q1@&@@@2061&6P>4ƋQ1@&@@#!&6D>49@P>2#!&6D>49@P>26942A7B694+#!&6D>49@P>2#!&6D>49@P>2#! 6E424ڜ>2AЍ6E44ڜ>2A#!&6D>49@P>2#!&6D>49@P>2;9&FD6D249@D2>1XJVV53&FD6D249@D2>1JV#!&6D>49@P>2#!&6D>49@P>26>42EX@N6>4E@N#!&6D>49@P>2#!&6D>49@P>26942A7B694+#!&6D>49@P>2#!&6D>49@P>2>4ڜ>F5@Ԛ<>4ڜ>F5@Ԛ<#!&6D>49@P>2#!&6D>49@P>2;9&FD6D249@D2>1XJVV53&FD6D249@D2>1JV#!&6D>49@P>2#!&6D>49@P>2;9CRW6?۱URT:R&6D>62486CRW6?۱URT:R&6D>D4UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@8MKUE;6֊2>W6,ϨH@FL6,B,TE;>A7BDBǠ2;6֊2>W6,ϨH@FL6,B,TE;>+UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@853DUE>W@P21HSV9;W@Ԛ<,*DǠ2>W@71HSV9=@Ԛ<UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@886UE2V=L296T=ȟN2DS>؞CԚ<20Ǡ22V=L296T=ȟN2DSρ>Ԛ<UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@8JH>؞CXAN;WSV626DUE=WL6,6@Ԛ<A?ρ>XAN=SV626DǠ2=WL6,6@Ԛ<UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@8PN9;2UEDSV16=GB<6>؞Cб :6)ʪDB9;2Ǡ2DSV16=G<ρ>б :6)ʪUEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@8DBD7>UE;ASVϨH,ϨHW;62>T6@Ԛ<>Ǡ2;ASV؋8ϨHW;62>T6@Ԛ<UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@8;9>UE;FWOT7,>A8SVDPDA20>Ǡ2;FW37,>ASVDPDAUEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@886UE616=V6>6L=>؞C@@@/-Ǡ2616=V6>6L=ρ>@@UEϨHWV@8Ǡ2ϨHWV@8>V;>Wٟ@2>6@2>6>؞C@@@53Ǡ2>V;>Wٟ@2>6@2>6ρ>@@UEϨHWV@8Ǡ2ϨHWV@8)'>؞CUE;V626DSDA#!ρ>Ǡ2;V626DSDA# UDT #UDT@@@# UDT #UDTJHR6># HL6M9ٟ@UVUӁGDܤK8<# @@@;9R6>#HL6M@UVUӁGA8<#@@# UDT #UDT@@@# UDT #UDT UUD,A#%@@@UUD,A#@@# UDT #UDT@@@# UDT #UDT20UN.T5ƛK,6I16#%@@@,*UN.T5ƛK,6I16#@@# UDT #UDT@@@# UDT #UDTDB9Ԛ<6ϪJ># >Q@D9DFҾWSܤK# @@@,*16>#>Q@9FҾWS#@@# UDT #UDT@@@# UDT #UDT20#%>UӁGD9D.7>#%@@@#!#>UӁG9.7>#@@# UDT #UDT@@@# UDT #UDTMKR, 9S=ɵOʡH9B>UUD=UL9TM# @@@;9R, SɵO9>UUD=UL9TM#@@# UDT #UDT@@@# UDT #UDT&$DПC,UӁGDܤK# @@@DПC,UӁGA#@@# UDT #UDT@@@# UDT #UDT&$E>F# UDK0@@@ E>F#UDK0@@# UDT #UDT@@@# UDT #UDT,*# UDK-щQRQ# @@@#!#UDK-щQRQ#@@T;JC;XH-T;C;XH-DB;JIٟ@FXH-EDܤKV3ET)ʪ86;@FحXH-EDV3ET)ʪT;JC;XH-T;C;XH-YW9T:B7ٟ@)X-;J%)ѾCTO7%T87FD0A?9T:B7ٟ@X-;)ѾCTO7%T8F0T;JC;XH-T;C;XH-86;J85SXH-8E6O@Ԛ<&$;85SXH-8E6@Ԛ<T;JC;XH-T;C;XH-&$5M;J.B7H1R@Ԛ<#!5M;.B7H1R@Ԛ<T;JC;XH-T;C;XH-><;J85SXH-8E6ODSDA,*;85SXH-8E6DSDAT;JC;XH-T;C;XH-PN;JA5DN8R8EBS;76XH-NFK,DPDAA?;A5DNRNBS;5XH-NF,DPDAT;JC;XH-T;C;XH-DB;JIٟ@FXH-EDܤKV3ET)ʪ86;@FحXH-EDV3ET)ʪT;JC;XH-T;C;XH-_]5M;JDCٟ@F26K:X-RB9S8@D69>ҾWD,DPDAPN5M;D@F26K:X-RB9S8@6ߖ>D,DPDAT;JC;XH-T;C;XH-86;J85SXH-8E6O@Ԛ<&$;85SXH-8E6@Ԛ<T;JC;XH-T;C;XH-;J١-ܤKS/@N;١-ܤKS@NT;JC;XH-T;C;XH-><;J85SXH-8E6ODSDA,*;85SXH-8E6DSDAT;JC;XH-T;C;XH-866C; X-NWHT;J)ʪ/-6C;X-NWH;)ʪ/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7GE5DR93A7.8RAƛK2TH?T!HA†M86A?5DR93A78RAƛK2TH?T!HA†M8/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7865RAб D93A7.8RA!@@@205RAб D93A78RA!@@/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7><ʡH9BR93AV7.RAϪJHA@@@209R93AV7RAϪJHA@@/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7;9!HA5DR9L9BR7.RϪJ,@Ԛ<86!HA5DR9L9BR7RϪJ,@Ԛ</-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7ki5D93AJR7.BRFD3Bٟ@75Dٟ@7>HAKADP!HA†M86ec5D93AJR7BRFD3Bٟ@75Dٟ@7>HAKADP!HA†M8/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7b`5DR9L9DR7.3>3RQKUDA-D3D!HA†M86\Z5DR9L9DR73>3RQKUDA-D3D!HA†M8/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7/-5D9L9D7.RƭI!@@@)'5D9L9D7RƭI!@@/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7hf95L9DRG7.3AWDEWKѾCHT7HA7:6)ʪVT95L9DRG73AWDEWK5:6)ʪ/-5DR9D93A8RR7.,*5DR9D93A8RR720R9D93AR7.6ǽ=DPDA,*R9D93AR76DPDA/-5DR9D93A8RR7.,*5DR9D93A8RR7><ϪJAHARA9D93ARADϪJ7.K5;9ϪJAHARA9D93ARADϪJ7K5.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<DBW<-7R:.6O/1EPٟ@9ٟ@MBʔ77>P/-W<7R:1EP9MBݔ7>P.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<R-R-.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<539.T&$<-N<%%O܊70>.6O<-  .O<20.6O37;0G .6.6T&$<-N<%%O܊70>.6O<-  .O<.6O8I6T.O8I6T.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<.6OTK6.OTK6.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<20 .6O/EED.6O/EE"W#! ED.6O/E"W.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<DBW<-7R:.6O/1EPٟ@9ٟ@MBDSDA20W<7R:1EP9MBDSDA.6O<-  .O</-<-N<-%%O܊7<0>T&$<-N<%%O܊70>.6O<-  .O<>4M54Н?A3AT UD1ձM4M54AA  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1,*UD1CT%8>9S1ME;)'UD1CT%8>9S1M;  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1 UD1U3ʡHWRDU UD1U3ʡHWRDU  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1 UD1M>4M5G3UD1ձM4M5G  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1/-U8JD1UHAʡH RGM=T,*U8JD1UHAʡH RGM=  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD120UD1FBLL¶7JѾC4W,M4;#!UD1BNJCW14;  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1&$UD1ʡHR:DGAʈO>6#!UD1ʡHR:DGA>6  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1)'UȂ3.1PD>J١-- AB&$UȂ3.1P>J١-- AB  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1SQ>KU9D1M.OGUʡH9>9U199>U69IT@Ԛ<DB>KU9D1M.ǼOU>9U19>U6IT@Ԛ<  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1#!UD19ҧK1B—PϪJ>D UD19ҧK1BJ>D  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1#!UD19ҧK1B—PϪJ>D UD19ҧK1BJ>D  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1)'UȂ31MC—PQ>DԃPEAB U͂3M—PQ>DUAB  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1)'UD1—PRޚ6HU49QÐWB&$UD1Rޚ6HU49QÐWB  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1&$UD1W>β7UщQDG@K UD1W>ƴ7щQD@K  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1UD1MʡHRHUUD1MʡHRHU  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1/-UD1@1GM3̛<:9T!#!UD1@1GM3:TW  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1UD1>NVNFUD1>NVNF  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1,*UD1MʡHWRHUJ6J7,*UD1MʡHWRHUJ6J7  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1JHUBMBUD19ҧK1B—PϪJ>DS1UBDBN@Ԛ<>Dū1UDBN@Ԛ<  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD120AʋMQU,D1U>4,3T5=T&$AQU,D1U>435=  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1534UD1M/5S7H47 N H)'4UD1M5S N H  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1)'UD1U/VӲU>/=WQT UD1*ӲU>/=WQ  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1DBUD1UʡHWR6U>G=SU/T()!/-UD1UʡHWR6U>GSUT  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1#!UD1M>U.61D3̛<2/ќ6HQT&$UD1KU>1D3/HQ  UD1  UD153UD1ۓRD;16ǁRK3K"'!)'UD1ۓRD;16ǁRK3K  UD1  UD1DBUD1ۓR4H5B—PϪJ>D3KT((!)'UD1ۓR4H5BJ>D3KEԼOR@C/8EC/8#!HԼOR@C/8>ٟ@@Ԛ<HC/8>ٟ@@Ԛ<EԼOR@C/8EC/8qoHԼOR@C/D8>ٟ@8 P@NLΊ;J@>@BΊ;RP@NLΊ;J@>DOָ:?ThfHC/D8>ٟ@8 P@NLΊ;J@>@BΊ;RP@NLΊ;J@>Dָ:?TEԼOR@C/8EC/8ԼO@K@K OK@KEԼOR@C/8EC/8nlHԼOR@C/8>ٟ@8PHۇLBDCɕH5ǟVGRPHGLBD=ږH5DOָ:?TecHC/8>ٟ@8PHۇLBDCɕH5ǟVGRPHGLBD=ږH5Dָ:?TEԼOR@C/8EC/8><ԼOR@E>1>THIԓ4C/8>ٟ@A7B,*E>1>TC/8>ٟ@+EԼOR@C/8EC/8}{M:İU;ԼOR@?R5BD5@E7K՞RWKD5C/8>ٟ@>:T(!K;86_]:?R5BD5@EG՞RʼGD5C/8>ٟ@>:TK;86EԼOR@C/8EC/8#!HԼOR@C/8>ٟ@@Ԛ<HC/8>ٟ@@Ԛ<EԼOR@C/8EC/8#!ß<:Dć?ԼO@C/8>ٟ@ ß<:Dć?OC/8>ٟ@EԼOR@C/8EC/8ԼO@K@K OK@KEԼOR@C/8EC/8,*HԼOR@NܒM̺2C/D8>ٟ@#!HNMC/D8>ٟ@EԼOR@C/8EC/8><ԼOR@E>1>THIԓ4C/8>ٟ@A7B,*E>1>TC/8>ٟ@+EԼOR@C/8EC/853HԼOR@C/D8>ٟ@RE@>DW/-HC/D8>ٟ@RE@>DWEԼOR@C/8EC/8#!HԼOR@C/8>ٟ@@Ԛ<HC/8>ٟ@@Ԛ<EԼOR@C/8EC/8&$EԼO@C/8>ٟ@DPDA#!EOC/8>ٟ@DPDAEԼOR@C/8EC/8ԼO@K@K OK@KEԼOR@C/8EC/8HԼOR@C/8>ٟ@D>AIH!D>HIH:@>Hٟ@/ў7:@՞R.ٟ@/ў79OEEXqoHC/8>ٟ@D>AIH!D>HIH:>Hٟ@/ў7:>ٟ@/ў79EEXEԼOR@C/8EC/8><ԼOR@E>1>THIԓ4C/8>ٟ@A7B,*E>1>TC/8>ٟ@+EԼOR@C/8EC/8;9HԼO@C/8>ٟ@H2992653HOC/8>ٟ@H2926EԼOR@C/8EC/8#!HԼOR@C/8>ٟ@@Ԛ<HC/8>ٟ@@Ԛ<EԼOR@C/8EC/820H?RSHIԓ4>ԼO@‹7C/8>ٟ@)'H?RS>O‹7C/8>ٟ@ \ No newline at end of file diff --git a/paddle/trainer/tests/gen_proto_data.py b/paddle/trainer/tests/gen_proto_data.py deleted file mode 100644 index 8cc6d44673b9f992c28ae95cc06db5ea5aca0642..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/gen_proto_data.py +++ /dev/null @@ -1,279 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from cStringIO import StringIO - -import paddle.proto.DataFormat_pb2 as DataFormat -from google.protobuf.internal.encoder import _EncodeVarint - -import logging -import pprint - -logging.basicConfig( - format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', ) -logger = logging.getLogger('paddle') -logger.setLevel(logging.INFO) - -OOV_POLICY_IGNORE = 0 -OOV_POLICY_USE = 1 -OOV_POLICY_ERROR = 2 - -num_original_columns = 3 - -# Feature combination patterns. -# [[-1,0], [0,0]] means previous token at column 0 and current token at -# column 0 are combined as one feature. -patterns = [ - [[-2, 0]], - [[-1, 0]], - [[0, 0]], - [[1, 0]], - [[2, 0]], - [[-1, 0], [0, 0]], - [[0, 0], [1, 0]], - [[-2, 1]], - [[-1, 1]], - [[0, 1]], - [[1, 1]], - [[2, 1]], - [[-2, 1], [-1, 1]], - [[-1, 1], [0, 1]], - [[0, 1], [1, 1]], - [[1, 1], [2, 1]], - [[-2, 1], [-1, 1], [0, 1]], - [[-1, 1], [0, 1], [1, 1]], - [[0, 1], [1, 1], [2, 1]], -] - - -def make_features(sequence): - length = len(sequence) - num_features = len(sequence[0]) - - def get_features(pos): - if pos < 0: - return ['#B%s' % -pos] * num_features - if pos >= length: - return ['#E%s' % (pos - length + 1)] * num_features - return sequence[pos] - - for i in xrange(length): - for pattern in patterns: - fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern]) - sequence[i].append(fname) - - -''' -Source file format: -Each line is for one timestep. The features are separated by space. -An empty line indicates end of a sequence. - -cutoff: a list of numbers. If count of a feature is smaller than this, - it will be ignored. -if oov_policy[i] is OOV_POLICY_USE, id 0 is reserved for OOV features of -i-th column. - -return a list of dict for each column -''' - - -def create_dictionaries(filename, cutoff, oov_policy): - def add_to_dict(sequence, dicts): - num_features = len(dicts) - for features in sequence: - l = len(features) - assert l == num_features, "Wrong number of features " + line - for i in xrange(l): - if features[i] in dicts[i]: - dicts[i][features[i]] += 1 - else: - dicts[i][features[i]] = 1 - - num_features = len(cutoff) - dicts = [] - for i in xrange(num_features): - dicts.append(dict()) - - f = open(filename, 'rb') - - sequence = [] - - for line in f: - line = line.strip() - if not line: - make_features(sequence) - add_to_dict(sequence, dicts) - sequence = [] - continue - features = line.split(' ') - sequence.append(features) - - for i in xrange(num_features): - dct = dicts[i] - n = 1 if oov_policy[i] == OOV_POLICY_USE else 0 - todo = [] - for k, v in dct.iteritems(): - if v < cutoff[i]: - todo.append(k) - else: - dct[k] = n - n += 1 - - if oov_policy[i] == OOV_POLICY_USE: - # placeholder so that len(dct) will be the number of features - # including OOV - dct['#OOV#'] = 0 - - logger.info('column %d dict size=%d, ignored %d' % (i, n, len(todo))) - for k in todo: - del dct[k] - - f.close() - return dicts - - -def encode_varint(v): - out = StringIO() - _EncodeVarint(out.write, v) - return out.getvalue() - - -def write_proto(file, message): - s = message.SerializeToString() - packed_len = encode_varint(len(s)) - file.write(packed_len + s) - - -''' -if oov_policy[i] == OOV_POLICY_USE, features in i-th column which are not -existed in dicts[i] will be assigned to id 0. -if oov_policy[i] == OOV_POLICY_ERROR, all features in i-th column MUST exist -in dicts[i]. -''' - - -def gen_proto_file(input_file, dicts, oov_policy, output_file): - def write_sequence(out, sequence): - num_features = len(dicts) - is_beginning = True - for features in sequence: - assert len(features) == num_features, \ - "Wrong number of features: " + line - sample = DataFormat.DataSample() - for i in xrange(num_original_columns): - id = dicts[i].get(features[i], -1) - if id != -1: - sample.id_slots.append(id) - elif oov_policy[i] == OOV_POLICY_IGNORE: - sample.id_slots.append(0xffffffff) - elif oov_policy[i] == OOV_POLICY_ERROR: - logger.fatal("Unknown token: %s" % features[i]) - else: - sample.id_slots.append(0) - - if patterns: - dim = 0 - vec = sample.vector_slots.add() - for i in xrange(num_original_columns, num_features): - id = dicts[i].get(features[i], -1) - if id != -1: - vec.ids.append(dim + id) - elif oov_policy[i] == OOV_POLICY_IGNORE: - pass - elif oov_policy[i] == OOV_POLICY_ERROR: - logger.fatal("Unknown token: %s" % features[i]) - else: - vec.ids.append(dim + 0) - - dim += len(dicts[i]) - - sample.is_beginning = is_beginning - is_beginning = False - write_proto(out, sample) - - num_features = len(dicts) - f = open(input_file, 'rb') - out = open(output_file, 'wb') - - header = DataFormat.DataHeader() - if patterns: - slot_def = header.slot_defs.add() - slot_def.type = DataFormat.SlotDef.VECTOR_SPARSE_NON_VALUE - slot_def.dim = sum( - [len(dicts[i]) for i in xrange(num_original_columns, len(dicts))]) - logger.info("feature_dim=%s" % slot_def.dim) - - for i in xrange(num_original_columns): - slot_def = header.slot_defs.add() - slot_def.type = DataFormat.SlotDef.INDEX - slot_def.dim = len(dicts[i]) - - write_proto(out, header) - - num_sequences = 0 - sequence = [] - for line in f: - line = line.strip() - if not line: - make_features(sequence) - write_sequence(out, sequence) - sequence = [] - num_sequences += 1 - continue - features = line.split(' ') - sequence.append(features) - - f.close() - out.close() - - logger.info("num_sequences=%s" % num_sequences) - - -dict2 = { - 'B-ADJP': 0, - 'I-ADJP': 1, - 'B-ADVP': 2, - 'I-ADVP': 3, - 'B-CONJP': 4, - 'I-CONJP': 5, - 'B-INTJ': 6, - 'I-INTJ': 7, - 'B-LST': 8, - 'I-LST': 9, - 'B-NP': 10, - 'I-NP': 11, - 'B-PP': 12, - 'I-PP': 13, - 'B-PRT': 14, - 'I-PRT': 15, - 'B-SBAR': 16, - 'I-SBAR': 17, - 'B-UCP': 18, - 'I-UCP': 19, - 'B-VP': 20, - 'I-VP': 21, - 'O': 22 -} - -if __name__ == '__main__': - cutoff = [3, 1, 0] - cutoff += [3] * len(patterns) - oov_policy = [OOV_POLICY_IGNORE, OOV_POLICY_ERROR, OOV_POLICY_ERROR] - oov_policy += [OOV_POLICY_IGNORE] * len(patterns) - dicts = create_dictionaries('trainer/tests/train.txt', cutoff, oov_policy) - dicts[2] = dict2 - gen_proto_file('trainer/tests/train.txt', dicts, oov_policy, - 'trainer/tests/train_proto.bin') - gen_proto_file('trainer/tests/test.txt', dicts, oov_policy, - 'trainer/tests/test_proto.bin') diff --git a/paddle/trainer/tests/test.txt b/paddle/trainer/tests/test.txt deleted file mode 100644 index 3ad503b34f2e1a84c632d0894f180b5cf9ac550a..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/test.txt +++ /dev/null @@ -1,1000 +0,0 @@ -Confidence NN B-NP -in IN B-PP -the DT B-NP -pound NN I-NP -is VBZ B-VP -widely RB I-VP -expected VBN I-VP -to TO I-VP -take VB I-VP -another DT B-NP -sharp JJ I-NP -dive NN I-NP -if IN B-SBAR -trade NN B-NP -figures NNS I-NP -for IN B-PP -September NNP B-NP -, , O -due JJ B-ADJP -for IN B-PP -release NN B-NP -tomorrow NN B-NP -, , O -fail VB B-VP -to TO I-VP -show VB I-VP -a DT B-NP -substantial JJ I-NP -improvement NN I-NP -from IN B-PP -July NNP B-NP -and CC I-NP -August NNP I-NP -'s POS B-NP -near-record JJ I-NP -deficits NNS I-NP -. . O - -Chancellor NNP O -of IN B-PP -the DT B-NP -Exchequer NNP I-NP -Nigel NNP B-NP -Lawson NNP I-NP -'s POS B-NP -restated VBN I-NP -commitment NN I-NP -to TO B-PP -a DT B-NP -firm NN I-NP -monetary JJ I-NP -policy NN I-NP -has VBZ B-VP -helped VBN I-VP -to TO I-VP -prevent VB I-VP -a DT B-NP -freefall NN I-NP -in IN B-PP -sterling NN B-NP -over IN B-PP -the DT B-NP -past JJ I-NP -week NN I-NP -. . O - -But CC O -analysts NNS B-NP -reckon VBP B-VP -underlying VBG B-NP -support NN I-NP -for IN B-PP -sterling NN B-NP -has VBZ B-VP -been VBN I-VP -eroded VBN I-VP -by IN B-PP -the DT B-NP -chancellor NN I-NP -'s POS B-NP -failure NN I-NP -to TO B-VP -announce VB I-VP -any DT B-NP -new JJ I-NP -policy NN I-NP -measures NNS I-NP -in IN B-PP -his PRP$ B-NP -Mansion NNP I-NP -House NNP I-NP -speech NN I-NP -last JJ B-NP -Thursday NNP I-NP -. . O - -This DT B-NP -has VBZ B-VP -increased VBN I-VP -the DT B-NP -risk NN I-NP -of IN B-PP -the DT B-NP -government NN I-NP -being VBG B-VP -forced VBN I-VP -to TO I-VP -increase VB I-VP -base NN B-NP -rates NNS I-NP -to TO B-PP -16 CD B-NP -% NN I-NP -from IN B-PP -their PRP$ B-NP -current JJ I-NP -15 CD I-NP -% NN I-NP -level NN I-NP -to TO B-VP -defend VB I-VP -the DT B-NP -pound NN I-NP -, , O -economists NNS B-NP -and CC O -foreign JJ B-NP -exchange NN I-NP -market NN I-NP -analysts NNS I-NP -say VBP B-VP -. . O - -`` `` O -The DT B-NP -risks NNS I-NP -for IN B-PP -sterling NN B-NP -of IN B-PP -a DT B-NP -bad JJ I-NP -trade NN I-NP -figure NN I-NP -are VBP B-VP -very RB B-ADVP -heavily RB I-ADVP -on IN B-PP -the DT B-NP -down JJ I-NP -side NN I-NP -, , O -'' '' O -said VBD B-VP -Chris NNP B-NP -Dillow NNP I-NP -, , O -senior JJ B-NP -U.K. NNP I-NP -economist NN I-NP -at IN B-PP -Nomura NNP B-NP -Research NNP I-NP -Institute NNP I-NP -. . O - -`` `` O -If IN B-SBAR -there EX B-NP -is VBZ B-VP -another DT B-NP -bad JJ I-NP -trade NN I-NP -number NN I-NP -, , O -there EX B-NP -could MD B-VP -be VB I-VP -an DT B-NP -awful JJ I-NP -lot NN I-NP -of IN B-PP -pressure NN B-NP -, , O -'' '' O -noted VBD B-VP -Simon NNP B-NP -Briscoe NNP I-NP -, , O -U.K. NNP B-NP -economist NN I-NP -for IN B-PP -Midland NNP B-NP -Montagu NNP I-NP -, , O -a DT B-NP -unit NN I-NP -of IN B-PP -Midland NNP B-NP -Bank NNP I-NP -PLC NNP I-NP -. . O - -Forecasts NNS B-NP -for IN B-PP -the DT B-NP -trade NN I-NP -figures NNS I-NP -range VBP B-VP -widely RB B-ADVP -, , O -but CC O -few JJ B-NP -economists NNS I-NP -expect VBP B-VP -the DT B-NP -data NNS I-NP -to TO B-VP -show VB I-VP -a DT B-NP -very RB I-NP -marked VBN I-NP -improvement NN I-NP -from IN B-PP -the DT O -# # O -2 CD O -billion CD O --LRB- ( O -$ $ B-ADJP -3.2 CD O -billion CD O --RRB- ) O -deficit NN B-NP -in IN B-PP -the DT B-NP -current JJ I-NP -account NN I-NP -reported VBD B-VP -for IN B-PP -August NNP B-NP -. . O - -The DT B-NP -August NNP I-NP -deficit NN I-NP -and CC O -the DT B-NP -# # I-NP -2.2 CD I-NP -billion CD I-NP -gap NN I-NP -registered VBN B-VP -in IN B-PP -July NNP B-NP -are VBP B-VP -topped VBN I-VP -only RB B-ADVP -by IN B-PP -the DT B-NP -# # I-NP -2.3 CD I-NP -billion CD I-NP -deficit NN I-NP -of IN B-PP -October NNP B-NP -1988 CD I-NP -. . O - -Sanjay NNP B-NP -Joshi NNP I-NP -, , O -European JJ B-NP -economist NN I-NP -at IN B-PP -Baring NNP B-NP -Brothers NNPS I-NP -& CC I-NP -Co. NNP I-NP -, , O -said VBD B-VP -there EX B-NP -is VBZ B-VP -no DT B-NP -sign NN I-NP -that IN B-SBAR -Britain NNP B-NP -'s POS B-NP -manufacturing NN I-NP -industry NN I-NP -is VBZ B-VP -transforming VBG I-VP -itself PRP B-NP -to TO B-VP -boost VB I-VP -exports NNS B-NP -. . O - -At IN B-PP -the DT B-NP -same JJ I-NP -time NN I-NP -, , O -he PRP B-NP -remains VBZ B-VP -fairly RB B-ADJP -pessimistic JJ I-ADJP -about IN B-PP -the DT B-NP -outlook NN I-NP -for IN B-PP -imports NNS B-NP -, , O -given VBN B-PP -continued VBD B-NP -high JJ I-NP -consumer NN I-NP -and CC I-NP -capital NN I-NP -goods NNS I-NP -inflows NNS I-NP -. . O - -He PRP B-NP -reckons VBZ B-VP -the DT B-NP -current JJ I-NP -account NN I-NP -deficit NN I-NP -will MD B-VP -narrow VB I-VP -to TO B-PP -only RB B-NP -# # I-NP -1.8 CD I-NP -billion CD I-NP -in IN B-PP -September NNP B-NP -. . O - -However RB B-ADVP -, , O -Mr. NNP B-NP -Dillow NNP I-NP -said VBD B-VP -he PRP B-NP -believes VBZ B-VP -that IN B-SBAR -a DT B-NP -reduction NN I-NP -in IN B-PP -raw JJ B-NP -material NN I-NP -stockbuilding VBG I-NP -by IN B-PP -industry NN B-NP -could MD B-VP -lead VB I-VP -to TO B-PP -a DT B-NP -sharp JJ I-NP -drop NN I-NP -in IN B-PP -imports NNS B-NP -. . O - -Combined VBN B-PP -with IN B-PP -at IN B-ADVP -least JJS I-ADVP -some DT B-NP -rebound NN I-NP -in IN B-PP -exports NNS B-NP -after IN B-PP -August NNP B-NP -'s POS B-NP -unexpected JJ I-NP -decline NN I-NP -, , O -the DT B-NP -deficit NN I-NP -could MD B-VP -narrow VB I-VP -to TO B-PP -as RB B-NP -little JJ I-NP -as IN I-NP -# # I-NP -1.3 CD I-NP -billion CD I-NP -. . O - -Mr. NNP B-NP -Briscoe NNP I-NP -, , O -who WP B-NP -also RB B-ADVP -forecasts VBZ B-VP -a DT B-NP -# # I-NP -1.3 CD I-NP -billion CD I-NP -current JJ I-NP -account NN I-NP -gap NN I-NP -, , O -warns VBZ B-VP -that IN B-SBAR -even RB B-SBAR -if IN I-SBAR -the DT B-NP -trade NN I-NP -figures NNS I-NP -are VBP B-VP -bullish JJ B-ADJP -for IN B-PP -sterling NN B-NP -, , O -the DT B-NP -currency NN I-NP -wo MD B-VP -n't RB I-VP -advance VB I-VP -much JJ B-NP -because IN B-SBAR -investors NNS B-NP -will MD B-VP -want VB I-VP -to TO I-VP -see VB I-VP -further JJ B-NP -evidence NN I-NP -of IN B-PP -the DT B-NP -turnaround NN I-NP -before IN B-PP -adjusting VBG B-VP -positions NNS B-NP -. . O - -Nevertheless RB B-ADVP -, , O -he PRP B-NP -noted VBD B-VP -, , O -`` `` O -No DT B-NP -one PRP I-NP -will MD B-VP -want VB I-VP -to TO I-VP -go VB I-VP -into IN B-PP -the DT B-NP -trade NN I-NP -figures NNS I-NP -without IN B-PP -a DT B-NP -flat JJ I-NP -position NN I-NP -'' '' O -in IN B-PP -the DT B-NP -pound NN I-NP -. . O - -Meanwhile RB B-ADVP -, , O -overall JJ B-NP -evidence NN I-NP -on IN B-PP -the DT B-NP -economy NN I-NP -remains VBZ B-VP -fairly RB B-ADJP -clouded VBN I-ADJP -. . O - -In IN B-PP -his PRP$ B-NP -Mansion NNP I-NP -House NNP I-NP -speech NN I-NP -, , O -Mr. NNP B-NP -Lawson NNP I-NP -warned VBD B-VP -that IN B-SBAR -a DT B-NP -further JJ I-NP -slowdown NN I-NP -can MD B-VP -be VB I-VP -expected VBN I-VP -as IN B-SBAR -the DT B-NP -impact NN I-NP -of IN B-PP -the DT B-NP -last JJ I-NP -rise NN I-NP -in IN B-PP -interest NN B-NP -rates NNS I-NP -earlier RBR B-NP -this DT I-NP -month NN I-NP -takes VBZ B-VP -effect NN B-NP -. . O - -U.K. JJ B-NP -base NN I-NP -rates NNS I-NP -are VBP B-VP -at IN B-PP -their PRP$ B-NP -highest JJS I-NP -level NN I-NP -in IN B-PP -eight CD B-NP -years NNS I-NP -. . O - -But CC O -consumer NN B-NP -expenditure NN I-NP -data NNS I-NP -released VBD B-VP -Friday NNP B-NP -do VBP B-VP -n't RB I-VP -suggest VB I-VP -that IN B-SBAR -the DT B-NP -U.K. NNP I-NP -economy NN I-NP -is VBZ B-VP -slowing VBG I-VP -that DT B-ADVP -quickly RB I-ADVP -. . O - -The DT B-NP -figures NNS I-NP -show VBP B-VP -that DT O -spending NN B-NP -rose VBD B-VP -0.1 CD B-NP -% NN I-NP -in IN B-PP -the DT B-NP -third JJ I-NP -quarter NN I-NP -from IN B-PP -the DT B-NP -second JJ I-NP -quarter NN I-NP -and CC O -was VBD B-VP -up IN B-ADVP -3.8 CD B-NP -% NN I-NP -from IN B-PP -a DT B-NP -year NN I-NP -ago RB B-ADVP -. . O - -This DT B-NP -compares VBZ B-VP -with IN B-PP -a DT B-NP -1.6 CD I-NP -% NN I-NP -rise NN I-NP -in IN B-PP -the DT B-NP -second NN I-NP -from IN B-PP -the DT B-NP -first JJ I-NP -quarter NN I-NP -and CC O -a DT B-NP -5.4 CD I-NP -% NN I-NP -increase NN I-NP -from IN B-PP -the DT B-NP -second JJ I-NP -quarter NN I-NP -of IN B-PP -1988 CD B-NP -. . O - -Mr. NNP B-NP -Dillow NNP I-NP -said VBD B-VP -the DT B-NP -data NNS I-NP -show VBP B-VP -the DT B-NP -economy NN I-NP -`` `` O -is VBZ B-VP -still RB B-ADVP -quite RB B-ADJP -strong JJ I-ADJP -, , O -'' '' O -but CC O -suggestions NNS B-NP -that IN B-SBAR -much NN B-NP -of IN B-PP -the DT B-NP -spending NN I-NP -went VBD B-VP -on IN B-PP -services NNS B-NP -rather RB B-PP -than IN I-PP -consumer NN B-NP -goods NNS I-NP -should MD B-VP -reduce VB I-VP -fears NNS B-NP -of IN B-PP -more JJR B-NP -import NN I-NP -rises NNS I-NP -. . O - -Certainly RB B-ADVP -, , O -the DT B-NP -chancellor NN I-NP -has VBZ B-VP -made VBN I-VP -it PRP B-NP -clear JJ B-ADJP -that IN B-SBAR -he PRP B-NP -is VBZ B-VP -prepared VBN I-VP -to TO I-VP -increase VB I-VP -interest NN B-NP -rates NNS I-NP -again RB B-ADVP -if IN B-SBAR -necessary JJ B-ADJP -to TO B-VP -both DT I-VP -ensure VB I-VP -that IN B-SBAR -a DT B-NP -substantial JJ I-NP -slowdown NN I-NP -does VBZ B-VP -take VB I-VP -place NN B-NP -and CC O -that DT O -sterling NN B-NP -does VBZ B-VP -n't RB I-VP -decline VB I-VP -further JJ B-ADVP -. . O - -Thursday NNP B-NP -, , O -he PRP B-NP -reminded VBD B-VP -his PRP$ B-NP -audience NN I-NP -that IN B-SBAR -the DT B-NP -government NN I-NP -`` `` O -can MD B-VP -not RB I-VP -allow VB I-VP -the DT B-NP -necessary JJ I-NP -rigor NN I-NP -of IN B-PP -monetary JJ B-NP -policy NN I-NP -to TO B-VP -be VB I-VP -undermined VBN I-VP -by IN B-PP -exchange NN B-NP -rate NN I-NP -weakness NN I-NP -. . O -'' '' O - -Analysts NNS B-NP -agree VBP B-VP -there EX B-NP -is VBZ B-VP -little JJ B-NP -holding NN B-VP -sterling NN B-NP -firm NN B-ADJP -at IN B-PP -the DT B-NP -moment NN I-NP -other JJ B-ADJP -than IN B-PP -Mr. NNP B-NP -Lawson NNP I-NP -'s POS B-NP -promise NN I-NP -that IN B-SBAR -rates NNS B-NP -will MD B-VP -be VB I-VP -pushed VBN I-VP -higher JJR B-ADJP -if IN B-SBAR -necessary JJ B-ADJP -. . O - -And CC O -, , O -they PRP B-NP -warn VBP B-VP -, , O -any DT B-NP -further JJ I-NP -drop NN I-NP -in IN B-PP -the DT B-NP -government NN I-NP -'s POS B-NP -popularity NN I-NP -could MD B-VP -swiftly RB I-VP -make VB I-VP -this DT B-NP -promise NN I-NP -sound NN B-VP -hollow JJ B-ADJP -. . O - -Sterling NNP B-NP -was VBD B-VP -already RB I-VP -showing VBG I-VP -some DT B-NP -signs NNS I-NP -of IN B-PP -a DT B-NP -lack NN I-NP -of IN B-PP -confidence NN B-NP -in IN B-PP -Mr. NNP B-NP -Lawson NNP I-NP -'s POS B-NP -promise NN I-NP -Friday NNP B-NP -. . O - -In IN B-PP -European JJ B-NP -trading NN I-NP -it PRP B-NP -declined VBD B-VP -to TO B-PP -$ $ B-NP -1.5890 CD I-NP -and CC O -2.9495 CD B-NP -marks NNS I-NP -from IN B-PP -$ $ B-NP -1.5940 CD I-NP -and CC O -2.9429 CD B-NP -marks NNS I-NP -late JJ B-NP -Thursday NNP I-NP -. . O - -Economists NNS B-NP -suggested VBD B-VP -that IN B-SBAR -if IN B-SBAR -the DT B-NP -pound NN I-NP -falls VBZ B-VP -much JJ B-NP -below IN B-PP -2.90 CD B-NP -marks NNS I-NP -, , O -the DT B-NP -government NN I-NP -will MD B-VP -be VB I-VP -forced VBN I-VP -to TO I-VP -increase VB I-VP -rates NNS B-NP -to TO B-PP -16 CD B-NP -% NN I-NP -, , O -both DT B-VP -to TO I-VP -halt VB B-VP -any DT B-NP -further JJ I-NP -decline NN I-NP -and CC O -ensure VB B-VP -that IN B-SBAR -the DT B-NP -balance NN I-NP -of IN B-PP -monetary JJ B-NP -policy NN I-NP -remains VBZ B-VP -unchanged JJ B-ADJP -. . O - -Friday NNP B-NP -'s POS B-NP -Market NNP I-NP -Activity NN I-NP - -The DT B-NP -dollar NN I-NP -posted VBD B-VP -gains NNS B-NP -in IN B-PP -quiet JJ B-NP -trading NN I-NP -as IN B-SBAR -concerns NNS B-NP -about IN B-PP -equities NNS B-NP -abated VBN B-VP -. . O - -Foreign JJ B-NP -exchange NN I-NP -dealers NNS I-NP -said VBD B-VP -that IN B-SBAR -the DT B-NP -currency NN I-NP -market NN I-NP -has VBZ B-VP -begun VBN I-VP -to TO I-VP -distance VB I-VP -itself PRP B-NP -from IN B-PP -the DT B-NP -volatile JJ I-NP -stock NN I-NP -exchange NN I-NP -, , O -which WDT B-NP -has VBZ B-VP -preoccupied VBN I-VP -the DT B-NP -market NN I-NP -since IN B-PP -Oct. NNP B-NP -13 CD I-NP -, , O -when WRB B-ADVP -the DT B-NP -Dow NNP I-NP -Jones NNP I-NP -Industrial NNP I-NP -Average NNP I-NP -plunged VBD B-VP -more JJR B-NP -than IN I-NP -190 CD I-NP -points NNS I-NP -. . O - -Currency NN B-NP -analysts NNS I-NP -predict VBP B-VP -that IN B-SBAR -in IN B-PP -the DT B-NP -coming VBG I-NP -week NN I-NP -the DT B-NP -foreign JJ I-NP -exchange NN I-NP -market NN I-NP -will MD B-VP -shift VB I-VP -its PRP$ B-NP -focus NN I-NP -back RB B-ADVP -to TO B-PP -economic JJ B-NP -fundamentals NNS I-NP -, , O -keeping VBG B-VP -a DT B-NP -close NN I-NP -eye NN I-NP -out IN B-ADVP -for IN B-PP -any DT B-NP -signs NNS I-NP -of IN B-PP -monetary JJ B-NP -easing NN I-NP -by IN B-PP -U.S. NNP B-NP -Federal NNP I-NP -Reserve NNP I-NP -. . O - -Late RB B-ADVP -in IN B-PP -the DT B-NP -New NNP I-NP -York NNP I-NP -trading NN I-NP -day NN I-NP -, , O -the DT B-NP -dollar NN I-NP -was VBD B-VP -quoted VBN I-VP -at IN B-PP -1.8578 CD B-NP -marks NNS I-NP -, , O -up IN B-ADVP -from IN B-PP -1.8470 CD B-NP -marks NNS I-NP -late JJ B-NP -Thursday NNP I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -. . O - -The DT B-NP -U.S. NNP I-NP -currency NN I-NP -was VBD B-VP -also RB I-VP -changing VBG I-VP -hands NNS B-NP -at IN B-PP -142.43 CD B-NP -yen NN I-NP -, , O -up IN B-ADVP -from IN B-PP -141.70 CD B-NP -yen NN I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -late JJ B-NP -Thursday NNP I-NP -. . O - -In IN B-PP -Tokyo NNP B-NP -on IN B-PP -Monday NNP B-NP -, , O -the DT B-NP -U.S. NNP I-NP -currency NN I-NP -opened VBD B-VP -for IN B-PP -trading NN B-NP -at IN B-PP -141.95 CD B-NP -yen NN I-NP -, , O -up IN B-ADVP -from IN B-PP -Friday NNP B-NP -'s POS B-NP -Tokyo NNP I-NP diff --git a/paddle/trainer/tests/test_Trainer.cpp b/paddle/trainer/tests/test_Trainer.cpp index 425b3d10a38086463784ba2a18db1293efe96e92..394038cf730f13cb957fbbc5ae0e5719b8fe9db6 100644 --- a/paddle/trainer/tests/test_Trainer.cpp +++ b/paddle/trainer/tests/test_Trainer.cpp @@ -24,7 +24,6 @@ using namespace std; // NOLINT static const string& configFile1 = "trainer/tests/sample_trainer_config.conf"; static const string& configFile2 = "trainer/tests/sample_trainer_config_hsigmoid.conf"; -static const string& configFile3 = "trainer/tests/chunking.conf"; static const string& configFile4 = "trainer/tests/sample_trainer_config_parallel.conf"; @@ -95,13 +94,6 @@ TEST(checkGradient, multi) { TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); } -TEST(checkGradient, chunk) { - checkGradientTest(configFile3, false, false); -#ifdef PADDLE_WITH_CUDA - checkGradientTest(configFile3, true, true); -#endif -} - TEST(checkGradient, non_parallel) { checkGradientTest(configFile4, false, false); } diff --git a/paddle/trainer/tests/test_config.conf b/paddle/trainer/tests/test_config.conf index d1bb9b877fe26702948586dbe90b9ff0ee27c1d6..2f86aaa75316fa2a5a28edfef31c01e15a44b3d0 100644 --- a/paddle/trainer/tests/test_config.conf +++ b/paddle/trainer/tests/test_config.conf @@ -15,12 +15,7 @@ from paddle.trainer_config_helpers import * -TrainData(ProtoData( - files = "dummy_list", - constant_slots = [1.0], - async_load_data = True)) - -TestData(SimpleData( +TrainData(SimpleData( files = "trainer/tests/sample_filelist.txt", feat_dim = 3, context_len = 0, diff --git a/paddle/trainer/tests/test_files.txt b/paddle/trainer/tests/test_files.txt deleted file mode 100644 index 49002677a848c499610d5e869ce61efb2105e3c8..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/test_files.txt +++ /dev/null @@ -1 +0,0 @@ -trainer/tests/test_proto.bin diff --git a/paddle/trainer/tests/train.list b/paddle/trainer/tests/train.list deleted file mode 100644 index f41e8e8893de6068deb43b08ec6a3bcdd4039326..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/train.list +++ /dev/null @@ -1 +0,0 @@ -trainer/tests/data_bin_part diff --git a/paddle/trainer/tests/train.txt b/paddle/trainer/tests/train.txt deleted file mode 100644 index 2313aee987ba71ba7ea779d3cf7705478e7fbde2..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/train.txt +++ /dev/null @@ -1,5000 +0,0 @@ -Confidence NN B-NP -in IN B-PP -the DT B-NP -pound NN I-NP -is VBZ B-VP -widely RB I-VP -expected VBN I-VP -to TO I-VP -take VB I-VP -another DT B-NP -sharp JJ I-NP -dive NN I-NP -if IN B-SBAR -trade NN B-NP -figures NNS I-NP -for IN B-PP -September NNP B-NP -, , O -due JJ B-ADJP -for IN B-PP -release NN B-NP -tomorrow NN B-NP -, , O -fail VB B-VP -to TO I-VP -show VB I-VP -a DT B-NP -substantial JJ I-NP -improvement NN I-NP -from IN B-PP -July NNP B-NP -and CC I-NP -August NNP I-NP -'s POS B-NP -near-record JJ I-NP -deficits NNS I-NP -. . O - -Chancellor NNP O -of IN B-PP -the DT B-NP -Exchequer NNP I-NP -Nigel NNP B-NP -Lawson NNP I-NP -'s POS B-NP -restated VBN I-NP -commitment NN I-NP -to TO B-PP -a DT B-NP -firm NN I-NP -monetary JJ I-NP -policy NN I-NP -has VBZ B-VP -helped VBN I-VP -to TO I-VP -prevent VB I-VP -a DT B-NP -freefall NN I-NP -in IN B-PP -sterling NN B-NP -over IN B-PP -the DT B-NP -past JJ I-NP -week NN I-NP -. . O - -But CC O -analysts NNS B-NP -reckon VBP B-VP -underlying VBG B-NP -support NN I-NP -for IN B-PP -sterling NN B-NP -has VBZ B-VP -been VBN I-VP -eroded VBN I-VP -by IN B-PP -the DT B-NP -chancellor NN I-NP -'s POS B-NP -failure NN I-NP -to TO B-VP -announce VB I-VP -any DT B-NP -new JJ I-NP -policy NN I-NP -measures NNS I-NP -in IN B-PP -his PRP$ B-NP -Mansion NNP I-NP -House NNP I-NP -speech NN I-NP -last JJ B-NP -Thursday NNP I-NP -. . O - -This DT B-NP -has VBZ B-VP -increased VBN I-VP -the DT B-NP -risk NN I-NP -of IN B-PP -the DT B-NP -government NN I-NP -being VBG B-VP -forced VBN I-VP -to TO I-VP -increase VB I-VP -base NN B-NP -rates NNS I-NP -to TO B-PP -16 CD B-NP -% NN I-NP -from IN B-PP -their PRP$ B-NP -current JJ I-NP -15 CD I-NP -% NN I-NP -level NN I-NP -to TO B-VP -defend VB I-VP -the DT B-NP -pound NN I-NP -, , O -economists NNS B-NP -and CC O -foreign JJ B-NP -exchange NN I-NP -market NN I-NP -analysts NNS I-NP -say VBP B-VP -. . O - -`` `` O -The DT B-NP -risks NNS I-NP -for IN B-PP -sterling NN B-NP -of IN B-PP -a DT B-NP -bad JJ I-NP -trade NN I-NP -figure NN I-NP -are VBP B-VP -very RB B-ADVP -heavily RB I-ADVP -on IN B-PP -the DT B-NP -down JJ I-NP -side NN I-NP -, , O -'' '' O -said VBD B-VP -Chris NNP B-NP -Dillow NNP I-NP -, , O -senior JJ B-NP -U.K. NNP I-NP -economist NN I-NP -at IN B-PP -Nomura NNP B-NP -Research NNP I-NP -Institute NNP I-NP -. . O - -`` `` O -If IN B-SBAR -there EX B-NP -is VBZ B-VP -another DT B-NP -bad JJ I-NP -trade NN I-NP -number NN I-NP -, , O -there EX B-NP -could MD B-VP -be VB I-VP -an DT B-NP -awful JJ I-NP -lot NN I-NP -of IN B-PP -pressure NN B-NP -, , O -'' '' O -noted VBD B-VP -Simon NNP B-NP -Briscoe NNP I-NP -, , O -U.K. NNP B-NP -economist NN I-NP -for IN B-PP -Midland NNP B-NP -Montagu NNP I-NP -, , O -a DT B-NP -unit NN I-NP -of IN B-PP -Midland NNP B-NP -Bank NNP I-NP -PLC NNP I-NP -. . O - -Forecasts NNS B-NP -for IN B-PP -the DT B-NP -trade NN I-NP -figures NNS I-NP -range VBP B-VP -widely RB B-ADVP -, , O -but CC O -few JJ B-NP -economists NNS I-NP -expect VBP B-VP -the DT B-NP -data NNS I-NP -to TO B-VP -show VB I-VP -a DT B-NP -very RB I-NP -marked VBN I-NP -improvement NN I-NP -from IN B-PP -the DT O -# # O -2 CD O -billion CD O --LRB- ( O -$ $ B-ADJP -3.2 CD O -billion CD O --RRB- ) O -deficit NN B-NP -in IN B-PP -the DT B-NP -current JJ I-NP -account NN I-NP -reported VBD B-VP -for IN B-PP -August NNP B-NP -. . O - -The DT B-NP -August NNP I-NP -deficit NN I-NP -and CC O -the DT B-NP -# # I-NP -2.2 CD I-NP -billion CD I-NP -gap NN I-NP -registered VBN B-VP -in IN B-PP -July NNP B-NP -are VBP B-VP -topped VBN I-VP -only RB B-ADVP -by IN B-PP -the DT B-NP -# # I-NP -2.3 CD I-NP -billion CD I-NP -deficit NN I-NP -of IN B-PP -October NNP B-NP -1988 CD I-NP -. . O - -Sanjay NNP B-NP -Joshi NNP I-NP -, , O -European JJ B-NP -economist NN I-NP -at IN B-PP -Baring NNP B-NP -Brothers NNPS I-NP -& CC I-NP -Co. NNP I-NP -, , O -said VBD B-VP -there EX B-NP -is VBZ B-VP -no DT B-NP -sign NN I-NP -that IN B-SBAR -Britain NNP B-NP -'s POS B-NP -manufacturing NN I-NP -industry NN I-NP -is VBZ B-VP -transforming VBG I-VP -itself PRP B-NP -to TO B-VP -boost VB I-VP -exports NNS B-NP -. . O - -At IN B-PP -the DT B-NP -same JJ I-NP -time NN I-NP -, , O -he PRP B-NP -remains VBZ B-VP -fairly RB B-ADJP -pessimistic JJ I-ADJP -about IN B-PP -the DT B-NP -outlook NN I-NP -for IN B-PP -imports NNS B-NP -, , O -given VBN B-PP -continued VBD B-NP -high JJ I-NP -consumer NN I-NP -and CC I-NP -capital NN I-NP -goods NNS I-NP -inflows NNS I-NP -. . O - -He PRP B-NP -reckons VBZ B-VP -the DT B-NP -current JJ I-NP -account NN I-NP -deficit NN I-NP -will MD B-VP -narrow VB I-VP -to TO B-PP -only RB B-NP -# # I-NP -1.8 CD I-NP -billion CD I-NP -in IN B-PP -September NNP B-NP -. . O - -However RB B-ADVP -, , O -Mr. NNP B-NP -Dillow NNP I-NP -said VBD B-VP -he PRP B-NP -believes VBZ B-VP -that IN B-SBAR -a DT B-NP -reduction NN I-NP -in IN B-PP -raw JJ B-NP -material NN I-NP -stockbuilding VBG I-NP -by IN B-PP -industry NN B-NP -could MD B-VP -lead VB I-VP -to TO B-PP -a DT B-NP -sharp JJ I-NP -drop NN I-NP -in IN B-PP -imports NNS B-NP -. . O - -Combined VBN B-PP -with IN B-PP -at IN B-ADVP -least JJS I-ADVP -some DT B-NP -rebound NN I-NP -in IN B-PP -exports NNS B-NP -after IN B-PP -August NNP B-NP -'s POS B-NP -unexpected JJ I-NP -decline NN I-NP -, , O -the DT B-NP -deficit NN I-NP -could MD B-VP -narrow VB I-VP -to TO B-PP -as RB B-NP -little JJ I-NP -as IN I-NP -# # I-NP -1.3 CD I-NP -billion CD I-NP -. . O - -Mr. NNP B-NP -Briscoe NNP I-NP -, , O -who WP B-NP -also RB B-ADVP -forecasts VBZ B-VP -a DT B-NP -# # I-NP -1.3 CD I-NP -billion CD I-NP -current JJ I-NP -account NN I-NP -gap NN I-NP -, , O -warns VBZ B-VP -that IN B-SBAR -even RB B-SBAR -if IN I-SBAR -the DT B-NP -trade NN I-NP -figures NNS I-NP -are VBP B-VP -bullish JJ B-ADJP -for IN B-PP -sterling NN B-NP -, , O -the DT B-NP -currency NN I-NP -wo MD B-VP -n't RB I-VP -advance VB I-VP -much JJ B-NP -because IN B-SBAR -investors NNS B-NP -will MD B-VP -want VB I-VP -to TO I-VP -see VB I-VP -further JJ B-NP -evidence NN I-NP -of IN B-PP -the DT B-NP -turnaround NN I-NP -before IN B-PP -adjusting VBG B-VP -positions NNS B-NP -. . O - -Nevertheless RB B-ADVP -, , O -he PRP B-NP -noted VBD B-VP -, , O -`` `` O -No DT B-NP -one PRP I-NP -will MD B-VP -want VB I-VP -to TO I-VP -go VB I-VP -into IN B-PP -the DT B-NP -trade NN I-NP -figures NNS I-NP -without IN B-PP -a DT B-NP -flat JJ I-NP -position NN I-NP -'' '' O -in IN B-PP -the DT B-NP -pound NN I-NP -. . O - -Meanwhile RB B-ADVP -, , O -overall JJ B-NP -evidence NN I-NP -on IN B-PP -the DT B-NP -economy NN I-NP -remains VBZ B-VP -fairly RB B-ADJP -clouded VBN I-ADJP -. . O - -In IN B-PP -his PRP$ B-NP -Mansion NNP I-NP -House NNP I-NP -speech NN I-NP -, , O -Mr. NNP B-NP -Lawson NNP I-NP -warned VBD B-VP -that IN B-SBAR -a DT B-NP -further JJ I-NP -slowdown NN I-NP -can MD B-VP -be VB I-VP -expected VBN I-VP -as IN B-SBAR -the DT B-NP -impact NN I-NP -of IN B-PP -the DT B-NP -last JJ I-NP -rise NN I-NP -in IN B-PP -interest NN B-NP -rates NNS I-NP -earlier RBR B-NP -this DT I-NP -month NN I-NP -takes VBZ B-VP -effect NN B-NP -. . O - -U.K. JJ B-NP -base NN I-NP -rates NNS I-NP -are VBP B-VP -at IN B-PP -their PRP$ B-NP -highest JJS I-NP -level NN I-NP -in IN B-PP -eight CD B-NP -years NNS I-NP -. . O - -But CC O -consumer NN B-NP -expenditure NN I-NP -data NNS I-NP -released VBD B-VP -Friday NNP B-NP -do VBP B-VP -n't RB I-VP -suggest VB I-VP -that IN B-SBAR -the DT B-NP -U.K. NNP I-NP -economy NN I-NP -is VBZ B-VP -slowing VBG I-VP -that DT B-ADVP -quickly RB I-ADVP -. . O - -The DT B-NP -figures NNS I-NP -show VBP B-VP -that DT O -spending NN B-NP -rose VBD B-VP -0.1 CD B-NP -% NN I-NP -in IN B-PP -the DT B-NP -third JJ I-NP -quarter NN I-NP -from IN B-PP -the DT B-NP -second JJ I-NP -quarter NN I-NP -and CC O -was VBD B-VP -up IN B-ADVP -3.8 CD B-NP -% NN I-NP -from IN B-PP -a DT B-NP -year NN I-NP -ago RB B-ADVP -. . O - -This DT B-NP -compares VBZ B-VP -with IN B-PP -a DT B-NP -1.6 CD I-NP -% NN I-NP -rise NN I-NP -in IN B-PP -the DT B-NP -second NN I-NP -from IN B-PP -the DT B-NP -first JJ I-NP -quarter NN I-NP -and CC O -a DT B-NP -5.4 CD I-NP -% NN I-NP -increase NN I-NP -from IN B-PP -the DT B-NP -second JJ I-NP -quarter NN I-NP -of IN B-PP -1988 CD B-NP -. . O - -Mr. NNP B-NP -Dillow NNP I-NP -said VBD B-VP -the DT B-NP -data NNS I-NP -show VBP B-VP -the DT B-NP -economy NN I-NP -`` `` O -is VBZ B-VP -still RB B-ADVP -quite RB B-ADJP -strong JJ I-ADJP -, , O -'' '' O -but CC O -suggestions NNS B-NP -that IN B-SBAR -much NN B-NP -of IN B-PP -the DT B-NP -spending NN I-NP -went VBD B-VP -on IN B-PP -services NNS B-NP -rather RB B-PP -than IN I-PP -consumer NN B-NP -goods NNS I-NP -should MD B-VP -reduce VB I-VP -fears NNS B-NP -of IN B-PP -more JJR B-NP -import NN I-NP -rises NNS I-NP -. . O - -Certainly RB B-ADVP -, , O -the DT B-NP -chancellor NN I-NP -has VBZ B-VP -made VBN I-VP -it PRP B-NP -clear JJ B-ADJP -that IN B-SBAR -he PRP B-NP -is VBZ B-VP -prepared VBN I-VP -to TO I-VP -increase VB I-VP -interest NN B-NP -rates NNS I-NP -again RB B-ADVP -if IN B-SBAR -necessary JJ B-ADJP -to TO B-VP -both DT I-VP -ensure VB I-VP -that IN B-SBAR -a DT B-NP -substantial JJ I-NP -slowdown NN I-NP -does VBZ B-VP -take VB I-VP -place NN B-NP -and CC O -that DT O -sterling NN B-NP -does VBZ B-VP -n't RB I-VP -decline VB I-VP -further JJ B-ADVP -. . O - -Thursday NNP B-NP -, , O -he PRP B-NP -reminded VBD B-VP -his PRP$ B-NP -audience NN I-NP -that IN B-SBAR -the DT B-NP -government NN I-NP -`` `` O -can MD B-VP -not RB I-VP -allow VB I-VP -the DT B-NP -necessary JJ I-NP -rigor NN I-NP -of IN B-PP -monetary JJ B-NP -policy NN I-NP -to TO B-VP -be VB I-VP -undermined VBN I-VP -by IN B-PP -exchange NN B-NP -rate NN I-NP -weakness NN I-NP -. . O -'' '' O - -Analysts NNS B-NP -agree VBP B-VP -there EX B-NP -is VBZ B-VP -little JJ B-NP -holding NN B-VP -sterling NN B-NP -firm NN B-ADJP -at IN B-PP -the DT B-NP -moment NN I-NP -other JJ B-ADJP -than IN B-PP -Mr. NNP B-NP -Lawson NNP I-NP -'s POS B-NP -promise NN I-NP -that IN B-SBAR -rates NNS B-NP -will MD B-VP -be VB I-VP -pushed VBN I-VP -higher JJR B-ADJP -if IN B-SBAR -necessary JJ B-ADJP -. . O - -And CC O -, , O -they PRP B-NP -warn VBP B-VP -, , O -any DT B-NP -further JJ I-NP -drop NN I-NP -in IN B-PP -the DT B-NP -government NN I-NP -'s POS B-NP -popularity NN I-NP -could MD B-VP -swiftly RB I-VP -make VB I-VP -this DT B-NP -promise NN I-NP -sound NN B-VP -hollow JJ B-ADJP -. . O - -Sterling NNP B-NP -was VBD B-VP -already RB I-VP -showing VBG I-VP -some DT B-NP -signs NNS I-NP -of IN B-PP -a DT B-NP -lack NN I-NP -of IN B-PP -confidence NN B-NP -in IN B-PP -Mr. NNP B-NP -Lawson NNP I-NP -'s POS B-NP -promise NN I-NP -Friday NNP B-NP -. . O - -In IN B-PP -European JJ B-NP -trading NN I-NP -it PRP B-NP -declined VBD B-VP -to TO B-PP -$ $ B-NP -1.5890 CD I-NP -and CC O -2.9495 CD B-NP -marks NNS I-NP -from IN B-PP -$ $ B-NP -1.5940 CD I-NP -and CC O -2.9429 CD B-NP -marks NNS I-NP -late JJ B-NP -Thursday NNP I-NP -. . O - -Economists NNS B-NP -suggested VBD B-VP -that IN B-SBAR -if IN B-SBAR -the DT B-NP -pound NN I-NP -falls VBZ B-VP -much JJ B-NP -below IN B-PP -2.90 CD B-NP -marks NNS I-NP -, , O -the DT B-NP -government NN I-NP -will MD B-VP -be VB I-VP -forced VBN I-VP -to TO I-VP -increase VB I-VP -rates NNS B-NP -to TO B-PP -16 CD B-NP -% NN I-NP -, , O -both DT B-VP -to TO I-VP -halt VB B-VP -any DT B-NP -further JJ I-NP -decline NN I-NP -and CC O -ensure VB B-VP -that IN B-SBAR -the DT B-NP -balance NN I-NP -of IN B-PP -monetary JJ B-NP -policy NN I-NP -remains VBZ B-VP -unchanged JJ B-ADJP -. . O - -Friday NNP B-NP -'s POS B-NP -Market NNP I-NP -Activity NN I-NP - -The DT B-NP -dollar NN I-NP -posted VBD B-VP -gains NNS B-NP -in IN B-PP -quiet JJ B-NP -trading NN I-NP -as IN B-SBAR -concerns NNS B-NP -about IN B-PP -equities NNS B-NP -abated VBN B-VP -. . O - -Foreign JJ B-NP -exchange NN I-NP -dealers NNS I-NP -said VBD B-VP -that IN B-SBAR -the DT B-NP -currency NN I-NP -market NN I-NP -has VBZ B-VP -begun VBN I-VP -to TO I-VP -distance VB I-VP -itself PRP B-NP -from IN B-PP -the DT B-NP -volatile JJ I-NP -stock NN I-NP -exchange NN I-NP -, , O -which WDT B-NP -has VBZ B-VP -preoccupied VBN I-VP -the DT B-NP -market NN I-NP -since IN B-PP -Oct. NNP B-NP -13 CD I-NP -, , O -when WRB B-ADVP -the DT B-NP -Dow NNP I-NP -Jones NNP I-NP -Industrial NNP I-NP -Average NNP I-NP -plunged VBD B-VP -more JJR B-NP -than IN I-NP -190 CD I-NP -points NNS I-NP -. . O - -Currency NN B-NP -analysts NNS I-NP -predict VBP B-VP -that IN B-SBAR -in IN B-PP -the DT B-NP -coming VBG I-NP -week NN I-NP -the DT B-NP -foreign JJ I-NP -exchange NN I-NP -market NN I-NP -will MD B-VP -shift VB I-VP -its PRP$ B-NP -focus NN I-NP -back RB B-ADVP -to TO B-PP -economic JJ B-NP -fundamentals NNS I-NP -, , O -keeping VBG B-VP -a DT B-NP -close NN I-NP -eye NN I-NP -out IN B-ADVP -for IN B-PP -any DT B-NP -signs NNS I-NP -of IN B-PP -monetary JJ B-NP -easing NN I-NP -by IN B-PP -U.S. NNP B-NP -Federal NNP I-NP -Reserve NNP I-NP -. . O - -Late RB B-ADVP -in IN B-PP -the DT B-NP -New NNP I-NP -York NNP I-NP -trading NN I-NP -day NN I-NP -, , O -the DT B-NP -dollar NN I-NP -was VBD B-VP -quoted VBN I-VP -at IN B-PP -1.8578 CD B-NP -marks NNS I-NP -, , O -up IN B-ADVP -from IN B-PP -1.8470 CD B-NP -marks NNS I-NP -late JJ B-NP -Thursday NNP I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -. . O - -The DT B-NP -U.S. NNP I-NP -currency NN I-NP -was VBD B-VP -also RB I-VP -changing VBG I-VP -hands NNS B-NP -at IN B-PP -142.43 CD B-NP -yen NN I-NP -, , O -up IN B-ADVP -from IN B-PP -141.70 CD B-NP -yen NN I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -late JJ B-NP -Thursday NNP I-NP -. . O - -In IN B-PP -Tokyo NNP B-NP -on IN B-PP -Monday NNP B-NP -, , O -the DT B-NP -U.S. NNP I-NP -currency NN I-NP -opened VBD B-VP -for IN B-PP -trading NN B-NP -at IN B-PP -141.95 CD B-NP -yen NN I-NP -, , O -up IN B-ADVP -from IN B-PP -Friday NNP B-NP -'s POS B-NP -Tokyo NNP I-NP -close NN I-NP -of IN B-PP -141.35 CD B-NP -yen NN I-NP -. . O - -On IN B-PP -the DT B-NP -Commodity NNP I-NP -Exchange NNP I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -, , O -gold NN B-NP -for IN B-PP -current JJ B-NP -delivery NN I-NP -settled VBD B-VP -at IN B-PP -$ $ B-NP -367.30 CD I-NP -an DT B-NP -ounce NN I-NP -, , O -up IN B-ADVP -20 CD B-NP -cents NNS I-NP -. . O - -Estimated VBN B-NP -volume NN I-NP -was VBD B-VP -a DT B-NP -light NN I-NP -2.4 CD I-NP -million CD I-NP -ounces NNS I-NP -. . O - -In IN B-PP -early JJ B-NP -trading NN I-NP -in IN B-PP -Hong NNP B-NP -Kong NNP I-NP -Monday NNP B-NP -, , O -gold NN B-NP -was VBD B-VP -quoted VBN I-VP -at IN B-PP -$ $ B-NP -366.50 CD I-NP -an DT B-NP -ounce NN I-NP -. . O - -East NNP B-NP -Rock NNP I-NP -Partners NNP I-NP -Limited NNP I-NP -Partnership NNP I-NP -said VBD B-VP -it PRP B-NP -proposed VBD B-VP -to TO I-VP -acquire VB I-VP -A.P. NNP B-NP -Green NNP I-NP -Industries NNP I-NP -Inc. NNP I-NP -for IN B-PP -$ $ B-NP -40 CD I-NP -a DT B-NP -share NN I-NP -. . O - -In IN B-PP -an DT B-NP -Oct. NNP I-NP -19 CD I-NP -letter NN I-NP -to TO B-PP -A.P. NNP B-NP -Green NNP I-NP -'s POS B-NP -board NN I-NP -, , O -East NNP B-NP -Rock NNP I-NP -said VBD B-VP -the DT B-NP -offer NN I-NP -is VBZ B-VP -subject NN B-ADJP -to TO B-PP -the DT B-NP -signing NN I-NP -of IN B-PP -a DT B-NP -merger NN I-NP -agreement NN I-NP -by IN B-PP -no DT B-ADVP -later RB I-ADVP -than IN B-PP -Oct. NNP B-NP -31 CD I-NP -. . O - -The DT B-NP -letter NN I-NP -, , O -attached VBN B-VP -to TO B-PP -a DT B-NP -filing NN I-NP -with IN B-PP -the DT B-NP -Securities NNP I-NP -and CC I-NP -Exchange NNP I-NP -Commission NNP I-NP -, , O -said VBD B-VP -the DT B-NP -approval NN I-NP -is VBZ B-VP -also RB B-ADVP -contingent JJ B-ADJP -upon IN B-PP -obtaining VBG B-VP -satisfactory JJ B-NP -financing NN I-NP -. . O - -An DT B-NP -A.P. NNP I-NP -Green NNP I-NP -official NN I-NP -declined VBD B-VP -to TO I-VP -comment VB I-VP -on IN B-PP -the DT B-NP -filing NN I-NP -. . O - -The DT B-NP -$ $ I-NP -40-a-share JJ I-NP -proposal NN I-NP -values VBZ B-VP -the DT B-NP -company NN I-NP -at IN B-PP -about RB B-NP -$ $ I-NP -106.6 CD I-NP -million CD I-NP -. . O - -A.P. NNP B-NP -Green NNP I-NP -currently RB B-ADVP -has VBZ B-VP -2,664,098 CD B-NP -shares NNS I-NP -outstanding JJ B-ADJP -. . O - -Its PRP$ B-NP -stock NN I-NP -closed VBD B-VP -at IN B-PP -$ $ B-NP -38 CD I-NP -, , O -up IN B-ADVP -$ $ B-NP -1.875 CD I-NP -, , O -in IN B-PP -national JJ B-NP -over-the-counter JJ I-NP -trading NN I-NP -. . O - -The DT B-NP -company NN I-NP -is VBZ B-VP -a DT B-NP -Mexico NNP I-NP -, , I-NP -Mo. NNP I-NP -, , I-NP -maker NN I-NP -of IN B-PP -refractory JJ B-NP -products NNS I-NP -. . O - -East NNP B-NP -Rock NNP I-NP -also RB B-ADVP -said VBD B-VP -in IN B-PP -the DT B-NP -filing NN I-NP -that IN B-SBAR -it PRP B-NP -boosted VBD B-VP -its PRP$ B-NP -stake NN I-NP -in IN B-PP -A.P. NNP B-NP -Green NNP I-NP -to TO B-PP -8.7 CD B-NP -% NN I-NP -. . O - -It PRP B-NP -now RB B-ADVP -holds VBZ B-VP -233,000 CD B-NP -A.P. NNP I-NP -Green NNP I-NP -common JJ I-NP -shares NNS I-NP -, , O -including VBG B-PP -30,000 CD B-NP -shares NNS I-NP -bought VBD B-VP -last JJ B-NP -Thursday NNP I-NP -for IN B-PP -$ $ B-NP -35.50 CD I-NP -to TO I-NP -$ $ I-NP -36.50 CD I-NP -a DT B-NP -share NN I-NP -. . O - -New NNP B-NP -York-based JJ I-NP -John NNP I-NP -Kuhns NNP I-NP -and CC I-NP -Robert NNP I-NP -MacDonald NNP I-NP -control NN B-VP -East NNP B-NP -Rock NNP I-NP -Partners NNP I-NP -Inc. NNP I-NP -, , O -the DT B-NP -sole JJ I-NP -general JJ I-NP -partner NN I-NP -of IN B-PP -East NNP B-NP -Rock NNP I-NP -Partners NNP I-NP -L.P NNP I-NP -. . O - -The DT B-NP -sole JJ I-NP -limited JJ I-NP -partner NN I-NP -of IN B-PP -the DT B-NP -partnership NN I-NP -is VBZ B-VP -Westwood NNP B-NP -Brick NNP I-NP -Lime NNP I-NP -Inc. NNP I-NP -, , O -an DT B-NP -indirect JJ I-NP -subsidiary NN I-NP -of IN B-PP -Westwood NNP B-NP -Group NNP I-NP -Inc NNP I-NP -. . O - -Both DT B-NP -Westwood NNP B-NP -Brick NNP I-NP -and CC O -Westwood NNP B-NP -Group NNP I-NP -are VBP B-VP -based VBN I-VP -in IN B-PP -Boston NNP B-NP -. . O - -Freight NN B-NP -rates NNS I-NP -, , O -declining VBG B-VP -for IN B-PP -most RBS B-NP -of IN B-PP -the DT B-NP -decade NN I-NP -because IN B-PP -of IN I-PP -competition NN B-NP -spurred VBN B-VP -by IN B-PP -deregulation NN B-NP -, , O -are VBP B-VP -bottoming VBG I-VP -out IN B-PRT -, , O -turning VBG B-VP -upward RB B-ADVP -and CC O -threatening VBG B-VP -to TO I-VP -fuel VB I-VP -inflation NN B-NP -. . O - -Trucking NNP B-NP -, , I-NP -shipping VBG I-NP -and CC I-NP -air-freight NN I-NP -companies NNS I-NP -have VBP B-VP -announced VBN I-VP -rate NN B-NP -increases NNS I-NP -, , O -scheduled VBN B-VP -for IN B-PP -this DT B-NP -fall NN I-NP -or CC O -early JJ B-NP -next JJ I-NP -year NN I-NP -, , O -reflecting VBG B-VP -higher JJR B-NP -costs NNS I-NP -and CC O -tightened VBD B-NP -demand NN I-NP -for IN B-PP -freight NN B-NP -transport NN I-NP -. . O - -Major JJ B-NP -shippers NNS I-NP -say VBP B-VP -they PRP B-NP -expect VBP B-VP -freight NN B-NP -rates NNS I-NP -to TO B-VP -rise VB I-VP -at IN B-ADVP -least JJS I-ADVP -as RB B-ADVP -fast RB I-ADVP -as IN B-PP -inflation NN B-NP -and CC B-ADVP -maybe RB I-ADVP -faster RBR B-ADVP -in IN B-PP -the DT B-NP -next JJ I-NP -few JJ I-NP -years NNS I-NP -. . O - -That DT B-NP -'s VBZ B-VP -a DT B-NP -big JJ I-NP -change NN I-NP -from IN B-PP -recent JJ B-NP -years NNS I-NP -when WRB B-ADVP -freight NN B-NP -haulage NN I-NP -was VBD B-VP -a DT B-NP -bright JJ I-NP -spot NN I-NP -for IN B-PP -U.S. NNP B-NP -productivity NN I-NP -, , O -helping VBG B-VP -to TO I-VP -restrain VB I-VP -inflation NN B-NP -and CC O -make VB B-VP -U.S. NNP B-NP -industry NN I-NP -more RBR B-ADJP -competitive JJ I-ADJP -abroad RB B-ADVP -. . O - -`` `` O -Demand NN B-NP -has VBZ B-VP -caught VBN I-VP -up IN B-PRT -with IN B-PP -the DT B-NP -supply NN I-NP -of IN B-PP -certain JJ B-NP -types NNS I-NP -of IN B-PP -freight NN B-NP -transportation NN I-NP -, , O -and CC O -rates NNS B-NP -are VBP B-VP -starting VBG I-VP -to TO I-VP -move VB I-VP -up IN B-ADVP -'' '' O -at IN B-PP -a DT B-NP -rate NN I-NP -`` `` O -close RB B-ADJP -to TO B-PP -or CC O -slightly RB B-ADJP -more JJR I-ADJP -than IN B-PP -the DT B-NP -inflation NN I-NP -rate NN I-NP -, , O -'' '' O -said VBD B-VP -Clifford NNP B-NP -Sayre NNP I-NP -, , O -director NN B-NP -of IN B-PP -logistics NNS B-NP -at IN B-PP -Du NNP B-NP -Pont NNP I-NP -Co NNP I-NP -. . O - -Shippers NNS B-NP -surveyed VBN B-VP -recently RB B-ADVP -by IN B-PP -Ohio NNP B-NP -State NNP I-NP -University NNP I-NP -said VBD B-VP -they PRP B-NP -expect VBP B-VP -their PRP$ B-NP -freight-transport JJ I-NP -, , I-NP -storage NN I-NP -and CC I-NP -distribution NN I-NP -costs NNS I-NP -to TO B-VP -rise VB I-VP -about IN B-NP -4 CD I-NP -% NN I-NP -this DT B-NP -year NN I-NP -. . O - -Only RB B-NP -10 CD I-NP -% NN I-NP -of IN B-PP -the DT B-NP -250 CD I-NP -shippers NNS I-NP -polled VBN B-VP -expected VBN B-VP -their PRP$ B-NP -freight-transport JJ I-NP -costs NNS I-NP -to TO B-VP -decrease VB I-VP -, , O -compared VBN B-PP -with IN B-PP -30 CD B-NP -% NN I-NP -who WP B-NP -had VBD B-VP -looked VBN I-VP -to TO B-PP -freight VB B-NP -transport NN I-NP -to TO B-VP -reduce VB I-VP -costs NNS B-NP -in IN B-PP -past JJ B-NP -years NNS I-NP -. . O - -`` `` O -This DT B-NP -is VBZ B-VP -the DT B-NP -first JJ I-NP -year NN I-NP -since IN B-PP -transportation NN B-NP -deregulation NN I-NP -in IN B-PP -1980 CD B-NP -that IN B-ADVP -we PRP B-NP -have VBP B-VP -had VBN I-VP -such JJ B-NP -a DT I-NP -dramatic JJ I-NP -and CC I-NP -broad-based JJ I-NP -upturn NN I-NP -in IN B-PP -perceived VBN B-NP -transportation NN I-NP -rates NNS I-NP -, , O -'' '' O -said VBD B-VP -Bernard NNP B-NP -LaLonde NNP I-NP -, , O -a DT B-NP -transportation NN I-NP -logistics NNS I-NP -professor NN I-NP -at IN B-PP -Ohio NNP B-NP -State NNP I-NP -in IN B-PP -Columbus NNP B-NP -. . O - -The DT B-NP -deregulation NN I-NP -of IN B-PP -railroads NNS B-NP -and CC I-NP -trucking NN I-NP -companies NNS I-NP -that WDT B-NP -began VBD B-VP -in IN B-PP -1980 CD B-NP -enabled VBD B-VP -shippers NNS B-NP -to TO B-VP -bargain VB I-VP -for IN B-PP -transportation NN B-NP -. . O - -Carriers NNP B-NP -could MD B-VP -use VB I-VP -their PRP$ B-NP -equipment NN I-NP -more RBR B-ADVP -efficiently RB I-ADVP -, , O -leading VBG B-VP -to TO B-PP -overcapacity NN B-NP -they PRP B-NP -were VBD B-VP -eager JJ B-ADJP -to TO B-VP -fill VB I-VP -. . O - -Shippers NNS B-NP -cut VBP B-VP -about RB B-NP -$ $ I-NP -35 CD I-NP -billion CD I-NP -from IN B-PP -their PRP$ B-NP -annual JJ I-NP -, , I-NP -inter-city JJ I-NP -truck NN I-NP -and CC I-NP -rail NN I-NP -costs NNS I-NP -, , O -to TO B-PP -about RB B-NP -$ $ I-NP -150 CD I-NP -billion CD I-NP -, , O -or CC O -about IN B-NP -6.4 CD I-NP -% NN I-NP -of IN B-PP -gross JJ B-NP -national JJ I-NP -product NN I-NP -, , O -down RB B-ADVP -from IN B-PP -8 CD B-NP -% NN I-NP -of IN B-PP -GNP NNP B-NP -in IN B-PP -1981 CD B-NP -. . O - -But CC O -with IN B-PP -much NN B-NP -of IN B-PP -the DT B-NP -inefficiency NN I-NP -squeezed VBN B-VP -out IN B-PP -of IN B-PP -the DT B-NP -freight-transport JJ I-NP -system NN I-NP -, , O -rising VBG B-NP -costs NNS I-NP -are VBP B-VP -likely JJ B-ADJP -to TO B-VP -be VB I-VP -reflected VBN I-VP -directly RB B-ADVP -in IN B-PP -higher JJR B-NP -freight NN I-NP -rates NNS I-NP -. . O - -`` `` O -Shippers NNS B-NP -are VBP B-VP -saying VBG I-VP -` `` O -the DT B-NP -party NN I-NP -'s POS B-VP -over IN B-ADJP -, , O -' '' O -'' '' O -said VBD B-VP -Mr. NNP B-NP -LaLonde NNP I-NP -. . O - -`` `` O -Shippers NNS B-NP -wo MD B-VP -n't RB I-VP -be VB I-VP -able JJ B-ADJP -to TO B-VP -look VB I-VP -for IN B-PP -transportation-cost JJ B-NP -savings NNS I-NP -as IN B-SBAR -they PRP B-NP -have VBP B-VP -for IN B-PP -the DT B-NP -last JJ I-NP -eight CD I-NP -or CC I-NP -nine CD I-NP -years NNS I-NP -. . O - -Transport NN B-NP -rates NNS I-NP -wo MD B-VP -n't RB I-VP -be VB I-VP -an DT B-NP -opportunity NN I-NP -for IN B-PP -offsetting VBG B-VP -cost NN B-NP -increases NNS I-NP -in IN B-PP -other JJ B-NP -segments NNS I-NP -of IN B-PP -the DT B-NP -economy NN I-NP -. . O -'' '' O - -Robert NNP B-NP -Delaney NNP I-NP -, , O -a DT B-NP -consultant NN I-NP -at IN B-PP -Arthur NNP B-NP -D. NNP I-NP -Little NNP I-NP -Inc. NNP I-NP -, , O -Cambridge NNP B-NP -, , O -Mass. NNP B-NP -, , O -said VBD B-VP -`` `` O -We PRP B-NP -'ve VBP B-VP -gotten VBN I-VP -all PDT B-NP -the DT I-NP -benefits NNS I-NP -of IN B-PP -deregulation NN B-NP -in IN B-PP -freight-cost JJ B-NP -reductions NNS I-NP -. . O - -Now RB B-ADVP -we PRP B-NP -are VBP B-VP -starting VBG I-VP -to TO I-VP -see VB I-VP -real JJ B-NP -freight-rate JJ I-NP -increases NNS I-NP -as IN B-SBAR -carriers NNS B-NP -replace VBP B-VP -equipment NN B-NP -, , O -pay VB B-VP -higher JJR B-NP -fuel NN I-NP -costs NNS I-NP -and CC O -pay VB B-VP -more JJR B-NP -for IN B-PP -labor NN B-NP -. . O - -You PRP B-NP -'ll MD B-VP -see VB I-VP -carriers NNS B-NP -try VB B-VP -to TO I-VP -recoup VB I-VP -some DT B-NP -of IN B-PP -the DT B-NP -price NN I-NP -cutting VBG I-NP -that WDT B-NP -occurred VBD B-VP -previously RB B-ADVP -. . O -'' '' O - -Not RB B-NP -everyone NN I-NP -believes VBZ B-VP -that IN B-SBAR -the DT B-NP -good JJ I-NP -times NNS I-NP -are VBP B-VP -over IN B-ADJP -for IN B-PP -shippers NNS B-NP -. . O - -`` `` O -There EX B-NP -'s VBZ B-VP -still RB B-ADVP -a DT B-NP -lot NN I-NP -of IN B-PP -pressure NN B-NP -on IN B-PP -rates NNS B-NP -in IN B-PP -both DT B-NP -rail NN I-NP -and CC I-NP -truck NN I-NP -, , O -'' '' O -said VBD B-VP -Gerard NNP B-NP -McCullough NNP I-NP -, , O -lecturer NN B-NP -in IN B-PP -transportation NN B-NP -at IN B-PP -Massachusetts NNP B-NP -Institute NNP I-NP -of IN B-PP -Technology NNP B-NP -. . O - -Less-than-truckload JJ B-NP -companies NNS I-NP -, , O -which WDT B-NP -carry VBP B-VP -the DT B-NP -freight NN I-NP -of IN B-PP -several JJ B-NP -shippers NNS I-NP -in IN B-PP -each DT B-NP -truck NN I-NP -trailer NN I-NP -, , O -discounted VBD B-VP -away RB B-ADVP -a DT B-NP -4.7 CD I-NP -% NN I-NP -rate NN I-NP -increase NN I-NP -implemented VBD B-VP -last JJ B-NP -April NNP I-NP -. . O - -The DT B-NP -carriers NNS I-NP -were VBD B-VP -competing VBG I-VP -fiercely RB B-ADVP -for IN B-PP -market NN B-NP -share NN I-NP -. . O - -Railroad-rate JJ B-NP -increases NNS I-NP -are VBP B-VP -likely JJ B-ADJP -to TO B-VP -be VB I-VP -restrained VBN I-VP -by IN B-PP -weakening VBG B-NP -rail-traffic JJ I-NP -levels NNS I-NP -and CC O -keen JJ B-NP -competition NN I-NP -for IN B-PP -freight NN B-NP -from IN B-PP -trucks NNS B-NP -. . O - -An DT B-NP -official NN I-NP -at IN B-PP -Consolidated NNP B-NP -Freightways NNP I-NP -Inc. NNP I-NP -, , O -a DT B-NP -Menlo NNP I-NP -Park NNP I-NP -, , I-NP -Calif. NNP I-NP -, , I-NP -less-than-truckload JJ I-NP -carrier NN I-NP -, , O -said VBD B-VP -rate NN B-NP -discounting NN I-NP -in IN B-PP -that DT B-NP -industry NN I-NP -has VBZ B-VP -begun VBN I-VP -to TO I-VP -`` `` O -stabilize VB B-VP -. . O -'' '' O - -Consolidated NNP B-NP -Freightways NNP I-NP -plans VBZ B-VP -to TO I-VP -raise VB I-VP -its PRP$ B-NP -rates NNS I-NP -5.3 CD B-NP -% NN I-NP -late JJ B-NP -this DT I-NP -year NN I-NP -or CC O -early JJ B-NP -next JJ I-NP -year NN I-NP -, , O -and CC O -at IN B-NP -least JJS I-NP -two CD I-NP -competitors NNS I-NP -have VBP B-VP -announced VBN I-VP -similar JJ B-NP -increases NNS I-NP -. . O - -Truckers NNS B-NP -are VBP B-VP -`` `` O -trying VBG B-VP -to TO I-VP -send VB I-VP -signals NNS B-NP -that IN B-SBAR -they PRP B-NP -need VBP B-VP -to TO I-VP -stop VB I-VP -the DT B-NP -bloodletting NN I-NP -, , O -forget VB B-VP -about IN B-PP -market NN B-NP -share NN I-NP -and CC O -go VB B-VP -for IN B-PP -higher JJR B-NP -rates NNS I-NP -, , O -'' '' O -said VBD B-VP -Michael NNP B-NP -Lloyd NNP I-NP -, , O -an DT B-NP -analyst NN I-NP -at IN B-PP -Salomon NNP B-NP -Bros NNP I-NP -. . O - -And CC O -`` `` O -shippers NNS B-NP -are VBP B-VP -getting VBG I-VP -the DT B-NP -feeling NN I-NP -that IN B-SBAR -they PRP B-NP -have VBP B-VP -played VBN I-VP -one CD B-NP -trucker NN I-NP -off IN B-ADVP -against IN B-PP -another DT B-NP -as RB B-NP -much JJ I-NP -as IN B-SBAR -they PRP B-NP -can MD B-VP -, , O -'' '' O -he PRP B-NP -said VBD B-VP -. . O - -Air-freight NN B-NP -carriers NNS I-NP -raised VBD B-VP -their PRP$ B-NP -rates NNS I-NP -for IN B-PP -U.S. NNP B-NP -products NNS I-NP -going VBG B-VP -across IN B-PP -the DT B-NP -Pacific NNP I-NP -to TO B-PP -Asia NNP B-NP -by IN B-PP -about IN B-NP -20 CD I-NP -% NN I-NP -earlier RBR B-NP -this DT I-NP -month NN I-NP -. . O - -And CC O -Japan NNP B-NP -Air NNP I-NP -Lines NNPS I-NP -said VBD B-VP -it PRP B-NP -plans VBZ B-VP -to TO I-VP -boost VB I-VP -its PRP$ B-NP -rates NNS I-NP -a DT B-NP -further JJ I-NP -25 CD I-NP -% NN I-NP -over IN B-PP -the DT B-NP -next JJ I-NP -two CD I-NP -years NNS I-NP -. . O - -Such JJ B-NP -rate NN I-NP -increases NNS I-NP -`` `` O -will MD B-VP -increase VB I-VP -the DT B-NP -total JJ I-NP -cost NN I-NP -of IN B-PP -U.S. NNP B-NP -products NNS I-NP -and CC O -slow JJ B-VP -down RP B-PRT -the DT B-NP -rate NN I-NP -of IN B-PP -increase NN B-NP -of IN B-PP -U.S. NNP B-NP -exports NNS I-NP -, , O -'' '' O -said VBD B-VP -Richard NNP B-NP -Connors NNP I-NP -, , O -a DT B-NP -senior JJ I-NP -vice NN I-NP -president NN I-NP -of IN B-PP -Yusen NNP B-NP -Air NNP I-NP -& CC I-NP -Sea NNP I-NP -Service NNP I-NP -U.S.A. NNP I-NP -Inc. NNP I-NP -, , O -the DT B-NP -U.S. NNP I-NP -air-freight-forwarding JJ I-NP -subsidiary NN I-NP -of IN B-PP -Nippon NNP B-NP -Yusen NNP I-NP -Kaisha NNP I-NP -of IN B-PP -Japan NNP B-NP -. . O - -Ship NN B-NP -companies NNS I-NP -carrying VBG B-VP -bulk NN B-NP -commodities NNS I-NP -, , O -such JJ B-PP -as IN I-PP -oil NN B-NP -, , O -grain NN B-NP -, , O -coal NN B-NP -and CC O -iron NN B-NP -ore NN I-NP -, , O -have VBP B-VP -been VBN I-VP -able JJ B-ADJP -to TO B-VP -increase VB I-VP -their PRP$ B-NP -rates NNS I-NP -in IN B-PP -the DT B-NP -last JJ I-NP -couple NN I-NP -of IN B-PP -years NNS B-NP -. . O - -Some DT B-NP -bulk NN I-NP -shipping VBG I-NP -rates NNS I-NP -have VBP B-VP -increased VBN I-VP -`` `` O -3 CD B-NP -% NN I-NP -to TO I-NP -4 CD I-NP -% NN I-NP -in IN B-PP -the DT B-NP -past JJ I-NP -few JJ I-NP -months NNS I-NP -, , O -'' '' O -said VBD B-VP -Salomon NNP B-NP -'s POS B-NP -Mr. NNP I-NP -Lloyd NNP I-NP -. . O - -And CC O -ship NN B-NP -lines NNS I-NP -carrying VBG B-VP -containers NNS B-NP -are VBP B-VP -also RB I-VP -trying VBG I-VP -to TO I-VP -raise VB I-VP -their PRP$ B-NP -rates NNS I-NP -. . O - -Carriers NNP B-NP -boosted VBD B-VP -rates NNS B-NP -more JJR B-NP -than IN I-NP -10 CD I-NP -% NN I-NP -in IN B-PP -the DT B-NP -North NNP I-NP -Atlantic NNP I-NP -between IN B-PP -the DT B-NP -U.S. NNP I-NP -and CC O -Europe NNP B-NP -last JJ B-NP -September NNP I-NP -, , O -hoping VBG B-VP -to TO I-VP -partly RB I-VP -restore VB I-VP -rates NNS B-NP -to TO B-PP -earlier JJR B-NP -levels NNS I-NP -. . O - -Ship NN B-NP -lines NNS I-NP -operating VBG B-VP -in IN B-PP -the DT B-NP -Pacific NNP I-NP -plan NN B-VP -to TO I-VP -raise VB I-VP -rates NNS B-NP -on IN B-PP -containers NNS B-NP -carrying VBG B-VP -U.S. NNP B-NP -exports NNS I-NP -to TO B-PP -Asia NNP B-NP -about IN B-NP -10 CD I-NP -% NN I-NP -, , O -effective JJ B-ADJP -next JJ B-NP -April NNP I-NP -. . O - -MGM NNP B-NP -Grand NNP I-NP -Inc. NNP I-NP -said VBD B-VP -it PRP B-NP -filed VBD B-VP -a DT B-NP -registration NN I-NP -statement NN I-NP -with IN B-PP -the DT B-NP -Securities NNP I-NP -and CC I-NP -Exchange NNP I-NP -Commission NNP I-NP -for IN B-PP -a DT B-NP -public JJ I-NP -offering NN I-NP -of IN B-PP -six CD B-NP -million CD I-NP -common JJ I-NP -shares NNS I-NP -. . O - -The DT B-NP -Beverly NNP I-NP -Hills NNP I-NP -, , I-NP -Calif.-based JJ I-NP -company NN I-NP -said VBD B-VP -it PRP B-NP -would MD B-VP -have VB I-VP -26.9 CD B-NP -million CD I-NP -common JJ I-NP -shares NNS I-NP -outstanding JJ B-ADJP -after IN B-PP -the DT B-NP -offering NN I-NP -. . O - -The DT B-NP -hotel NN I-NP -and CC I-NP -Gaming NNP I-NP -company NN I-NP -said VBD B-VP -Merrill NNP B-NP -Lynch NNP I-NP -Capital NNP I-NP -Markets NNPS I-NP -will MD B-VP -lead VB I-VP -the DT B-NP -underwriters NNS I-NP -. . O - -Proceeds NNS B-NP -from IN B-PP -the DT B-NP -sale NN I-NP -will MD B-VP -be VB I-VP -used VBN I-VP -for IN B-PP -remodeling VBG B-NP -and CC I-NP -refurbishing VBG I-NP -projects NNS I-NP -, , B-PP -as RB I-PP -well RB I-PP -as IN I-PP -for IN B-PP -the DT B-NP -planned VBN I-NP -MGM NNP I-NP -Grand NNP I-NP -hotel\/casino NN I-NP -and CC I-NP -theme NN I-NP -park NN I-NP -. . O - -Bob NNP B-NP -Stone NNP I-NP -stewed JJ B-VP -over IN B-PP -a DT B-NP -letter NN I-NP -from IN B-PP -his PRP$ B-NP -manager NN I-NP -putting VBG B-VP -him PRP B-NP -on IN B-PP -probation NN B-NP -for IN B-PP -insubordination NN B-NP -. . O - -Mr. NNP B-NP -Stone NNP I-NP -thought VBD B-VP -the DT B-NP -discipline NN I-NP -was VBD B-VP -unfair JJ B-ADJP -; : O -he PRP B-NP -believed VBD B-VP -that IN B-SBAR -his PRP$ B-NP -manager NN I-NP -wanted VBD B-VP -to TO I-VP -get VB I-VP -rid JJ B-ADJP -of IN B-PP -him PRP B-NP -for IN B-PP -personal JJ B-NP -reasons NNS I-NP -. . O - -Unable JJ B-ADJP -to TO B-VP -persuade VB I-VP -the DT B-NP -manager NN I-NP -to TO B-VP -change VB I-VP -his PRP$ B-NP -decision NN I-NP -, , O -he PRP B-NP -went VBD B-VP -to TO B-PP -a DT B-NP -`` `` I-NP -company NN I-NP -court NN I-NP -'' '' O -for IN B-PP -a DT B-NP -hearing NN I-NP -. . O - -At IN B-PP -the DT B-NP -scheduled VBN I-NP -time NN I-NP -, , O -Mr. NNP B-NP -Stone NNP I-NP -entered VBD B-VP -a DT B-NP -conference NN I-NP -room NN I-NP -in IN B-PP -a DT B-NP -building NN I-NP -near IN B-PP -where WRB B-ADVP -he PRP B-NP -worked VBD B-VP -. . O - -After IN B-SBAR -the DT B-NP -three CD I-NP -members NNS I-NP -of IN B-PP -the DT B-NP -court NN I-NP -introduced VBD B-VP -themselves PRP B-NP -, , O -the DT B-NP -chairman NN I-NP -of IN B-PP -the DT B-NP -panel NN I-NP -said VBD B-VP -: : O -`` `` O -Go VB B-VP -ahead RB B-ADVP -and CC O -tell VB B-VP -us PRP B-NP -what WP B-NP -happened VBD B-VP -. . O - -We PRP B-NP -may MD B-VP -ask VB I-VP -questions NNS B-NP -as IN B-SBAR -you PRP B-NP -go VBP B-VP -along IN B-PRT -, , O -or CC O -we PRP B-NP -may MD B-VP -wait VB I-VP -until IN B-PP -the DT B-NP -end NN I-NP -. . O -'' '' O - -No DT B-NP -lawyers NNS I-NP -or CC I-NP -tape NN I-NP -recorders NNS I-NP -were VBD B-VP -present JJ B-ADJP -. . O - -The DT B-NP -only RB I-NP -extra JJ I-NP -people NNS I-NP -were VBD B-VP -a DT B-NP -couple NN I-NP -of IN B-PP -personnel NNS B-NP -specialists NNS I-NP -, , O -one CD B-NP -of IN B-PP -whom WP B-NP -knew VBD B-VP -Mr. NNP B-NP -Stone NNP I-NP -'s POS B-NP -case NN I-NP -intimately RB B-ADVP -and CC O -would MD B-VP -help VB I-VP -fill VB I-VP -in IN B-PRT -any DT B-NP -facts NNS I-NP -needed VBN B-VP -to TO B-VP -give VB I-VP -the DT B-NP -court NN I-NP -the DT B-NP -full JJ I-NP -picture NN I-NP -. . O - -Over IN B-PP -a DT B-NP -cup NN I-NP -of IN B-PP -coffee NN B-NP -, , O -Mr. NNP B-NP -Stone NNP I-NP -told VBD B-VP -his PRP$ B-NP -story NN I-NP -. . O - -He PRP B-NP -talked VBD B-VP -about IN B-NP -20 CD I-NP -minutes NNS I-NP -. . O - -When WRB B-ADVP -he PRP B-NP -was VBD B-VP -through IN B-ADJP -, , O -the DT B-NP -court NN I-NP -members NNS I-NP -asked VBD B-VP -many JJ B-NP -questions NNS I-NP -, , O -then RB B-ADVP -the DT B-NP -chairman NN I-NP -said VBD B-VP -they PRP B-NP -would MD B-VP -like VB I-VP -to TO I-VP -hear VB I-VP -his PRP$ B-NP -manager NN I-NP -'s POS B-NP -side NN I-NP -and CC O -talk VB B-VP -to TO B-PP -witnesses NNS B-NP -. . O - -The DT B-NP -chairman NN I-NP -promised VBD B-VP -Mr. NNP B-NP -Stone NNP I-NP -a DT B-NP -decision NN I-NP -within IN B-PP -two CD B-NP -weeks NNS I-NP -. . O - -Bob NNP B-NP -Stone NNP I-NP -is VBZ B-VP -a DT B-NP -fictional JJ I-NP -name NN I-NP -, , O -but CC O -the DT B-NP -incident NN I-NP -described VBN B-VP -is VBZ B-VP -real JJ B-ADJP -. . O - -It PRP B-NP -happened VBD B-VP -at IN B-PP -Northrop NNP B-NP -Corp. NNP I-NP -in IN B-PP -Los NNP B-NP -Angeles NNP I-NP -. . O - -The DT B-NP -court NN I-NP -is VBZ B-VP -called VBN I-VP -the DT B-NP -Management NNP I-NP -Appeals NNP I-NP -Committee NNP I-NP -, , O -or CC O -just RB B-NP -`` `` I-NP -MAC NNP I-NP -, , O -'' '' O -and CC O -it PRP B-NP -is VBZ B-VP -likely JJ B-ADJP -to TO B-VP -hear VB I-VP -a DT B-NP -couple NN I-NP -of IN I-NP -dozen NN I-NP -cases VBZ I-NP -a DT B-NP -year NN I-NP -. . O - -Alter VB B-VP -some DT B-NP -details NNS I-NP -of IN B-PP -this DT B-NP -example NN I-NP -and CC O -it PRP B-NP -could MD B-VP -be VB I-VP -taking VBG I-VP -place NN B-NP -today NN B-ADVP -at IN B-PP -Federal NNP B-NP -Express NNP I-NP -in IN B-PP -Memphis NNP B-NP -, , O -the DT B-NP -Defense NNP I-NP -and CC I-NP -Underseas NNP I-NP -Systems NNP I-NP -divisions NNS I-NP -of IN B-PP -Honeywell NNP B-NP -in IN B-PP -Minneapolis NNP B-NP -, , O -a DT B-NP -General NNP I-NP -Electric NNP I-NP -plant NN I-NP -in IN B-PP -Columbia NNP B-NP -, , O -Md. NNP B-NP -, , O -or CC O -a DT B-NP -number NN I-NP -of IN B-PP -other JJ B-NP -companies NNS I-NP -. . O - -These DT B-NP -firms NNS I-NP -are VBP B-VP -pioneers NNS B-NP -in IN B-PP -a DT B-NP -significant JJ I-NP -new JJ I-NP -trend NN I-NP -in IN B-PP -the DT B-NP -corporate JJ I-NP -world NN I-NP -: : O -the DT B-NP -rise NN I-NP -of IN B-PP -what WP B-NP -I PRP B-NP -call VBP B-VP -corporate JJ B-NP -due JJ I-NP -process NN I-NP -. . O - -Although IN B-SBAR -corporate JJ B-NP -due JJ I-NP -process NN I-NP -is VBZ B-VP -practiced VBN I-VP -today NN B-NP -in IN B-PP -few JJ B-NP -companies NNS I-NP --- : O -perhaps RB B-ADVP -40 CD B-NP -to TO I-NP -60 CD I-NP --- : O -it PRP B-NP -is VBZ B-VP -one CD B-NP -of IN B-PP -the DT B-NP -fastest JJS I-NP -developing VBG I-NP -trends NNS I-NP -in IN B-PP -industry NN B-NP -. . O - -In IN B-PP -the DT B-NP -coming VBG I-NP -decade NN I-NP -a DT B-NP -majority NN I-NP -of IN B-PP -people-oriented JJ B-NP -companies NNS I-NP -are VBP B-VP -likely JJ B-ADJP -to TO B-VP -adopt VB I-VP -it PRP B-NP -. . O - -Corporate JJ B-NP -due JJ I-NP -process NN I-NP -appeals NNS B-VP -to TO B-PP -management NN B-NP -for IN B-PP -a DT B-NP -variety NN I-NP -of IN B-PP -reasons NNS B-NP -. . O - -It PRP B-NP -reduces VBZ B-VP -lawsuits NNS B-NP -from IN B-PP -disgruntled JJ B-NP -employees NNS I-NP -and CC I-NP -ex-employees NNS I-NP -, , O -with IN B-PP -all DT B-NP -that WDT B-NP -means VBZ B-VP -for IN B-PP -reduced VBN B-NP -legal JJ I-NP -costs NNS I-NP -and CC O -better RBR B-NP -public JJ I-NP -relations NNS I-NP -. . O - -It PRP B-NP -helps VBZ B-VP -to TO I-VP -keep VB I-VP -out IN B-PRT -unions NNS B-NP -. . O - -It PRP B-NP -increases VBZ B-VP -employee NN B-NP -commitment NN I-NP -to TO B-PP -the DT B-NP -company NN I-NP -, , O -with IN B-PP -all DT B-NP -that WDT B-NP -means VBZ B-VP -for IN B-PP -efficiency NN B-NP -and CC O -quality NN B-NP -control NN I-NP -. . O - -What WP B-NP -must MD O -your PRP$ B-NP -management NN I-NP -team NN I-NP -do VBP B-VP -to TO B-VP -establish VB I-VP -corporate JJ B-NP -due JJ I-NP -process NN I-NP -? . O - -Here RB B-ADVP -are VBP B-VP -four CD B-NP -key JJ I-NP -steps NNS I-NP -: : O - -1 CD B-LST -. . O -Make VB B-VP -sure JJ B-ADJP -you PRP B-NP -have VBP B-VP -a DT B-NP -strong JJ I-NP -personnel NNS I-NP -department NN I-NP -. . O - -It PRP B-NP -must MD B-VP -be VB I-VP -able JJ B-ADJP -to TO B-VP -handle VB I-VP -most RBS B-NP -of IN B-PP -the DT B-NP -complaints NNS I-NP -that WDT B-NP -can MD B-VP -not RB I-VP -be VB I-VP -solved VBN I-VP -in IN B-PP -the DT B-NP -trenches NNS I-NP -by IN B-PP -managers NNS B-NP -and CC O -their PRP$ B-NP -subordinates NNS I-NP -, , O -else RB B-ADVP -the DT B-NP -company NN I-NP -court NN I-NP -or CC I-NP -adjudicators NNS I-NP -will MD B-VP -be VB B-VP -inundated VBN I-VP -with IN B-PP -cases NNS B-NP -. . O - -At IN B-PP -Polaroid NNP B-NP -, , O -the DT B-NP -Personnel NNP I-NP -Policy NNP I-NP -Planning NNP I-NP -Committee NNP I-NP -may MD B-VP -hear VB I-VP -only RB B-NP -about IN I-NP -20 CD I-NP -cases VBZ I-NP -a DT B-NP -year NN I-NP -; : O -the DT B-NP -rest NN I-NP -of IN B-PP -the DT B-NP -many JJ I-NP -hundreds NNS I-NP -of IN B-PP -complaints NNS B-NP -are VBP B-VP -resolved VBN I-VP -at IN B-PP -earlier JJR B-NP -stages NNS I-NP -. . O - -At IN B-PP -TWA NNP B-NP -, , O -the DT B-NP -System NNP I-NP -Board NNP I-NP -of IN B-PP -Adjustment NNP B-NP -hears VBZ B-VP -50 CD B-NP -to TO I-NP -75 CD I-NP -cases VBZ I-NP -a DT B-NP -year NN I-NP -, , O -only RB B-NP -a DT I-NP -fraction NN I-NP -of IN B-PP -the DT B-NP -complaints NNS I-NP -brought VBN B-VP -to TO B-PP -personnel NNS B-NP -specialists NNS I-NP -. . O - -At IN B-PP -Citicorp NNP B-NP -, , O -the DT B-NP -Problem NNP I-NP -Review NNP I-NP -Board NNP I-NP -may MD B-VP -hear VB I-VP -only RB B-NP -12 CD I-NP -or CC I-NP -so RB I-NP -cases VBZ I-NP -because IN B-PP -of IN I-PP -personnel NNS B-NP -'s POS B-NP -skill NN I-NP -in IN B-PP -complaint-resolution NN B-NP -. . O - -In IN B-PP -a DT B-NP -typical JJ I-NP -year NN I-NP -, , O -up IN B-NP -to TO I-NP -20 CD I-NP -% NN I-NP -of IN B-PP -the DT B-NP -work NN I-NP -force NN I-NP -goes VBZ B-VP -to TO B-PP -personnel NNS B-NP -specialists NNS I-NP -with IN B-PP -complaints NNS B-NP -of IN B-PP -unfair JJ B-NP -treatment NN I-NP -. . O - -In IN B-PP -a DT B-NP -large JJ I-NP -company NN I-NP -that WDT B-NP -means VBZ B-VP -many JJ B-NP -hundreds NNS I-NP -of IN B-PP -complaints NNS B-NP -for IN B-PP -personnel NNS B-NP -to TO B-VP -handle VB I-VP -. . O - -2 CD B-LST -. . O -Formally RB B-ADVP -or CC I-ADVP -informally RB I-ADVP -, , O -train NN B-VP -all DT B-NP -your PRP$ I-NP -managers NNS I-NP -and CC I-NP -supervisors NNS I-NP -in IN B-PP -the DT B-NP -company NN I-NP -'s POS B-NP -due-process NN I-NP -approach NN I-NP -. . O - -See VB B-VP -that IN B-SBAR -they PRP B-NP -know VBP B-VP -company NN B-NP -personnel NNS I-NP -policy NN I-NP -backwards RB B-ADVP -and CC I-ADVP -forwards RB I-ADVP -, , O -for IN O -it PRP B-NP -is VBZ B-VP -the DT B-NP -`` `` I-NP -law NN I-NP -'' '' O -governing VBG B-VP -company NN B-NP -courts NNS I-NP -and CC I-NP -adjudicators NNS I-NP -. . O - -Coach NNP B-VP -them PRP B-NP -in IN B-PP -handling NN B-VP -complaints NNS B-NP -so RB B-SBAR -that IN I-SBAR -they PRP B-NP -can MD B-VP -resolve VB I-VP -problems NNS B-NP -immediately RB B-ADVP -. . O - -In IN B-SBAR -case NN O -managers NNS B-NP -and CC O -personnel NNS B-NP -specialists NNS I-NP -are VBP B-VP -unsuccessful JJ B-ADJP -and CC O -subordinates NNS B-NP -take VBP B-VP -their PRP$ B-NP -complaints NNS I-NP -to TO B-PP -a DT B-NP -company NN I-NP -court NN I-NP -or CC I-NP -adjudicator NN I-NP -, , O -teach VB B-VP -managers NNS B-NP -to TO B-VP -accept VB I-VP -reversals NNS B-NP -as IN B-PP -a DT B-NP -fact NN I-NP -of IN B-PP -business NN B-NP -life NN I-NP -, , O -for IN O -in IN B-PP -a DT B-NP -good JJ I-NP -due-process NN I-NP -system NN I-NP -they PRP B-NP -are VBP B-VP -bound VBN I-VP -to TO I-VP -happen VB I-VP -. . O - -In IN B-PP -the DT B-NP -15 CD I-NP -companies NNS I-NP -I PRP B-NP -studied VBD B-VP -, , O -reversal NN B-NP -rates NNS I-NP -range VBP B-VP -on IN B-PP -the DT B-NP -average NN I-NP -from IN B-PP -20 CD B-NP -% NN I-NP -to TO B-PP -40 CD B-NP -% NN I-NP -. . O - -3 CD B-LST -. . O -Decide VB B-VP -whether IN O -you PRP B-NP -want VBP B-VP -a DT B-NP -panel NN I-NP -system NN I-NP -or CC O -a DT B-NP -single JJ I-NP -adjudicator NN I-NP -. . O - -A DT B-NP -panel NN I-NP -system NN I-NP -like IN B-PP -that DT B-NP -in NN B-PP -the DT B-NP -Bob NNP I-NP -Stone NNP I-NP -example NN I-NP -enjoys VBZ B-VP -such JJ B-NP -advantages NNS I-NP -as IN B-PP -high JJ B-NP -credibility NN I-NP -and CC O -, , O -for IN B-PP -the DT B-NP -panelists NNS I-NP -, , O -mutual JJ B-NP -support NN I-NP -. . O - -An DT B-NP -adjudicator NN I-NP -system NN I-NP --- : O -that DT B-INTJ -is VBZ I-INTJ -, , O -an DT B-NP -investigator NN I-NP -who WP B-NP -acts VBZ B-VP -first JJ B-ADVP -as IN B-PP -a DT B-NP -fact-finder NN I-NP -and CC O -then RB O -switches VBZ B-VP -hats NNS B-NP -and CC O -arbitrates VBZ B-VP -the DT B-NP -facts NNS I-NP --- : O -has VBZ B-VP -such JJ B-NP -advantages NNS I-NP -as IN B-PP -speed NN B-NP -, , O -flexibility NN B-NP -and CC O -maximum JJ B-NP -privacy NN I-NP -. . O - -International NNP B-NP -Business NNP I-NP -Machines NNPS I-NP -and CC O -Bank NNP B-NP -of IN B-PP -America NNP B-NP -are VBP B-VP -among IN B-PP -the DT B-NP -companies NNS I-NP -using VBG B-VP -the DT B-NP -single-adjudicator JJ I-NP -approach NN I-NP -. . O - -4 CD B-LST -. . O -Make VB B-VP -your PRP$ B-NP -due-process NN I-NP -system NN I-NP -visible JJ B-ADJP -. . O - -It PRP B-NP -wo MD B-VP -n't RB I-VP -do VB I-VP -any DT B-NP -good NN I-NP -for IN B-PP -anybody NN B-NP -unless IN B-SBAR -employees NNS B-NP -know VBP B-VP -about IN B-PP -it PRP B-NP -. . O - -Most JJS B-NP -managements NNS I-NP -hesitate VBP B-VP -to TO I-VP -go VB I-VP -all DT B-ADVP -out NN I-ADVP -in IN B-PP -advertising VBG B-VP -their PRP$ B-NP -due-process NN I-NP -systems NNS I-NP -for IN B-PP -fear NN B-NP -of IN B-PP -encouraging VBG B-VP -cranks NNS B-NP -and CC O -chronic JJ B-NP -soreheads NNS I-NP -to TO B-VP -file VB I-VP -complaints NNS B-NP -. . O - -On IN B-PP -the DT B-NP -other JJ I-NP -hand NN I-NP -, , O -they PRP B-NP -make VBP B-VP -sure JJ B-ADJP -at IN B-PP -a DT B-NP -minimum NN I-NP -that IN B-SBAR -their PRP$ B-NP -systems NNS I-NP -are VBP B-VP -described VBN I-VP -in IN B-PP -their PRP$ B-NP -employee NN I-NP -handbooks NNS I-NP -and CC O -talked VBD B-VP -up IN B-PRT -by IN B-PP -personnel NNS B-NP -specialists NNS I-NP -. . O - -Smith-Kline NNP B-NP -Beecham NNP I-NP -goes VBZ B-VP -further JJ B-ADVP -and CC O -sometimes RB B-VP -features VBZ I-VP -its PRP$ B-NP -grievance NN I-NP -procedure NN I-NP -in IN B-PP -closed-circuit JJ B-NP -TV NN I-NP -programs NNS I-NP -. . O - -Naturally RB B-ADVP -, , O -one CD B-NP -of IN B-PP -the DT B-NP -best JJS I-NP -ways NNS I-NP -to TO B-VP -guarantee VB I-VP -visibility NN B-NP -for IN B-PP -your PRP$ B-NP -due-process NN I-NP -system NN I-NP -is VBZ B-VP -for IN B-SBAR -top JJ B-NP -management NN I-NP -to TO B-VP -support VB I-VP -it PRP B-NP -. . O - -At IN B-PP -IBM NNP B-NP -, , O -the DT B-NP -company NN I-NP -'s POS B-NP -Open NNP I-NP -Door NNP I-NP -system NN I-NP -is VBZ B-VP -sometimes RB B-ADVP -the DT B-NP -subject NN I-NP -of IN B-PP -memorandums NNS B-NP -from IN B-PP -the DT B-NP -chief JJ I-NP -executive NN I-NP -. . O - -Federal NNP B-NP -Express NNP I-NP -goes VBZ B-VP -further JJ B-ADVP -in IN B-PP -this DT B-NP -respect NN I-NP -than IN B-PP -any DT B-NP -company NN I-NP -I PRP B-NP -know VBP B-VP -of IN B-PP -with IN B-PP -both DT B-NP -Frederick NNP B-NP -Smith NNP I-NP -and CC O -James NNP B-NP -Barksdale NNP I-NP -, , O -chief JJ B-NP -executive NN I-NP -and CC O -chief JJ B-NP -operating VBG I-NP -officer NN I-NP -, , O -respectively RB B-ADVP -, , O -sitting VBG B-VP -in IN B-PRT -on IN B-PP -the DT B-NP -Appeals NNP I-NP -Board NNP I-NP -almost RB B-NP -every DT I-NP -Tuesday NNP I-NP -to TO B-VP -decide VB I-VP -cases NNS B-NP -. . O - -Mr. NNP B-NP -Ewing NNP I-NP -is VBZ B-VP -a DT B-NP -consultant NN I-NP -based VBN B-VP -in IN B-PP -Winchester NNP B-NP -, , O -Mass. NNP B-NP -, , O -and CC O -author NN B-NP -of IN B-PP -`` `` O -Justice NNP B-NP -on IN B-PP -the DT B-NP -Job NNP I-NP -: : O -Resolving NNP B-VP -Grievances NNP B-NP -in IN B-PP -the DT B-NP -Nonunion NNP I-NP -Workplace NN I-NP -'' '' O --LRB- ( O -Harvard NNP B-NP -Business NNP I-NP -School NNP I-NP -Press NNP I-NP -, , O -1989 CD B-NP --RRB- ) O -. . O - -Tokyo NNP B-NP -stocks NNS I-NP -closed VBD B-VP -higher JJR B-ADVP -in IN B-PP -active JJ B-NP -trading NN I-NP -Friday NNP B-NP -, , O -marking VBG B-VP -the DT B-NP -fourth JJ I-NP -consecutive JJ I-NP -daily JJ I-NP -gain NN I-NP -since IN B-PP -Monday NNP B-NP -'s POS B-NP -sharp JJ I-NP -fall NN I-NP -. . O - -London JJ B-NP -shares NNS I-NP -closed VBD B-VP -moderately RB B-ADVP -lower JJR I-ADVP -in IN B-PP -thin JJ B-NP -trading NN I-NP -. . O - -At IN B-PP -Tokyo NNP B-NP -, , O -the DT B-NP -Nikkei NNP I-NP -index NN I-NP -of IN B-PP -225 CD B-NP -selected VBN I-NP -issues NNS I-NP -was VBD B-VP -up IN B-ADVP -112.16 CD B-NP -points NNS I-NP -to TO B-PP -35486.38 CD B-NP -. . O - -The DT B-NP -index NN I-NP -advanced VBD B-VP -266.66 CD B-NP -points NNS I-NP -Thursday NNP B-NP -. . O - -In IN B-PP -early JJ B-NP -trading NN I-NP -in IN B-PP -Tokyo NNP B-NP -Monday NNP B-NP -, , O -the DT B-NP -Nikkei NNP I-NP -index NN I-NP -rose VBD B-VP -101.98 CD B-NP -points NNS I-NP -to TO B-PP -35588.36 CD B-NP -. . O - -Friday NNP B-NP -'s POS B-NP -volume NN I-NP -on IN B-PP -the DT B-NP -First NNP I-NP -Section NN I-NP -was VBD B-VP -estimated VBN I-VP -at IN B-PP -one CD B-NP -billion CD I-NP -shares NNS I-NP -, , O -up IN B-ADVP -from IN B-PP -862 CD B-NP -million CD I-NP -Thursday NNP B-NP -. . O - -Winners NNS B-NP -outpaced VBD B-VP -losers NNS B-NP -, , O -572 CD B-ADVP -to TO I-ADVP -368 CD I-ADVP -, , O -while IN B-SBAR -181 CD B-NP -issues NNS I-NP -remained VBD B-VP -unchanged JJ B-ADJP -. . O - -With IN B-SBAR -investors NNS B-NP -relieved VBN B-ADJP -at IN B-PP -the DT B-NP -overnight JJ I-NP -gain NN I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -stocks NNS I-NP -, , O -small-lot JJ B-NP -buying NN I-NP -orders NNS I-NP -streamed VBD B-VP -into IN B-PP -the DT B-NP -market NN I-NP -from IN B-PP -early JJ B-NP -morning NN I-NP -, , O -making VBG B-VP -traders NNS B-NP -believe VBP B-VP -the DT B-NP -market NN I-NP -was VBD B-VP -back RB B-ADVP -to TO B-PP -normal JJ B-NP -. . O - -The DT B-NP -Nikkei NNP I-NP -, , O -which WDT B-NP -reached VBD B-VP -as RB B-ADJP -high JJ I-ADJP -as IN B-PP -35611.38 CD B-NP -right NN B-ADVP -after IN B-PP -the DT B-NP -opening NN I-NP -, , O -surrendered VBD B-VP -part NN B-NP -of IN B-PP -its PRP$ B-NP -early JJ I-NP -advance NN I-NP -toward IN B-PP -the DT B-NP -end NN I-NP -of IN B-PP -the DT B-NP -day NN I-NP -because IN B-PP -of IN I-PP -profit-taking NN B-NP -. . O - -`` `` O -Investors NNS B-NP -, , B-NP -especially RB I-NP -dealers NNS B-NP -, , O -do VBP B-VP -n't RB I-VP -want VB I-VP -to TO I-VP -hold VB I-VP -a DT B-NP -position NN I-NP -over IN B-PP -the DT B-NP -weekend NN I-NP -, , O -'' '' O -a DT B-NP -trader NN I-NP -at IN B-PP -Dai-ichi NNP B-NP -Securities NNP I-NP -said VBD B-VP -, , O -adding VBG B-VP -, , O -though RB B-ADVP -, , O -that IN B-SBAR -the DT B-NP -trading NN I-NP -mood NN I-NP -remained VBD B-VP -positive JJ B-ADJP -through IN B-PP -the DT B-NP -afternoon NN I-NP -session NN I-NP -. . O - -The DT B-NP -Tokyo NNP I-NP -Stock NNP I-NP -Price NNP I-NP -Index NNP I-NP --LRB- ( O -Topix NNP B-NP --RRB- ) O -of IN B-PP -all DT B-NP -issues NNS I-NP -listed VBN B-VP -in IN B-PP -the DT B-NP -First NNP I-NP -Section NN I-NP -, , O -which WDT B-NP -gained VBD B-VP -22.78 CD B-NP -points NNS I-NP -Thursday NNP B-NP -, , O -was VBD B-VP -up IN B-ADVP -14.06 CD B-NP -points NNS I-NP -, , O -or CC O -0.53 CD B-NP -% NN I-NP -, , O -at IN B-PP -2679.72 CD B-NP -. . O - -The DT B-NP -Second JJ I-NP -Section NN I-NP -index NN I-NP -, , O -which WDT B-NP -rose VBD B-VP -15.72 CD B-NP -points NNS I-NP -Thursday NNP B-NP -, , O -was VBD B-VP -up IN B-ADVP -11.88 CD B-NP -points NNS I-NP -, , O -or CC O -0.32 CD B-NP -% NN I-NP -, , O -to TO B-VP -close VB I-VP -at IN B-PP -3717.46 CD B-NP -. . O - -Volume NN B-NP -in IN B-PP -the DT B-NP -second JJ I-NP -section NN I-NP -was VBD B-VP -estimated VBN I-VP -at IN B-PP -30 CD B-NP -million CD I-NP -shares NNS I-NP -, , O -up IN B-ADVP -from IN B-PP -28 CD B-NP -million CD I-NP -Thursday NNP B-NP -. . O - -In IN B-PP -turmoil NN B-NP -caused VBN B-VP -by IN B-PP -the DT O -previous JJ B-NP -Friday NNP I-NP -'s POS B-NP -plunge NN I-NP -in IN B-PP -New NNP B-NP -York NNP I-NP -stocks NNS I-NP -, , O -the DT B-NP -Nikkei NNP I-NP -marked VBD B-VP -a DT B-NP -sharp JJ I-NP -647.33-point JJ I-NP -fall NN I-NP -Monday NNP B-NP -. . O - -But CC O -the DT B-NP -Nikkei NNP I-NP -fell VBD B-VP -an DT B-NP -overall JJ I-NP -1.8 CD I-NP -% NN I-NP -in IN B-PP -value NN B-NP -that DT B-NP -day NN I-NP -compared VBN B-PP -with IN B-PP -Wall NNP B-NP -Street NNP I-NP -'s POS I-NP -far RB B-ADJP -sharper JJR I-ADJP -6.9 CD B-ADJP -% NN I-ADJP -drop NN B-NP -on IN B-PP -Oct. NNP B-NP -13 CD I-NP -. . O - -The DT B-NP -Tokyo NNP I-NP -market NN I-NP -'s POS B-NP -resiliency NN I-NP -helped VBD B-VP -participants NNS B-NP -to TO B-VP -regain VB I-VP -confidence NN B-NP -gradually RB B-ADVP -as IN B-SBAR -they PRP B-NP -spent VBD B-VP -more JJR B-NP -time NN I-NP -on IN B-PP -analyzing VBG B-VP -factors NNS B-NP -that WDT B-NP -caused VBD B-VP -the DT B-NP -Friday NNP I-NP -plunge NN I-NP -and CC O -realized VBD B-VP -these DT B-NP -problems NNS I-NP -were VBD B-VP -unique JJ B-ADJP -to TO B-PP -New NNP B-NP -York NNP I-NP -stocks NNS I-NP -and CC B-ADJP -not RB I-ADJP -directly RB B-ADJP -related VBN I-ADJP -to TO B-PP -Tokyo NNP B-NP -. . O - -The DT B-NP -Nikkei NNP I-NP -continued VBD B-VP -to TO I-VP -gain VB I-VP -for IN B-PP -the DT B-NP -rest NN I-NP -of IN B-PP -the DT B-NP -week NN I-NP -, , O -adding VBG B-VP -1017.69 CD B-NP -points NNS I-NP -in IN B-PP -four CD B-NP -days NNS I-NP --- : O -more JJR B-VP -than IN I-VP -erasing VBG I-VP -Monday NNP B-NP -'s POS B-NP -losses NNS I-NP -. . O - -But CC O -further JJ B-NP -major JJ I-NP -advances NNS I-NP -on IN B-PP -the DT B-NP -Nikkei NNP I-NP -are VBP B-VP -n't RB I-VP -foreseen VBN I-VP -this DT B-NP -week NN I-NP -by IN B-PP -market NN B-NP -observers NNS I-NP -. . O - -Investors NNS B-NP -are VBP B-VP -still RB I-VP -waiting VBG I-VP -to TO I-VP -see VB I-VP -how WRB B-ADVP -the DT B-NP -U.S. NNP I-NP -government NN I-NP -will MD B-VP -decide VB I-VP -on IN B-PP -interest NN B-NP -rates NNS I-NP -and CC O -how WRB B-ADVP -the DT B-NP -dollar NN I-NP -will MD B-VP -be VB I-VP -stabilized VBN I-VP -. . O - -Some DT B-NP -high-priced JJ I-NP -issues NNS I-NP -made VBD B-VP -a DT B-NP -comeback NN I-NP -Friday NNP B-NP -. . O - -Pioneer NNP B-NP -surged VBD B-VP -450 CD B-NP -yen NN I-NP --LRB- ( O -$ $ B-NP -3.16 CD I-NP --RRB- ) O -to TO B-PP -6,050 CD B-NP -yen NN I-NP --LRB- ( O -$ $ B-NP -42.60 CD I-NP --RRB- ) O -. . O - -Kyocera NNP B-NP -advanced VBD B-VP -80 CD B-NP -yen NN I-NP -to TO B-PP -5,440 CD B-NP -. . O - -Fanuc NNP B-NP -gained VBD B-VP -100 CD B-NP -to TO B-PP -7,580 CD B-NP -. . O - -Breweries NNP B-NP -attracted VBD B-VP -investors NNS B-NP -because IN B-PP -of IN I-PP -their PRP$ B-NP -land NN I-NP -property NN I-NP -holdings NNS I-NP -that WDT B-NP -could MD B-VP -figure VB I-VP -in IN B-PP -development NN B-NP -or CC O -other JJ B-NP -plans NNS I-NP -, , O -traders NNS B-NP -said VBD B-VP -. . O - -Sapporo NNP B-NP -gained VBD B-VP -80 CD B-NP -to TO B-PP -1,920 CD B-NP -and CC O -Kirin NNP B-NP -added VBD B-VP -60 CD B-NP -to TO B-PP -2,070 CD B-NP -. . O - -Housings NNS B-NP -, , I-NP -constructions NNS I-NP -and CC I-NP -pharmaceuticals NNS I-NP -continued VBD B-VP -to TO I-VP -be VB I-VP -bought VBN I-VP -following VBG B-PP -Thursday NNP B-NP -'s POS B-NP -gains NNS I-NP -because IN B-PP -of IN I-PP -strong JJ B-NP -earnings NNS I-NP -outlooks NNS I-NP -. . O - -Daiwa NNP B-NP -House NNP I-NP -gained VBD B-VP -50 CD B-NP -to TO B-PP -2,660 CD B-NP -. . O - -Misawa NNP B-NP -Homes NNP I-NP -was VBD B-VP -up IN B-ADVP -20 CD B-NP -at IN B-PP -2,960 CD B-NP -. . O - -Kajima NNP B-NP -advanced VBD B-VP -40 CD B-NP -to TO B-PP -2,120 CD B-NP -and CC O -Ohbayashi NNP B-NP -added VBD B-VP -50 CD B-NP -to TO B-PP -1,730 CD B-NP -. . O - -Fujisawa NNP B-NP -added VBD B-VP -80 CD B-NP -to TO B-PP -2,010 CD B-NP -and CC O -Mochida NNP B-NP -advanced VBD B-VP -230 CD B-NP -to TO B-PP -4,400 CD B-NP -. . O - -London JJ B-NP -share NN I-NP -prices NNS I-NP -were VBD B-VP -influenced VBN I-VP -largely RB B-ADVP -by IN B-PP -declines NNS B-NP -on IN B-PP -Wall NNP B-NP -Street NNP I-NP -and CC O -weakness NN B-NP -in IN B-PP -the DT B-NP -British JJ I-NP -pound NN I-NP -. . O - -The DT B-NP -key JJ I-NP -Financial NNP I-NP -Times-Stock NNP I-NP -Exchange NNP I-NP -100-share JJ I-NP -index NN I-NP -ended VBD B-VP -10.2 CD B-NP -points NNS I-NP -lower JJR B-ADVP -at IN B-PP -2179.1 CD B-NP -, , O -above IN B-ADVP -its PRP$ B-NP -intraday JJ I-NP -low NN I-NP -of IN B-PP -2176.9 CD B-NP -, , B-ADVP -but CC I-ADVP -off IN B-ADVP -the DT B-NP -day NN I-NP -'s POS I-NP -high NN B-NP -of IN B-PP -2189 CD B-NP -. . O - -The DT B-NP -index NN I-NP -finished VBD B-VP -2.4 CD B-NP -% NN I-NP -under IN B-PP -its PRP$ B-NP -close NN I-NP -of IN B-PP -2233.9 CD B-NP -the DT B-NP -previous JJ I-NP -Friday NNP I-NP -, , O -although IN B-SBAR -it PRP B-NP -recouped VBD B-VP -some DT B-NP -of IN B-PP -the DT B-NP -sharp JJ I-NP -losses NNS I-NP -staged VBD B-VP -early JJ B-NP -last JJ I-NP -week NN I-NP -on IN B-PP -the DT B-NP -back RB I-NP -of IN B-PP -Wall NNP B-NP -Street NNP I-NP -'s POS B-NP -fall NN I-NP -. . O - -London NNP B-NP -was VBD B-VP -weak JJ B-ADJP -throughout IN B-PP -Friday NNP B-NP -'s POS B-NP -trading NN I-NP -, , O -however RB B-ADVP -, , O -on IN B-PP -what WP B-NP -dealers NNS B-NP -attributed VBD B-VP -to TO B-PP -generally RB B-NP -thin JJ I-NP -interest NN I-NP -ahead RB B-ADVP -of IN B-PP -the DT B-NP -weekend NN I-NP -and CC O -this DT B-NP -week NN I-NP -'s POS I-NP -potentially RB B-ADJP -important JJ I-ADJP -U.K. NNP B-NP -trade NN I-NP -figures NNS I-NP -for IN B-PP -September NNP B-NP -. . O - -The DT B-NP -FT-SE NNP I-NP -100 CD I-NP -largely RB B-ADVP -remained VBD B-VP -within IN B-PP -an DT B-NP -11-point JJ I-NP -range NN I-NP -establshed VBN B-VP -within IN B-PP -the DT B-NP -first JJ I-NP -hour NN I-NP -of IN B-PP -trading NN B-NP -before IN B-PP -it PRP B-NP -eased VBD B-VP -to TO B-PP -an DT B-NP -intraday JJ I-NP -low JJ I-NP -late RB B-ADVP -in IN B-PP -the DT B-NP -session NN I-NP -when WRB B-ADVP -a DT B-NP -flurry NN I-NP -of IN B-PP -program NN B-NP -selling VBG I-NP -pushed VBN B-VP -Wall NNP B-NP -Street NNP I-NP -lower JJR B-ADVP -. . O - -The DT B-NP -FT NNP I-NP -30-share JJ I-NP -index NN I-NP -closed VBD B-VP -11.0 CD B-NP -points NNS I-NP -lower JJR B-ADVP -at IN B-PP -1761.0 CD B-NP -. . O - -Volume NN B-NP -was VBD B-VP -extremely RB B-ADJP -thin JJ I-ADJP -at IN B-PP -351.3 CD B-NP -million CD I-NP -shares NNS I-NP -, , O -the DT B-NP -lightest JJS I-NP -volume NN I-NP -of IN B-PP -the DT B-NP -week NN I-NP -and CC O -modestly RB B-ADVP -under IN B-PP -Thursday NNP B-NP -'s POS B-NP -387.4 CD I-NP -million CD I-NP -shares NNS I-NP -. . O - -Dealers NNS B-NP -said VBD B-VP -the DT B-NP -day NN I-NP -'s POS B-NP -action NN I-NP -was VBD B-VP -featureless JJ B-ADJP -outside IN B-PP -some DT B-NP -response NN I-NP -to TO B-PP -sterling NN B-NP -'s POS B-NP -early JJ I-NP -weakness NN I-NP -against IN B-PP -the DT B-NP -mark NN I-NP -, , O -and CC O -fears NNS B-NP -that IN B-SBAR -Wall NNP B-NP -Street NNP I-NP -might MD B-VP -open RB I-VP -lower JJR B-ADVP -after IN B-PP -its PRP$ B-NP -strong JJ I-NP -leap NN I-NP -forward RB B-ADVP -Thursday NNP B-NP -. . O - -They PRP B-NP -added VBD B-VP -that IN B-SBAR -market-makers NNS B-NP -were VBD B-VP -largely RB I-VP -sidelined VBN I-VP -after IN B-PP -aggressively RB B-VP -supporting VBG I-VP -the DT B-NP -market NN I-NP -Thursday NNP B-NP -in IN B-PP -their PRP$ B-NP -quest NN I-NP -to TO B-VP -cover VB I-VP -internal JJ B-NP -shortages NNS I-NP -of IN B-PP -FT-SE NNP B-NP -100 CD I-NP -shares NNS I-NP -. . O - -Interest NN B-NP -may MD B-VP -remain VB I-VP -limited JJ B-ADJP -into IN B-PP -tomorrow NN B-NP -'s POS B-NP -U.K. NNP I-NP -trade NN I-NP -figures NNS I-NP -, , O -which WDT B-NP -the DT B-NP -market NN I-NP -will MD B-VP -be VB I-VP -watching VBG I-VP -closely RB B-ADVP -to TO B-VP -see VB I-VP -if IN B-SBAR -there EX B-NP -is VBZ B-VP -any DT B-NP -improvement NN I-NP -after IN B-PP -disappointing JJ B-NP -numbers NNS I-NP -in IN B-PP -the DT B-NP -previous JJ I-NP -two CD I-NP -months NNS I-NP -. . O - -The DT B-NP -key JJ I-NP -corporate JJ I-NP -news NN I-NP -of IN B-PP -the DT B-NP -day NN I-NP -was VBD B-VP -that IN B-SBAR -British JJ B-NP -Airways NNPS I-NP -decided VBD B-VP -to TO I-VP -withdraw VB I-VP -from IN B-PP -a DT B-NP -management-led JJ I-NP -bid NN I-NP -for IN B-PP -UAL NNP B-NP -Corp. NNP I-NP -, , O -the DT B-NP -parent NN I-NP -of IN B-PP -United NNP B-NP -Airlines NNPS I-NP -. . O - -British JJ B-NP -Airways NNPS I-NP -rose VBD B-VP -initially RB B-ADVP -after IN B-PP -announcing VBG B-VP -its PRP$ B-NP -withdrawal NN I-NP -from IN B-PP -the DT B-NP -UAL NNP I-NP -deal NN I-NP -. . O - -Dealers NNS B-NP -said VBD B-VP -they PRP B-NP -viewed VBD B-VP -the DT O -initial JJ O -# # O -390-million CD O --LRB- ( O -$ $ B-ADJP -622 CD O -million CD O --RRB- ) O -outlay NN B-NP -for IN B-PP -a DT B-NP -15 CD I-NP -% NN I-NP -stake NN I-NP -in IN B-PP -the DT B-NP -airline NN I-NP -as IN B-PP -a DT B-NP -bit NN I-NP -much JJ I-NP -. . O - -Its PRP$ B-NP -shares NNS I-NP -slid VBD B-VP -in IN B-PP -late JJ B-NP -dealings NNS I-NP -to TO B-VP -close VB I-VP -a DT B-NP -penny NN I-NP -per IN B-PP -share NN B-NP -lower JJR B-ADVP -at IN B-PP -197 CD B-NP -pence NN I-NP -. . O - -The DT B-NP -airline NN I-NP -was VBD B-VP -the DT B-NP -most RBS I-NP -active JJ I-NP -FT-SE NNP I-NP -100 CD I-NP -at IN B-PP -8.2 CD B-NP -million CD I-NP -shares NNS I-NP -traded VBN B-VP -. . O - -The DT B-NP -next JJ I-NP -most RBS I-NP -active JJ I-NP -top-tier JJ I-NP -stock NN I-NP -was VBD B-VP -B.A.T NNP B-NP -Industries NNPS I-NP -, , O -the DT B-NP -target NN I-NP -of IN B-PP -Sir NNP B-NP -James NNP I-NP -Goldsmith NNP I-NP -'s POS B-NP -# # B-ADJP -13.4 CD O -billion CD O -bid NN B-NP -. . O - -The DT B-NP -company NN I-NP -gained VBD B-VP -shareholder NN B-NP -approval NN I-NP -Thursday NNP B-NP -to TO B-VP -restructure VB I-VP -in IN B-PP -a DT B-NP -bid NN I-NP -to TO B-VP -fend VB I-VP -off IN B-PRT -the DT B-NP -hostile JJ I-NP -takeover NN I-NP -. . O - -Sir NNP B-NP -James NNP I-NP -said VBD B-VP -Thursday NNP B-NP -night NN I-NP -that IN B-SBAR -his PRP$ B-NP -plans NNS I-NP -for IN B-PP -the DT B-NP -takeover NN I-NP -had VBD B-VP -n't RB I-VP -changed VBN I-VP -. . O - -B.A.T NNP B-NP -ended VBD B-VP -the DT B-NP -day NN I-NP -at IN B-PP -778 CD B-NP -, , O -down JJ B-ADVP -5 NN B-NP -, , O -on IN B-PP -turnover NN B-NP -of IN B-PP -7.5 CD B-NP -million CD I-NP -shares NNS I-NP -. . O - -Dealers NNS B-NP -said VBD B-VP -it PRP B-NP -was VBD B-VP -hit VBN I-VP -by IN B-PP -some DT B-NP -profit-taking NN I-NP -after IN B-PP -gains NNS B-NP -since IN B-PP -mid-week NN B-NP -. . O - -In IN B-PP -other JJ B-NP -active JJ I-NP -shares NNS I-NP -, , O -Trusthouse NNP B-NP -Forte NNP I-NP -shed VB B-VP -10 CD B-NP -to TO B-PP -294 CD B-NP -on IN B-PP -volume NN B-NP -of IN B-PP -6.4 CD B-NP -million CD I-NP -shares NNS I-NP -after IN B-PP -a DT B-NP -Barclays NNP I-NP -De NNP I-NP -Zoete NNP I-NP -Wedd NNP I-NP -downgrading NN I-NP -, , O -while IN B-SBAR -Hillsdown NNP B-NP -Holdings NNP I-NP -, , O -a DT B-NP -food NN I-NP -products NNS I-NP -concern VBP I-NP -, , O -was VBD B-VP -boosted VBN I-VP -2 CD B-NP -to TO B-PP -271 CD B-NP -after IN O -it PRP B-NP -disclosed VBD B-VP -it PRP B-NP -would MD B-VP -seek VB I-VP -shareholder NN B-NP -approval NN I-NP -to TO B-VP -begin VB I-VP -share NN B-NP -repurchases NNS I-NP -. . O - -Elsewhere RB B-ADVP -in IN B-PP -Europe NNP B-NP -, , O -share NN B-NP -prices NNS I-NP -closed VBD B-VP -higher JJR B-ADVP -in IN B-PP -Stockholm NNP B-NP -, , I-NP -Brussels NNP I-NP -and CC I-NP -Milan NNP I-NP -. . O - -Prices NNS B-NP -were VBD B-VP -lower JJR B-ADJP -in IN B-PP -Frankfurt NNP B-NP -, , I-NP -Zurich NNP I-NP -, , I-NP -Paris NNP I-NP -and CC I-NP -Amsterdam NNP I-NP -. . O - -South JJ B-NP -African JJ I-NP -gold NN I-NP -stocks NNS I-NP -closed VBD B-VP -moderately RB B-ADVP -lower JJR I-ADVP -. . O - -Share NN B-NP -prices NNS I-NP -closed VBD B-VP -higher JJR B-ADVP -in IN B-PP -Sydney NNP B-NP -, , O -Taipei NNP B-NP -, , O -Wellington NNP B-NP -, , O -Manila NNP B-NP -, , O -Hong NNP B-NP -Kong NNP I-NP -and CC O -Singapore NNP B-NP -and CC O -were VBD B-VP -lower JJR B-ADJP -in IN B-PP -Seoul NNP B-NP -. . O - -Here RB B-ADVP -are VBP B-VP -price NN B-NP -trends NNS I-NP -on IN B-PP -the DT B-NP -world NN I-NP -'s POS B-NP -major JJ I-NP -stock NN I-NP -markets NNS I-NP -, , O -as IN B-SBAR -calculated VBN B-VP -by IN B-PP -Morgan NNP B-NP -Stanley NNP I-NP -Capital NNP I-NP -International NNP I-NP -Perspective NNP I-NP -, , O -Geneva NNP B-NP -. . O - -To TO B-VP -make VB I-VP -them PRP B-NP -directly RB B-ADJP -comparable JJ I-ADJP -, , O -each DT B-NP -index NN I-NP -is VBZ B-VP -based VBN I-VP -on IN B-PP -the DT B-NP -close NN I-NP -of IN B-PP -1969 CD B-NP -equaling VBG B-VP -100 CD B-NP -. . O - -The DT B-NP -percentage NN I-NP -change NN I-NP -is VBZ B-VP -since IN B-PP -year-end NN B-NP -. . O - -The DT B-NP -U.S. NNP I-NP -is VBZ B-VP -required VBN I-VP -to TO I-VP -notify VB I-VP -foreign JJ B-NP -dictators NNS I-NP -if IN B-SBAR -it PRP B-NP -knows VBZ B-VP -of IN B-PP -coup NN B-NP -plans NNS I-NP -likely JJ B-ADJP -to TO B-VP -endanger VB I-VP -their PRP$ B-NP -lives NNS I-NP -, , O -government NN B-NP -officials NNS I-NP -said VBD B-VP -. . O - -The DT B-NP -notification NN I-NP -policy NN I-NP -was VBD B-VP -part NN B-NP -of IN B-PP -a DT B-NP -set NN I-NP -of IN B-PP -guidelines NNS B-NP -on IN B-PP -handling NN B-VP -coups NNS B-NP -outlined VBN B-VP -in IN B-PP -a DT B-NP -secret JJ I-NP -1988 CD I-NP -exchange NN I-NP -of IN B-PP -letters NNS B-NP -between IN B-PP -the DT B-NP -Reagan NNP I-NP -administration NN I-NP -and CC O -the DT B-NP -Senate NNP I-NP -Intelligence NNP I-NP -Committee NNP I-NP -. . O - -The DT B-NP -existence NN I-NP -of IN B-PP -the DT B-NP -guidelines NNS I-NP -has VBZ B-VP -become VBN I-VP -known VBN I-VP -since IN B-SBAR -President NNP B-NP -Bush NNP I-NP -disclosed VBD B-VP -them PRP B-NP -privately RB B-ADVP -to TO B-PP -seven CD B-NP -Republican NNP I-NP -senators NNS I-NP -at IN B-PP -a DT B-NP -White NNP I-NP -House NNP I-NP -meeting NN I-NP -last JJ B-NP -Monday NNP I-NP -. . O - -Officials NNS B-NP -familiar JJ B-ADJP -with IN B-PP -the DT B-NP -meeting NN I-NP -said VBD B-VP -Mr. NNP B-NP -Bush NNP I-NP -cited VBD B-VP -the DT B-NP -policy NN I-NP -as IN B-PP -an DT B-NP -example NN I-NP -of IN B-PP -the DT B-NP -sort NN I-NP -of IN B-PP -congressional JJ B-NP -requirements NNS I-NP -the DT B-NP -administration NN I-NP -contends VBZ B-VP -contribute VB B-VP -to TO B-PP -the DT B-NP -failure NN I-NP -of IN B-PP -such JJ B-NP -covert JJ I-NP -actions NNS I-NP -as IN B-PP -this DT B-NP -month NN I-NP -'s POS B-NP -futile JJ I-NP -effort NN I-NP -to TO B-VP -oust VB I-VP -Panamanian JJ B-NP -dictator NN I-NP -Manuel NNP I-NP -Noriega NNP I-NP -. . O - -According VBG B-PP -to TO B-PP -the DT B-NP -officials NNS I-NP -, , O -Mr. NNP B-NP -Bush NNP I-NP -even RB B-ADVP -read VB B-VP -to TO B-PP -the DT B-NP -senators NNS I-NP -selections NNS B-NP -from IN B-PP -a DT B-NP -highly RB I-NP -classified VBN I-NP -letter NN I-NP -from IN B-PP -the DT B-NP -committee NN I-NP -to TO B-PP -the DT B-NP -White NNP I-NP -House NNP I-NP -discussing VBG B-VP -the DT B-NP -guidelines NNS I-NP -. . O - -They PRP B-NP -said VBD B-VP -the DT B-NP -president NN I-NP -conceded VBD B-VP -the DT B-NP -notification NN I-NP -requirement NN I-NP -did VBD B-VP -n't RB I-VP -affect VB I-VP -his PRP$ B-NP -decision NN I-NP -to TO B-VP -lend VB I-VP -only RB B-NP -minor JJ I-NP -support NN I-NP -to TO B-PP -this DT B-NP -month NN I-NP -'s POS B-NP -Panama NNP I-NP -coup NN I-NP -effort NN I-NP -. . O - -No DT B-NP -notification NN I-NP -was VBD B-VP -ever RB I-VP -considered VBN I-VP -, , O -officials NNS B-NP -said VBD B-VP -, , O -apparently RB B-ADVP -because IN B-SBAR -the DT B-NP -U.S. NNP I-NP -did VBD B-VP -n't RB I-VP -think VB I-VP -the DT B-NP -coup NN I-NP -plotters NNS I-NP -intended VBN B-VP -to TO I-VP -kill VB I-VP -Mr. NNP B-NP -Noriega NNP I-NP -, , O -but CC O -merely RB B-VP -sought VBD I-VP -to TO I-VP -imprison VB I-VP -him PRP B-NP -. . O - -What WP B-NP -'s VBZ B-VP -more JJR B-NP -, , O -both DT B-NP -administration NN B-NP -and CC O -congressional JJ B-NP -officials NNS I-NP -hint VBP B-VP -that IN B-SBAR -the DT B-NP -notification NN I-NP -requirement NN I-NP -is VBZ B-VP -likely JJ B-ADJP -to TO B-VP -be VB I-VP -dropped VBN I-VP -from IN B-PP -the DT B-NP -guidelines NNS I-NP -on IN B-PP -coup NN B-NP -attempts NNS I-NP -that WDT B-NP -are VBP B-VP -being VBG I-VP -rewritten VBN I-VP -by IN B-PP -the DT B-NP -panel NN I-NP -and CC O -the DT B-NP -White NNP I-NP -House NNP I-NP -. . O - -The DT B-NP -rewriting VBG I-NP -was VBD B-VP -launched VBN I-VP -at IN B-PP -a DT B-NP -meeting NN I-NP -between IN B-PP -Mr. NNP B-NP -Bush NNP I-NP -and CC O -intelligence NN B-NP -committee NN I-NP -leaders NNS I-NP -Oct. NNP B-NP -12 CD I-NP -, , O -a DT B-NP -few JJ I-NP -days NNS I-NP -before IN B-PP -the DT B-NP -meeting NN I-NP -at IN B-PP -which WDT B-NP -the DT B-NP -president NN I-NP -complained VBD B-VP -about IN B-PP -the DT B-NP -rules NNS I-NP -. . O - -However RB B-ADVP -, , O -the DT B-NP -disclosure NN I-NP -of IN B-PP diff --git a/paddle/trainer/tests/train_files.txt b/paddle/trainer/tests/train_files.txt deleted file mode 100644 index 1c268914953ff090ae47c56051fcf1cad0e1707b..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/train_files.txt +++ /dev/null @@ -1 +0,0 @@ -trainer/tests/train_proto.bin diff --git a/paddle/trainer/tests/train_sparse.list b/paddle/trainer/tests/train_sparse.list deleted file mode 100644 index 6ea020e2202f8464f8a647cd96c84a9d17a03ae3..0000000000000000000000000000000000000000 --- a/paddle/trainer/tests/train_sparse.list +++ /dev/null @@ -1 +0,0 @@ -trainer/tests/compare_sparse_data diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 2cff25d09583f7f6b01b122f06795b28e0230bb6..64f381da51442e6675645f0e800eb82def2963dc 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -551,6 +551,10 @@ message LayerConfig { // for switch order layer optional ReshapeConfig reshape_conf = 59; + + // for batch normalization layer + // The small constant added to the variance to improve numeric stability. + optional double epsilon = 60 [ default = 0.00001 ]; } message EvaluatorConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index e1b9d526566f33cb2b850fe7d055fc1f1ef88325..953f2832f95318b8dab41392f44c5b7325bdb9ad 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1124,35 +1124,6 @@ def PyData(files=None, return data_config -@config_func -def ProtoData(files=None, - type=None, - file_group_queue_capacity=None, - load_file_count=None, - constant_slots=None, - load_thread_num=None, - **xargs): - data_config = create_data_config_proto(**xargs) - if type is None: - data_config.type = 'proto' - else: - data_config.type = type - data_config.files = files - - # When type="proto_group", one data provider contains at most - # load_file_count files, and there are at most - # (queue_capacity + load_thread_num + 1) data providers in memory - if file_group_queue_capacity is not None: - data_config.file_group_conf.queue_capacity = file_group_queue_capacity - if load_file_count is not None: - data_config.file_group_conf.load_file_count = load_file_count - if load_thread_num is not None: - data_config.file_group_conf.load_thread_num = load_thread_num - if constant_slots: - data_config.constant_slots.extend(constant_slots) - return data_config - - #real data for training is actually provided by "sub_data" data providers. @config_func def MultiData(sub_data=[]): @@ -2074,13 +2045,20 @@ class ParameterReluLayer(LayerBase): def __init__(self, name, inputs, partial_sum=1, **args): super(ParameterReluLayer, self).__init__( name, self.layer_type, 0, inputs=inputs, **args) + input_layer = self.get_input_layer(0) config_assert(len(self.inputs) == 1, "prelu layer has only one input.") config_assert(input_layer.size % partial_sum == 0, "a wrong setting for partial_sum") + + dims = [1, input_layer.size / partial_sum] self.set_layer_size(input_layer.size) self.config.partial_sum = partial_sum - self.create_input_parameter(0, input_layer.size / partial_sum) + self.create_input_parameter(0, input_layer.size / partial_sum, dims) + + self.set_layer_height_width(self.get_input_layer(0).height, \ + self.get_input_layer(0).width) + self.set_layer_depth(self.get_input_layer(0).depth) @config_layer('conv') @@ -2482,6 +2460,7 @@ class BatchNormLayer(LayerBase): bias=True, img3D=False, use_global_stats=True, + epsilon=1e-5, moving_average_fraction=0.9, batch_norm_type=None, mean_var_names=None, @@ -2530,6 +2509,9 @@ class BatchNormLayer(LayerBase): self.config.use_global_stats = use_global_stats if moving_average_fraction is not None: self.config.moving_average_fraction = moving_average_fraction + if epsilon is not None: + assert epsilon >= 1e-5, "epsilon must be no less than 1e-5." + self.config.epsilon = epsilon input_layer = self.get_input_layer(0) image_conf = self.config.inputs[0].image_conf @@ -2762,7 +2744,7 @@ Usage: max_sort_size = -1, inputs = ["output", "score"]) Input data: Samples of the same query should be loaded as a sequence, - by ProtoDataProvider or PyDataProvider etc.. User should provide + by PyDataProvider etc.. User should provide scores for each sample. The score slot should be the 2nd input of lambdaRank layer. @@ -2864,19 +2846,18 @@ class AddToLayer(LayerBase): name, self.layer_type, 0, inputs=inputs, **xargs) config_assert(len(inputs) > 0, 'inputs cannot be empty for AddToLayer') - if len(self.inputs) > 1: - for input_index in xrange(len(self.inputs)): - assert self.get_input_layer(0).height == self.get_input_layer( - input_index).height - assert self.get_input_layer(0).width == self.get_input_layer( - input_index).width - assert self.get_input_layer(0).depth == self.get_input_layer( - input_index).depth + layer_size = self.get_input_layer(0).size + # To reserve heght, width, depth. + layer_with_hwc = self.get_input_layer(0) + for input_index in xrange(len(self.inputs)): + input_layer = self.get_input_layer(input_index) + assert layer_size == input_layer.size + if input_layer.height and input_layer.height and input_layer.height: + layer_with_hwc = input_layer - self.set_layer_size(self.get_input_layer(0).size) - self.set_layer_height_width(self.get_input_layer(0).height, \ - self.get_input_layer(0).width) - self.set_layer_depth(self.get_input_layer(0).depth) + self.set_layer_size(layer_with_hwc.size) + self.set_layer_height_width(layer_with_hwc.height, layer_with_hwc.width) + self.set_layer_depth(layer_with_hwc.depth) self.create_bias_parameter(bias, self.config.size) diff --git a/python/paddle/trainer_config_helpers/activations.py b/python/paddle/trainer_config_helpers/activations.py index c749fa827fea4a808ab715dcb3442aa24d06a4d2..00efc01c0592107314f5b23c951706d039d49a88 100644 --- a/python/paddle/trainer_config_helpers/activations.py +++ b/python/paddle/trainer_config_helpers/activations.py @@ -17,7 +17,8 @@ __all__ = [ "IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation', 'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation", "STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation", - "LogActivation", "SqrtActivation", "ReciprocalActivation" + "LogActivation", "SqrtActivation", "ReciprocalActivation", + "SoftSignActivation" ] @@ -243,8 +244,20 @@ class ReciprocalActivation(BaseActivation): Reciprocal Activation. .. math:: - f(z) = 1/z + f(z)=\\frac{1}{z} """ def __init__(self): BaseActivation.__init__(self, 'reciprocal', False) + + +class SoftSignActivation(BaseActivation): + """ + SoftSign Activation. + + .. math:: + f(z)=\\frac{z}{1 + |z|} + """ + + def __init__(self): + BaseActivation.__init__(self, 'softsign', False) diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py index 57979db4de08989ab583b0ab41589c09789a0921..95797fba8f67bacb421f5c2813ad6332bc53cbc9 100644 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ b/python/paddle/trainer_config_helpers/evaluators.py @@ -297,7 +297,7 @@ def auc_evaluator( def pnpair_evaluator( input, label, - info, + query_id, weight=None, name=None, ): """ @@ -308,16 +308,20 @@ def pnpair_evaluator( .. code-block:: python - eval = pnpair_evaluator(input, label, info) + eval = pnpair_evaluator(input, label, query_id) :param input: Input Layer name. The output prediction of network. :type input: LayerOutput :param label: Label layer name. :type label: LayerOutput - :param info: Info layer name. (TODO, explaination) - :type info: LayerOutput + :param query_id: Query_id layer name. Query_id indicates that which query + each sample belongs to. Its shape should be + the same as output of Label layer. + :type query_id: LayerOutput :param weight: Weight Layer name. It should be a matrix with size - [sample_num, 1]. (TODO, explaination) + [sample_num, 1] which indicates the weight of each sample. + The default weight of sample is 1 if the weight layer is None. + And the pair weight is the mean of the two samples' weight. :type weight: LayerOutput :param name: Evaluator name. :type name: None|basestring @@ -326,8 +330,8 @@ def pnpair_evaluator( input = [input] if label: input.append(label) - if info: - input.append(info) + if query_id: + input.append(query_id) evaluator_base( input=input, type="pnpair", diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index b05e2b27906417028ec30529cdb8e505d2bfdea4..e57ec9fe60385cd7ece39d1daa6bbcc29af19b22 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1902,9 +1902,12 @@ def repeat_layer(input, A layer for repeating the input for num_repeats times. If as_row_vector: + .. math:: y = [x_1,\cdots, x_n, \cdots, x_1, \cdots, x_n] + If not as_row_vector: + .. math:: y = [x_1,\cdots, x_1, \cdots, x_n, \cdots, x_n] @@ -1917,19 +1920,19 @@ def repeat_layer(input, :param input: The input of this layer. :type input: LayerOutput - :param num_repeats: Repeat the input so many times + :param num_repeats: The times of repeating the input. :type num_repeats: int :param name: The name of this layer. It is optional. - :param as_row_vector: True for treating input as row vector and repeating - in the column direction. This is equivalent to apply - concat_layer() with num_repeats same input. - False for treating input as column vector and repeating - in the row direction. + :type name: basestring + :param as_row_vector: Whether to treat the input as row vectors or not. If + the parameter is set to True, the repeating operation + will be performed in the column direction. Otherwise, + it will be performed in the row direction. :type as_row_vector: bool :param act: Activation type. IdentityActivation is the default activation. :type act: BaseActivation - :type name: basestring - :param layer_attr: extra layer attributes. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object. :rtype: LayerOutput @@ -1976,13 +1979,14 @@ def seq_reshape_layer(input, :param input: The input of this layer. :type input: LayerOutput - :param reshape_size: the size of reshaped sequence. + :param reshape_size: The dimension of the reshaped sequence. :type reshape_size: int :param name: The name of this layer. It is optional. :type name: basestring :param act: Activation type. IdentityActivation is the default activation. :type act: BaseActivation - :param layer_attr: extra layer attributes. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute. :param bias_attr: The bias attribute. If the parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If the @@ -2010,7 +2014,7 @@ def seq_reshape_layer(input, @layer_support() def interpolation_layer(input, weight, name=None, layer_attr=None): """ - This layer is for linear interpolation with two inputs, + This layer performs linear interpolation on two inputs, which is used in NEURAL TURING MACHINE. .. math:: @@ -2032,7 +2036,8 @@ def interpolation_layer(input, weight, name=None, layer_attr=None): :type weight: LayerOutput :param name: The name of this layer. It is optional. :type name: basestring - :param layer_attr: extra layer attributes. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object. :rtype: LayerOutput @@ -2066,7 +2071,7 @@ def bilinear_interp_layer(input, name=None, layer_attr=None): """ - This layer is to implement bilinear interpolation on conv layer output. + This layer implements bilinear interpolation on convolutional layer's output. Please refer to Wikipedia: https://en.wikipedia.org/wiki/Bilinear_interpolation @@ -2076,18 +2081,19 @@ def bilinear_interp_layer(input, bilinear = bilinear_interp_layer(input=layer1, out_size_x=64, out_size_y=64) - :param input: A input layer. - :type input: LayerOutput. - :param out_size_x: bilinear interpolation output width. - :type out_size_x: int | None - :param out_size_y: bilinear interpolation output height. - :type out_size_y: int | None - :param name: The layer's name, which cna not be specified. - :type name: None | basestring - :param layer_attr: Extra Layer attribute. - :type layer_attr: ExtraLayerAttribute + :param input: The input of this layer. + :type input: LayerOutput. + :param out_size_x: The width of the output. + :type out_size_x: int + :param out_size_y: The height of the output. + :type out_size_y: int + :param name: The name of this layer. It is optional. + :type name: basestring + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. - :rtype: LayerOutput + :rtype: LayerOutput """ assert input.layer_type == LayerType.CONV_LAYER assert isinstance(input.activation, LinearActivation) @@ -2122,8 +2128,8 @@ def power_layer(input, weight, name=None, layer_attr=None): .. math:: y = x^w - where :math:`x` is a input vector, :math:`w` is scalar weight, - and :math:`y` is a output vector. + where :math:`x` is an input vector, :math:`w` is a scalar exponent, + and :math:`y` is an output vector. The example usage is: @@ -2133,11 +2139,12 @@ def power_layer(input, weight, name=None, layer_attr=None): :param input: The input of this layer. :type input: LayerOutput - :param weight: Weight layer. + :param weight: The exponent of the power. :type weight: LayerOutput :param name: The name of this layer. It is optional. :type name: basestring - :param layer_attr: extra layer attributes. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object. :rtype: LayerOutput @@ -2177,11 +2184,12 @@ def scaling_layer(input, weight, name=None, layer_attr=None): :param input: The input of this layer. :type input: LayerOutput - :param weight: Weight layer. + :param weight: The weight of each sample. :type weight: LayerOutput :param name: The name of this layer. It is optional. :type name: basestring - :param layer_attr: extra layer attributes. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object. :rtype: LayerOutput @@ -2219,7 +2227,8 @@ def trans_layer(input, name=None, layer_attr=None): :type input: LayerOutput :param name: The name of this layer. It is optional. :type name: basestring - :param layer_attr: extra layer attributes. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object. :rtype: LayerOutput @@ -2255,11 +2264,14 @@ def rotate_layer(input, height, width, name=None, layer_attr=None): :param input: The input of this layer. :type input: LayerOutput - :param height: The height of the sample matrix + :param height: The height of the sample matrix. :type height: int + :param width: The width of the sample matrix. + :type width: int :param name: The name of this layer. It is optional. :type name: basestring - :param layer_attr: extra layer attributes. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute. :return: LayerOutput object. :rtype: LayerOutput @@ -2304,15 +2316,15 @@ def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None): :param name: The name of this layer. It is optional. :type name: basestring - :param a: input layer a + :param a: The first input of this layer. :type a: LayerOutput - :param b: input layer b + :param b: The second input of this layer. :type b: LayerOutput - :param scale: scale for cosine value. default is 5. + :param scale: The scale of the cosine similarity. 1 is the default value. :type scale: float - :param size: layer size. NOTE size_a * size should equal size_b. + :param size: The dimension of this layer. NOTE size_a * size should equal size_b. :type size: int - :param layer_attr: Extra Layer Attribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -2397,8 +2409,10 @@ def hsigmoid(input, """ Organize the classes into a binary tree. At each node, a sigmoid function is used to calculate the probability of belonging to the right branch. - This idea is from "F. Morin, Y. Bengio (AISTATS 05): - Hierarchical Probabilistic Neural Network Language Model." + + Reference: + `Hierarchical Probabilistic Neural Network Language Model + `_ The example usage is: @@ -2409,19 +2423,21 @@ def hsigmoid(input, :param input: The input of this layer. :type input: LayerOutput | list | tuple - :param label: Label layer. + :param label: The input label. :type label: LayerOutput - :param num_classes: number of classes. - :type num_classes: int | None + :param num_classes: The number of classes. And it should be larger than 2. If the parameter + is not set or set to None, its actual value will be automatically set to + the number of labels. + :type num_classes: int :param name: The name of this layer. It is optional. :type name: basestring :param bias_attr: The bias attribute. If the parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If the parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param param_attr: Parameter Attribute. None means default parameter. - :type param_attr: ParameterAttribute | None - :param layer_attr: Extra Layer Attribute. + :param param_attr: The parameter attribute. See ParameterAttribute for details. + :type param_attr: ParameterAttribute + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -2509,12 +2525,12 @@ def img_conv_layer(input, input is raw pixels of image(mono or RGB), or it may be the previous layer's num_filters * num_group. - There are several group of filter in PaddlePaddle implementation. - Each group will process some channel of the inputs. For example, if an input + There are several groups of filters in PaddlePaddle implementation. + Each group will process some channels of the input. For example, if num_channel = 256, group = 4, num_filter=32, the PaddlePaddle will create - 32*4 = 128 filters to process inputs. The channels will be split into 4 - pieces. First 256/4 = 64 channels will process by first 32 filters. The - rest channels will be processed by rest group of filters. + 32*4 = 128 filters to process the input. The channels will be split into 4 + pieces. First 256/4 = 64 channels will be processed by first 32 filters. The + rest channels will be processed by the rest groups of filters. The example usage is: @@ -2530,53 +2546,68 @@ def img_conv_layer(input, :type name: basestring :param input: The input of this layer. :type input: LayerOutput - :param filter_size: The x dimension of a filter kernel. Or input a tuple for - two image dimension. + :param filter_size: The dimensions of the filter kernel. If the parameter is + set to one integer, the two dimensions on x and y axises + will be same when filter_size_y is not set. If it is set + to a list, the first element indicates the dimension on + the x axis, and the second is used to specify the dimension + on the y axis when filter_size_y is not provided. :type filter_size: int | tuple | list - :param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle - currently supports rectangular filters, the filter's - shape will be (filter_size, filter_size_y). - :type filter_size_y: int | None + :param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter + is not set, it will be set automatically according to filter_size. + :type filter_size_y: int :param num_filters: Each filter group's number of filter :param act: Activation type. ReluActivation is the default activation. :type act: BaseActivation - :param groups: Group size of filters. + :param groups: The group number. 1 is the default group number. :type groups: int - :param stride: The x dimension of the stride. Or input a tuple for two image - dimension. + :param stride: The strides. If the parameter is set to one integer, the strides + on x and y axises will be same when stride_y is not set. If it is + set to a list, the first element indicates the stride on the x axis, + and the second is used to specify the stride on the y axis when + stride_y is not provided. 1 is the default value. :type stride: int | tuple | list - :param stride_y: The y dimension of the stride. + :param stride_y: The stride on the y axis. :type stride_y: int - :param padding: The x dimension of the padding. Or input a tuple for two - image dimension + :param padding: The padding sizes. If the parameter is set to one integer, the padding + sizes on x and y axises will be same when padding_y is not set. If it + is set to a list, the first element indicates the padding size on the + x axis, and the second is used to specify the padding size on the y axis + when padding_y is not provided. 0 is the default padding size. :type padding: int | tuple | list - :param padding_y: The y dimension of the padding. + :param padding_y: The padding size on the y axis. :type padding_y: int - :param dilation: The x dimension of the dilation. Or input a tuple for two - image dimension + :param dilation: The dimensions of the dilation. If the parameter is set to one integer, + the two dimensions on x and y axises will be same when dilation_y is not + set. If it is set to a list, the first element indicates the dimension + on the x axis, and the second is used to specify the dimension on the y + axis when dilation_y is not provided. 1 is the default dimension. :type dilation: int | tuple | list - :param dilation_y: The y dimension of the dilation. + :param dilation_y: The dimension of the dilation on the y axis. :type dilation_y: int :param bias_attr: The bias attribute. If the parameter is set to False or an object whose type is not ParameterAttribute, no bias is defined. If the parameter is set to True, the bias is initialized to zero. :type bias_attr: ParameterAttribute | None | bool | Any - :param num_channels: number of input channels. If None will be set - automatically from previous output. + :param num_channels: The number of input channels. If the parameter is not set or + set to None, its actual value will be automatically set to + the channel number of the input. :type num_channels: int - :param param_attr: Convolution param attribute. None means default attribute + :param param_attr: The parameter attribute. See ParameterAttribute for + details. :type param_attr: ParameterAttribute - :param shared_biases: Is biases will be shared between filters or not. + :param shared_biases: Whether biases will be shared between filters or not. :type shared_biases: bool - :param layer_attr: Layer Extra Attribute. + :param layer_attr: The extra layer attributes. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute - :param trans: true if it is a convTransLayer, false if it is a convLayer + :param trans: True if it is a convTransLayer, False if it is a convLayer :type trans: bool - :param layer_type: specify the layer_type, default is None. If trans=True, - layer_type has to be "exconvt" or "cudnn_convt", - otherwise layer_type has to be either "exconv" or - "cudnn_conv" - :type layer_type: String + :param layer_type: Specify the layer type. If the dilation's dimension on one axis is + larger than 1, layer_type has to be "cudnn_conv" or "cudnn_convt". + If trans=True, layer_type has to be "exconvt" or "cudnn_convt", + otherwise layer_type has to be either "exconv" or "cudnn_conv". + :type layer_type: basestring :return: LayerOutput object. :rtype: LayerOutput """ @@ -2681,7 +2712,7 @@ def img_pool_layer(input, """ Image pooling Layer. - The details of pooling layer, please refer ufldl's pooling_ . + The details of pooling layer, please refer to ufldl's pooling_ . .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ @@ -2713,32 +2744,37 @@ def img_pool_layer(input, padding_y=2, pool_type=MaxPooling()) - :param padding: pooling padding width. + :param padding: The padding size on the x axis. 0 is the default padding size. :type padding: int - :param padding_y: pooling padding height. It's equal to padding by default. - :type padding_y: int | None - :param name: name of pooling layer - :type name: basestring. + :param padding_y: The padding size on the y axis. If the parameter is not set + or set to None, it will be set to 'padding' automatically. + :param name: The name of this layer. It is optional. + :type name: basestring :param input: The input of this layer. :type input: LayerOutput - :param pool_size: pooling window width + :param pool_size: The pooling window length on the x axis. :type pool_size: int - :param pool_size_y: pooling window height. It's eaqual to pool_size by default. - :type pool_size_y: int | None - :param num_channels: number of input channel. + :param pool_size_y: The pooling window length on the y axis. If the parameter is + not set or set to None, its actual value will be automatically + set to pool_size. + :type pool_size_y: int + :param num_channels: The number of input channels. If the parameter is not set or + set to None, its actual value will be automatically set to + the channels number of the input. :type num_channels: int - :param pool_type: pooling type. MaxPooling or AvgPooling. Default is - MaxPooling. + :param pool_type: Pooling type. MaxPooling is the default pooling. :type pool_type: BasePoolingType - :param stride: stride width of pooling. + :param stride: The stride on the x axis. 1 is the default value. :type stride: int - :param stride_y: stride height of pooling. It is equal to stride by default. - :type stride_y: int | None - :param layer_attr: Extra Layer attribute. + :param stride_y: The stride on the y axis. If the parameter is not set or set to + None, its actual value will be automatically set to 'stride'. + :type stride_y: int + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute - :param ceil_mode: Wether to use ceil mode to calculate output height and with. - Defalut is True. If set false, Otherwise use floor. - + :param ceil_mode: Wether to use the ceil function to calculate output height and width. + True is the default. If it is set to False, the floor function will + be used. :type ceil_mode: bool :return: LayerOutput object. :rtype: LayerOutput @@ -2844,24 +2880,32 @@ def img_pool3d_layer(input, :param padding: pooling padding width. :type padding: int | tuple | list - :param name: name of pooling layer + :param name: The name of this layer. It is optional. :type name: basestring. :param input: The input of this layer. :type input: LayerOutput - :param pool_size: pooling window width + :param pool_size: The pooling window lengths along three axises. If the parameter + is set to one integer, the three lengths will be same. :type pool_size: int | tuple | list - :param num_channels: number of input channel. + :param num_channels: The number of input channels. If the parameter is not set or + set to None, its actual value will be automatically set to + the channels number of the input. :type num_channels: int - :param pool_type: pooling type. MaxPooling or AvgPooling. Default is - MaxPooling. + :param pool_type: Pooling type. MaxPooling is the default pooling. :type pool_type: BasePoolingType - :param stride: stride width of pooling. + :param stride: The strides of the pooling along three axises. If the parameter + is set to one integer, the three strides will be same. 1 is the + default value. :type stride: int | tuple | list - :param layer_attr: Extra Layer attribute. + :param padding: The sizes of padding along three axises. If the parameter is set to + one integer, they will be same. 0 is the default padding size. + :type padding: int | tuple | list + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute - :param ceil_mode: Wether to use ceil mode to calculate output height and with. - Defalut is True. If set false, Otherwise use floor. - + :param ceil_mode: Wether to use the ceil function to calculate output height and width. + True is the default. If it is set to False, the floor function will + be used. :type ceil_mode: bool :return: LayerOutput object. :rtype: LayerOutput @@ -3017,9 +3061,11 @@ def spp_layer(input, pyramid_height=None, layer_attr=None): """ - Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition. - The details please refer to - `Kaiming He's paper `_. + A layer performs spatial pyramid pooling. + + Reference: + Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition + https://arxiv.org/abs/1406.4729 The example usage is: @@ -3034,13 +3080,16 @@ def spp_layer(input, :type name: basestring :param input: The input of this layer. :type input: LayerOutput - :param num_channels: number of input channel. + :param num_channels: The number of input channels. If the parameter is not set or + set to None, its actual value will be automatically set to + the channels number of the input. :type num_channels: int - :param pool_type: Pooling type. MaxPooling or AveragePooling. Default is MaxPooling. + :param pool_type: Pooling type. MaxPooling is the default pooling. :type scale: BasePoolingType - :param pyramid_height: pyramid height. + :param pyramid_height: The pyramid height of this pooling. :type pyramid_height: int - :param layer_attr: Extra Layer Attribute. + :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for + details. :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput @@ -3164,6 +3213,7 @@ def batch_norm_layer(input, param_attr=None, layer_attr=None, batch_norm_type=None, + epsilon=1e-5, moving_average_fraction=0.9, use_global_stats=None, mean_var_names=None): @@ -3234,6 +3284,8 @@ def batch_norm_layer(input, will use the mean and variance of the current batch of test data. :type use_global_stats: bool | None. + :param epsilon: The small constant added to the variance to improve numeric stability. + :type epsilon: float. :param moving_average_fraction: Factor used in the moving average computation. :math:`runningMean = newMean*(1-factor) + runningMean*factor` :type moving_average_fraction: float. @@ -3251,6 +3303,7 @@ def batch_norm_layer(input, assert (batch_norm_type is None) or (batch_norm_type == "batch_norm") or \ (batch_norm_type == "mkldnn_batch_norm") or \ (batch_norm_type == "cudnn_batch_norm") + l = Layer( name=name, img3D=img3D, @@ -3260,6 +3313,7 @@ def batch_norm_layer(input, type=LayerType.BATCH_NORM_LAYER, batch_norm_type=batch_norm_type, bias=ParamAttr.to_bias(bias_attr), + epsilon=epsilon, moving_average_fraction=moving_average_fraction, use_global_stats=use_global_stats, mean_var_names=mean_var_names, @@ -4282,7 +4336,7 @@ def dot_prod_layer(input1, input2, name=None, layer_attr=None): :param name: The name of this layer. It is optional. :type name: basestring :param input1: The first input layer. - :type input: LayerOutput + :type input1: LayerOutput :param input2: The second input layer. :type input2: LayerOutput :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for @@ -4773,7 +4827,7 @@ def conv_projection(input, will be same when filter_size_y is not set. If it is set to a list, the first element indicates the dimension on the x axis, and the second is used to specify the dimension - on the y axis when filter_size is not provided. + on the y axis when filter_size_y is not provided. :type filter_size: int | tuple | list :param filter_size_y: The dimension of the filter kernel on the y axis. If the parameter is not set, it will be set automatically according to filter_size. @@ -6650,10 +6704,11 @@ def row_conv_layer(input, @layer_support() @wrap_name_default() -@wrap_param_attr_default() def prelu_layer(input, name=None, partial_sum=1, + channel_shared=None, + num_channels=None, param_attr=None, layer_attr=None): """ @@ -6684,6 +6739,14 @@ def prelu_layer(input, - partial_sum = number of outputs, indicates all elements share the same weight. :type partial_sum: int + :param channel_shared: whether or not the parameter are shared across channels. + + - channel_shared = True, we set the partial_sum to the number of outputs. + - channel_shared = False, we set the partial_sum to the number of elements in one channel. + + :type channel_shared: bool + :param num_channels: number of input channel. + :type num_channels: int :param param_attr: The parameter attribute. See ParameterAttribute for details. :type param_attr: ParameterAttribute :param layer_attr: The extra layer attribute. See ExtraLayerAttribute for @@ -6694,7 +6757,25 @@ def prelu_layer(input, """ assert isinstance(input, LayerOutput), 'prelu_layer accepts only one input.' - assert isinstance(param_attr, ParameterAttribute) + + if not param_attr: + param_attr = ParamAttr(initial_mean=0.25, initial_std=0.0) + else: + assert isinstance(param_attr, ParameterAttribute) + + if num_channels is None: + assert input.num_filters is not None, \ + 'the input channel cannot be detected, please specify the num_channels parameter' + num_channels = input.num_filters + + if channel_shared is not None: + assert isinstance(channel_shared, bool) + assert (input.height != 0 and input.width != 0), \ + 'input height and widht must be setted' + if channel_shared: + partial_sum = input.height * input.width * num_channels + else: + partial_sum = input.height * input.width l = Layer( name=name, @@ -6706,6 +6787,7 @@ def prelu_layer(input, name=name, layer_type=LayerType.PRELU, parents=input, + num_filters=num_channels, size=l.config.size) @@ -7155,7 +7237,7 @@ def img_conv3d_layer(input, :type layer_attr: ExtraLayerAttribute :param trans: True if it is a convTransLayer, False if it is a convLayer :type trans: bool - :param layer_type: Specify the layer_type. If the parameter is set, it must be "deconv3d" + :param layer_type: Specify the layer type. If the parameter is set, it must be "deconv3d" when trans=True. If not set, it will be automatically set to "deconv3d" when trans=True and "conv3d" when trans=False. :type layer_type: basestring diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr index b14121e82cb7d9516c4771fc896b9b3b9e01d1c8..3e0f957648879d4350d662b336c953273bac1378 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr @@ -65,6 +65,7 @@ layers { height: 227 width: 227 depth: 1 + epsilon: 1e-05 } layers { name: "__crmnorm_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr index c7a487a11231cba6182b654108773037bdb0ec35..a18a4652e14c0cfc4dbca87e67d31aa663ee756b 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr @@ -65,6 +65,7 @@ layers { height: 256 width: 256 depth: 1 + epsilon: 1e-05 } layers { name: "__crmnorm_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr index 832ed24a31dd2bedba9a4fce77d7a088d1796fdb..9b69ae4a3b3cbcc7c0c69a2d5b3728e2f0204f33 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr @@ -36,6 +36,7 @@ layers { height: 6 width: 20 depth: 3 + epsilon: 1e-05 } parameters { name: "___batch_norm_0__.w0" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr index 94ad56cab063df9e6a11bb1c293727fb9dec810f..63fb38c6508675d379f577b965ea17ad4c3b4942 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr @@ -4,6 +4,8 @@ layers { type: "data" size: 300 active_type: "" + height: 10 + width: 10 } layers { name: "__prelu_layer_0__" @@ -15,6 +17,9 @@ layers { input_parameter_name: "___prelu_layer_0__.w0" } partial_sum: 1 + height: 10 + width: 10 + depth: 1 } layers { name: "__prelu_layer_1__" @@ -26,6 +31,9 @@ layers { input_parameter_name: "___prelu_layer_1__.w0" } partial_sum: 1 + height: 10 + width: 10 + depth: 1 } layers { name: "__prelu_layer_2__" @@ -37,41 +45,100 @@ layers { input_parameter_name: "___prelu_layer_2__.w0" } partial_sum: 5 + height: 10 + width: 10 + depth: 1 +} +layers { + name: "__prelu_layer_3__" + type: "prelu" + size: 300 + active_type: "" + inputs { + input_layer_name: "input" + input_parameter_name: "___prelu_layer_3__.w0" + } + partial_sum: 300 + height: 10 + width: 10 + depth: 1 +} +layers { + name: "__prelu_layer_4__" + type: "prelu" + size: 300 + active_type: "" + inputs { + input_layer_name: "input" + input_parameter_name: "___prelu_layer_4__.w0" + } + partial_sum: 100 + height: 10 + width: 10 + depth: 1 } parameters { name: "___prelu_layer_0__.w0" size: 300 - initial_mean: 0.0 - initial_std: 0.057735026919 + initial_mean: 0.25 + initial_std: 0.0 + dims: 1 + dims: 300 initial_strategy: 0 - initial_smart: true + initial_smart: false } parameters { name: "___prelu_layer_1__.w0" size: 300 - initial_mean: 0.0 - initial_std: 0.057735026919 + initial_mean: 0.25 + initial_std: 0.0 + dims: 1 + dims: 300 initial_strategy: 0 - initial_smart: true + initial_smart: false } parameters { name: "___prelu_layer_2__.w0" size: 60 - initial_mean: 0.0 - initial_std: 0.129099444874 + initial_mean: 0.25 + initial_std: 0.0 + dims: 1 + dims: 60 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___prelu_layer_3__.w0" + size: 1 + initial_mean: 0.25 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___prelu_layer_4__.w0" + size: 3 + initial_mean: 0.25 + initial_std: 0.0 + dims: 1 + dims: 3 initial_strategy: 0 - initial_smart: true + initial_smart: false } input_layer_names: "input" -output_layer_names: "__prelu_layer_2__" +output_layer_names: "__prelu_layer_4__" sub_models { name: "root" layer_names: "input" layer_names: "__prelu_layer_0__" layer_names: "__prelu_layer_1__" layer_names: "__prelu_layer_2__" + layer_names: "__prelu_layer_3__" + layer_names: "__prelu_layer_4__" input_layer_names: "input" - output_layer_names: "__prelu_layer_2__" + output_layer_names: "__prelu_layer_4__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py index aae90fab32db78a70c2169ed8fafb930433f4136..45b02fbf325bb63b057bbbf64d59af8debf0bc9d 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py @@ -1,8 +1,10 @@ from paddle.trainer_config_helpers import * -data = data_layer(name='input', size=300) -prelu = prelu_layer(input=data) -prelu = prelu_layer(input=data, partial_sum=1) -prelu = prelu_layer(input=data, partial_sum=5) +data = data_layer(name='input', size=300, height=10, width=10) +prelu = prelu_layer(input=data, num_channels=3) +prelu = prelu_layer(input=data, partial_sum=1, num_channels=3) +prelu = prelu_layer(input=data, partial_sum=5, num_channels=3) +prelu = prelu_layer(input=data, channel_shared=True, num_channels=3) +prelu = prelu_layer(input=data, channel_shared=False, num_channels=3) outputs(prelu) diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 7bbe3eaaa67a117bc53571e6571365c3a26814c1..33a0829ba8d635ebd68b50f3da07da958fb79dcb 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -62,21 +62,15 @@ __all__ = [ cp.begin_parse() -def init(**kwargs): - import py_paddle.swig_paddle as api - args = [] - args_dict = {} - # NOTE: append arguments if they are in ENV - for ek, ev in os.environ.iteritems(): - if ek.startswith("PADDLE_INIT_"): - args_dict[ek.replace("PADDLE_INIT_", "").lower()] = str(ev) +def set_omp_mkl_env_vars(trainer_count): + '''Auto set CPU environment if have not set before. + export KMP_AFFINITY, OMP_DYNAMIC according to the Hyper Threading status. + export OMP_NUM_THREADS, MKL_NUM_THREADS according to trainer_count. + ''' + import platform + if not platform.system() in ['Linux', 'Darwin']: + return - args_dict.update(kwargs) - # NOTE: overwrite arguments from ENV if it is in kwargs - for key in args_dict.keys(): - args.append('--%s=%s' % (key, str(args_dict[key]))) - - # auto set cpu environment def set_env(key, value): '''If the key has not been set in the environment, set it with value.''' assert isinstance(key, str) @@ -85,22 +79,59 @@ def init(**kwargs): if envset is None: os.environ[key] = value - ht = os.popen("lscpu |grep \"per core\"|awk -F':' '{print $2}'|xargs") - ht = int(ht.read()) - if ht == 1: # ht is off - set_env("OMP_DYNAMIC", "false") - set_env("KMP_AFFINITY", "granularity=fine,compact,0,0") - else: + def num_physical_cores(): + '''Get the number of physical cores''' + if platform.system() == "Linux": + num_sockets = int( + os.popen("lscpu |grep \"Socket\" |awk -F':' '{print $2}'|xargs") + .read()) + num_cores_per_socket = int( + os.popen( + "lscpu |grep \"per socket\" |awk -F':' '{print $2}'|xargs") + .read()) + return num_sockets * num_cores_per_socket + else: + cmds = {"Darwin": "sysctl -n hw.physicalcpu"} + return int(os.popen(cmds.get(platform.system(), "expr 1")).read()) + + def num_logical_processors(): + '''Get the number of logical processors''' + cmds = { + "Linux": "grep \"processor\" /proc/cpuinfo|sort -u|wc -l", + "Darwin": "sysctl -n hw.logicalcpu" + } + return int(os.popen(cmds.get(platform.system(), "expr 1")).read()) + + num_cores = num_physical_cores() + num_processors = num_logical_processors() + if num_processors > num_cores: # Hyper Threading is enabled set_env("OMP_DYNAMIC", "true") set_env("KMP_AFFINITY", "granularity=fine,compact,1,0") - processors = os.popen("grep \"processor\" /proc/cpuinfo|sort -u|wc -l") - processors = int(processors.read()) - trainers = kwargs.get('trainer_count', 1) - threads = processors / trainers + else: + set_env("OMP_DYNAMIC", "false") + set_env("KMP_AFFINITY", "granularity=fine,compact,0,0") + threads = num_processors / trainer_count threads = '1' if threads < 1 else str(threads) set_env("OMP_NUM_THREADS", threads) set_env("MKL_NUM_THREADS", threads) + +def init(**kwargs): + import py_paddle.swig_paddle as api + args = [] + args_dict = {} + # NOTE: append arguments if they are in ENV + for ek, ev in os.environ.iteritems(): + if ek.startswith("PADDLE_INIT_"): + args_dict[ek.replace("PADDLE_INIT_", "").lower()] = str(ev) + + args_dict.update(kwargs) + # NOTE: overwrite arguments from ENV if it is in kwargs + for key in args_dict.keys(): + args.append('--%s=%s' % (key, str(args_dict[key]))) + + set_omp_mkl_env_vars(kwargs.get('trainer_count', 1)) + if 'use_gpu' in kwargs: cp.g_command_config_args['use_gpu'] = kwargs['use_gpu'] if 'use_mkldnn' in kwargs: diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py index 98b97c75ca72f11c105535e0f2a5fa0201db5d42..f10bf7e42a1ead09b3eba0d61e55701215e4360f 100644 --- a/python/paddle/v2/dataset/uci_housing.py +++ b/python/paddle/v2/dataset/uci_housing.py @@ -38,6 +38,7 @@ UCI_TEST_DATA = None URL_MODEL = 'https://github.com/PaddlePaddle/book/raw/develop/01.fit_a_line/fit_a_line.tar' MD5_MODEL = '52fc3da8ef3937822fcdd87ee05c0c9b' + def feature_range(maximums, minimums): import matplotlib matplotlib.use('Agg') @@ -114,7 +115,8 @@ def test(): def model(): - tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'fit_a_line.tar', MD5_MODEL) + tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'fit_a_line.tar', + MD5_MODEL) with open(tar_file, 'r') as f: parameters = Parameters.from_tar(f) return parameters diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py index 5df612bf3530c843c16b337f2b8f83445fcf39b5..9677c9568c6783921545364bca7b2c9c0041d823 100644 --- a/python/paddle/v2/fluid/__init__.py +++ b/python/paddle/v2/fluid/__init__.py @@ -1,11 +1,41 @@ -import sys -import core -__all__ = ['proto'] -argv = [] -if core.is_compile_gpu(): - argv = list(sys.argv) + [ - "--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory" - ] -else: - argv = list(sys.argv) + ["--tryfromenv=use_pinned_memory"] -core.init_gflags(argv) +# import all class inside framework into fluid module +import framework +from framework import * +# import all class inside executor into fluid module +import executor +from executor import * + +import io +import evaluator +import initializer +import layers +import nets +import optimizer +import backward +import regularizer + +from core import LoDTensor, CPUPlace, GPUPlace + +Tensor = LoDTensor +__all__ = framework.__all__ + executor.__all__ + [ + 'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward', + 'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor' +] + + +def __read_gflags_from_env__(): + """ + Enable reading gflags from environment variables. + + Returns: + None + """ + import sys + import core + read_env_flags = ['use_pinned_memory'] + if core.is_compile_gpu(): + read_env_flags.append('fraction_of_gpu_memory_to_use') + core.init_gflags(sys.argv + ["--tryfromenv=" + ",".join(read_env_flags)]) + + +__read_gflags_from_env__() diff --git a/python/paddle/v2/fluid/evaluator.py b/python/paddle/v2/fluid/evaluator.py index 3a8f1831cf2c44c81aee62c6ee172942db188217..bd4a6fda1fd20e68d5a42e76f6ab516bb5c00cff 100644 --- a/python/paddle/v2/fluid/evaluator.py +++ b/python/paddle/v2/fluid/evaluator.py @@ -1,14 +1,18 @@ import numpy as np -from paddle.v2.fluid.framework import Program, g_main_program, unique_name, Variable -import paddle.v2.fluid.core as core +import layers +from framework import Program, unique_name, Variable +from layer_helper import LayerHelper -def _clone_var_in_block_(block, var): +__all__ = ['Accuracy'] + + +def _clone_var_(block, var): assert isinstance(var, Variable) return block.create_var( name=var.name, shape=var.shape, - dtype=var.data_type, + dtype=var.dtype, type=var.type, lod_level=var.lod_level, persistable=True) @@ -16,172 +20,115 @@ def _clone_var_in_block_(block, var): class Evaluator(object): """ - Evalutor Base class. - - create metric states - add mini-batch evaluator caculate operator - add increment operator to accumulate the metric states + Base Class for all evaluators + + Args: + name(str): The name of evaluator. such as, "accuracy". Used for generate + temporary variable name. + main_program(Program, optional): The evaluator should be added to this + main_program. Default g_main_program + startup_program(Program, optional):The parameter should be added to this + startup_program. Default g_startup_program + + Attributes: + states(list): The list of state variables. states will be reset to zero + when `reset` is invoked. + metrics(list): The list of metrics variables. They will be calculate + every mini-batch """ def __init__(self, name, **kwargs): - """ - init the global states - """ - self._states = {} - if kwargs.has_key("main_program"): - self._main_program = kwargs.get("main_program") - else: - self._main_program = g_main_program - - def _update_ops(self, *args, **kwargs): - """ - append update ops to the global states - """ - raise NotImplementedError() + self.states = [] + self.metrics = [] + self.helper = LayerHelper(name, **kwargs) def reset(self, executor, reset_program=None): """ - Clear metric states at the begin of each pass/user specified batch + reset metric states at the begin of each pass/user specified batch """ - if reset_program == None: + if reset_program is None: reset_program = Program() - else: - reset_program = program - block = reset_program.global_block() - for k, var in self._states.iteritems(): - g_var = _clone_var_in_block_(block, var) - zeros = block.create_var(dtype="float32", persistable=True) - block.append_op( - type="fill_constant", - outputs={"Out": [zeros]}, - attrs={ - "shape": g_var.shape, - "value": .0, - "data_type": 5, - }) - block.append_op( - type="scale", inputs={"X": zeros}, outputs={"Out": g_var}) - executor.run(reset_program, fetch_list=self._states.values()) + + for var in self.states: + assert isinstance(var, Variable) + g_var = _clone_var_(reset_program.current_block(), var) + layers.fill_constant( + shape=g_var.shape, + value=0.0, + dtype=g_var.dtype, + out=g_var, + main_program=reset_program) + + executor.run(reset_program) def eval(self, executor, eval_program=None): """ - Merge the mini-batch statistics to form the evaluation result for multiple mini-batches. + Evaluate the statistics merged by multiple mini-batches. """ raise NotImplementedError() + def create_state(self, suffix, dtype, shape): + """ + Create state variable. + + NOTE: It is not a public API. + + Args: + suffix(str): the state suffix. + dtype(str|core.DataType): the state data type + shape(tuple|list): the shape of state + + Returns: State variable + + """ + state = self.helper.create_variable( + name="_".join([unique_name(self.helper.name), suffix]), + persistable=True, + dtype=dtype, + shape=shape) + self.states.append(state) + return state + class Accuracy(Evaluator): """ - Accuracy need two state variable Total, Correct + Average Accuracy for multiple mini-batches. """ - def __init__(self, *args, **kwargs): + def __init__(self, input, label, k=1, **kwargs): super(Accuracy, self).__init__("accuracy", **kwargs) - block = self._main_program.global_block() - g_total = block.create_var( - name=unique_name("Total"), - persistable=True, - dtype="int64", - shape=[1]) - g_correct = block.create_var( - name=unique_name("Correct"), - persistable=True, - dtype="int64", - shape=[1]) - self._states["Total"] = g_total - self._states["Correct"] = g_correct - - def _update_ops(self, input, label, k=1, **kwargs): - block = self._main_program.global_block() - topk_out = block.create_var(dtype=input.data_type) - topk_indices = block.create_var(dtype="int64") - block.append_op( - type="top_k", - inputs={"X": [input]}, - outputs={"Out": [topk_out], - "Indices": [topk_indices]}, - attrs={"k": k}) - acc_out = block.create_var(dtype=kwargs.get("out_dtype", "float32")) - correct = block.create_var(dtype="int64", persistable=True) - total = block.create_var(dtype="int64", persistable=True) - block.append_op( - type="accuracy", - inputs={ - "Out": [topk_out], - "Indices": [topk_indices], - "Label": [label] - }, - outputs={ - "Accuracy": [acc_out], - "Correct": [correct], - "Total": [total], - }) - - block.append_op( - type="cast", - inputs={"X": [self._states["Total"]]}, - outputs={"Out": [self._states["Total"]]}, - attrs={ - "in_data_type": 5, # float32 - "out_data_type": 2, #int32 - }) - block.append_op( - type="cast", - inputs={"X": [self._states["Correct"]]}, - outputs={"Out": [self._states["Correct"]]}, - attrs={ - "in_data_type": 5, - "out_data_type": 2, - }) - - block.append_op( - type="elementwise_add", - inputs={"X": [self._states["Total"]], - "Y": [total]}, - outputs={"Out": [self._states["Total"]]}) - block.append_op( - type="elementwise_add", - inputs={"X": [self._states["Correct"]], - "Y": [correct]}, - outputs={"Out": [self._states["Correct"]]}) - - return acc_out + main_program = self.helper.main_program + if main_program.current_block().idx != 0: + raise ValueError("You can only invoke Evaluator in root block") + + self.total = self.create_state(dtype='int64', shape=[1], suffix='total') + self.correct = self.create_state( + dtype='int64', shape=[1], suffix='correct') + kwargs = {'main_program': main_program} + total = self.helper.create_tmp_variable(dtype='int') + correct = self.helper.create_tmp_variable(dtype='int') + acc = layers.accuracy( + input=input, + label=label, + k=k, + total=total, + correct=correct, + **kwargs) + total = layers.cast(x=total, dtype='int64', **kwargs) + correct = layers.cast(x=correct, dtype='int64', **kwargs) + layers.sums(input=[self.total, total], out=self.total, **kwargs) + layers.sums(input=[self.correct, correct], out=self.correct, **kwargs) + + self.metrics.append(acc) def eval(self, executor, eval_program=None): - if eval_program != None: - eval_program = eval_program - else: + if eval_program is None: eval_program = Program() - block = eval_program.global_block() - eval_out = block.create_var(dtype=self._states["Total"].data_type) - e_total = _clone_var_in_block_(block, self._states["Total"]) - e_correct = _clone_var_in_block_(block, self._states["Correct"]) - block.append_op( - type="cast", - inputs={"X": [e_total]}, - outputs={"Out": [e_total]}, - attrs={ - "in_data_type": 2, #int32 - "out_data_type": 5, #float32 - }) - block.append_op( - type="cast", - inputs={"X": [e_correct]}, - outputs={"Out": [e_correct]}, - attrs={ - "in_data_type": 2, - "out_data_type": 5, - }) - block.append_op( - type="elementwise_div", - inputs={"X": e_correct, - "Y": e_total}, - outputs={"Out": eval_out}) - out = executor.run(eval_program, fetch_list=[eval_out]) - return np.array(out[0]) - - -def accuracy(*args, **kwargs): - cls = Accuracy(*args, **kwargs) - out = cls._update_ops(*args, **kwargs) - return cls, out + block = eval_program.current_block() + kwargs = {'main_program': eval_program} + total = _clone_var_(block, self.total) + correct = _clone_var_(block, self.correct) + total = layers.cast(total, dtype='float32', **kwargs) + correct = layers.cast(correct, dtype='float32', **kwargs) + out = layers.elementwise_div(x=correct, y=total, **kwargs) + return np.array(executor.run(eval_program, fetch_list=[out])[0]) diff --git a/python/paddle/v2/fluid/executor.py b/python/paddle/v2/fluid/executor.py index ed1c2c06daa7ede97e138049a1f7044d071c31e8..3e26d1b983a3c924ce2392c266bcd32e27c7b309 100644 --- a/python/paddle/v2/fluid/executor.py +++ b/python/paddle/v2/fluid/executor.py @@ -1,9 +1,40 @@ -import paddle.v2.fluid.core as core -from paddle.v2.fluid.framework import Block, Program, g_main_program +import numpy as np +from . import core +from framework import Program, g_main_program + +__all__ = ['Executor', 'g_scope'] g_scope = core.Scope() +def as_numpy(tensor): + if isinstance(tensor, list): + return [as_numpy(t) for t in tensor] + assert isinstance(tensor, core.LoDTensor) + lod = tensor.lod() + tensor_data = np.array(tensor) + if len(lod) == 0: + ans = tensor_data + else: + raise RuntimeError("LoD Calculate lacks unit tests and buggy") + # elif len(lod) == 1: + # ans = [] + # idx = 0 + # while idx < len(lod) - 1: + # ans.append(tensor_data[lod[idx]:lod[idx + 1]]) + # idx += 1 + # else: + # for l in reversed(lod): + # ans = [] + # idx = 0 + # while idx < len(l) - 1: + # ans.append(tensor_data[l[idx]:l[idx + 1]]) + # idx += 1 + # tensor_data = ans + # ans = tensor_data + return ans + + class Executor(object): def __init__(self, places): if not isinstance(places, list) and not isinstance(places, tuple): @@ -16,6 +47,47 @@ class Executor(object): act_places.append(p) self.executor = core.Executor(act_places) + self.places = places + + def aslodtensor(self, data): + def accumulate(data): + if not isinstance(data, list): + return 1 + return sum([accumulate(sub) for sub in data]) + + def parselod(data): + seq_lens = [accumulate(seq) for seq in data] + cur_len = 0 + lod = [cur_len] + for l in seq_lens: + cur_len += l + lod.append(cur_len) + return lod + + assert len(self.places) != 0 + if not isinstance(data, list): + # pure tensor case + tensor = core.LoDTensor() + tensor.set(data, self.places[0]) + return tensor + else: + raise RuntimeError("Current implementation lacks unittests") + # lodtensor case + lod = [] + if not isinstance(data[0], list): + lod.append(parselod(data)) + flattened_data = np.concatenate(data, axis=0).astype("int64") + else: + while isinstance(data[0], list): + lod.append(parselod(seq)) + flattened_data = [item for seq in data for item in seq] + data = flattened_data + flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + tensor = core.LoDTensor() + tensor.set(flattened_data, self.places[0]) + tensor.set_lod(lod) + return tensor def run(self, program=None, @@ -23,7 +95,8 @@ class Executor(object): fetch_list=None, feed_var_name='feed', fetch_var_name='fetch', - scope=None): + scope=None, + return_numpy=True): if feed is None: feed = {} if fetch_list is None: @@ -52,7 +125,10 @@ class Executor(object): inputs={'X': [feed_var]}, outputs={'Out': [out]}, attrs={'col': i}) - core.set_feed_variable(scope, feed[name], feed_var.name, i) + cur_feed = feed[name] + if not isinstance(cur_feed, core.LoDTensor): + cur_feed = self.aslodtensor(cur_feed) + core.set_feed_variable(scope, cur_feed, feed_var.name, i) fetch_var = global_block.create_var( name=fetch_var_name, @@ -66,7 +142,11 @@ class Executor(object): attrs={'col': i}) self.executor.run(program.desc, scope, 0, True) - return [ + outs = [ core.get_fetch_variable(scope, fetch_var_name, i) for i in xrange(len(fetch_list)) ] + + if return_numpy: + outs = as_numpy(outs) + return outs diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index acca6ba35ced8674d4eec7dc57e41673c90cf8f8..6d6ea23f55eebc57cb120582a7c82d77eb1df45c 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -1,12 +1,12 @@ -import paddle.v2.fluid.core as core -import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 import collections + import numpy as np -import copy +from . import core +import proto.framework_pb2 as framework_pb2 __all__ = [ 'Block', 'Variable', 'Program', 'Operator', 'default_startup_program', - 'default_main_program' + 'default_main_program', 'g_startup_program', 'g_main_program' ] @@ -15,6 +15,37 @@ def unique_name(prefix): return "_".join([prefix, str(uid)]) +def convert_np_dtype_to_dtype_(np_dtype): + dtype = np.dtype(np_dtype) + if dtype == np.float32: + return core.DataType.FP32 + elif dtype == np.float64: + return core.DataType.FP64 + elif dtype == np.float16: + return core.DataType.FP16 + elif dtype == np.int32: + return core.DataType.INT32 + elif dtype == np.int16: + return core.DataType.INT16 + elif dtype == np.int64: + return core.DataType.INT64 + elif dtype == np.bool: + return core.DataType.BOOL + else: + raise ValueError("Not supported numpy dtype " + str(dtype)) + + +def dtype_is_floating(dtype): + if not isinstance(dtype, core.DataType): + dtype = convert_np_dtype_to_dtype_(dtype) + + if (dtype == core.DataType.FP16 or dtype == core.DataType.FP32 or + dtype == core.DataType.FP64): + return True + else: + return False + + def _debug_string_(proto, throw_on_error=True): error_fields = list() if not proto.IsInitialized(error_fields) and throw_on_error: @@ -66,11 +97,11 @@ class Variable(object): "matched.".format(self.name, old_shape, shape)) if dtype is not None: if not isinstance(dtype, core.DataType): - dtype = Variable._convert_np_dtype_to_dtype_(dtype) + dtype = convert_np_dtype_to_dtype_(dtype) if is_new_var: - self.desc.set_data_type(dtype) + self.desc.set_dtype(dtype) else: - old_dtype = self.data_type + old_dtype = self.dtype if dtype != old_dtype: raise ValueError("Variable {0} has been created before. " "The previous data type is {1}; the new " @@ -131,8 +162,8 @@ class Variable(object): return tuple(self.desc.shape()) @property - def data_type(self): - return self.desc.data_type() + def dtype(self): + return self.desc.dtype() @property def lod_level(self): @@ -148,26 +179,6 @@ class Variable(object): uid = core.unique_integer(prefix) # unique during whole process. return "_".join([prefix, str(uid)]) - @staticmethod - def _convert_np_dtype_to_dtype_(np_dtype): - dtype = np.dtype(np_dtype) - if dtype == np.float32: - return core.DataType.FP32 - elif dtype == np.float64: - return core.DataType.FP64 - elif dtype == np.float16: - return core.DataType.FP16 - elif dtype == np.int32: - return core.DataType.INT32 - elif dtype == np.int16: - return core.DataType.INT16 - elif dtype == np.int64: - return core.DataType.INT64 - elif dtype == np.bool: - return core.DataType.BOOL - else: - raise ValueError("Not supported numpy dtype " + str(dtype)) - def get_all_op_protos(): """ @@ -384,7 +395,11 @@ class Block(object): return v def all_parameters(self): - return {v for k, v in self.vars.iteritems() if isinstance(v, Parameter)} + return list(self.iter_parameters()) + + def iter_parameters(self): + return (item[1] for item in self.vars.iteritems() + if isinstance(item[1], Parameter)) def create_var(self, *args, **kwargs): var = Variable(self, *args, **kwargs) @@ -458,6 +473,37 @@ class Block(object): for index in range(len(self.ops)): assert self.ops[index].desc == ops_in_cpp[index] + def copy_param_info_from(self, other): + """ + Copy the information of parameters from other block + Args: + other(Block): other block + + Returns: + None + """ + if not isinstance(other, Block): + raise TypeError("copy_param_info_from should be invoked with Block") + for p in other.iter_parameters(): + assert isinstance(p, Parameter) + v = self.vars.get(p.name, None) + if v is None: + raise ValueError("copy_param_info_from should be invoked with " + "same topology") + assert isinstance(v, Variable) + new_p = Parameter( + block=self, + shape=v.shape, + dtype=v.dtype, + type=v.type, + lod_level=v.lod_level, + stop_gradient=p.stop_gradient, + trainable=p.trainable, + optimize_attr=p.optimize_attr, + regularizer=p.regularizer, + name=v.name) + self.vars[new_p.name] = new_p + class Program(object): def __init__(self): @@ -478,6 +524,7 @@ class Program(object): p.desc = core.ProgramDesc(self.desc) p.blocks = [Block(p, i) for i in xrange(self.desc.num_blocks())] p.sync_with_cpp() + p.copy_param_info_from(self) return p def prune(self, targets): @@ -500,6 +547,13 @@ class Program(object): res.sync_with_cpp() return res + def inference_optimize(self): + res = Program() + res.desc = core.inference_optimize(self.desc) + res.blocks = [Block(res, i) for i in xrange(res.desc.num_blocks())] + res.sync_with_cpp() + return res + @staticmethod def parse_from_string(binary_str): p = Program() @@ -554,6 +608,24 @@ class Program(object): for block in self.blocks: block.sync_with_cpp() + def copy_param_info_from(self, other): + """ + Copy the information of parameters from other program. + Args: + other(Program): Other program + + Returns: + None + """ + if not isinstance(other, Program): + raise TypeError("copy_param_info_from should be invoked with " + "Program") + + if len(self.blocks) != len(other.blocks): + raise ValueError("copy_param_info_from should be invoked with two " + "program, with represent the same topology") + self.global_block().copy_param_info_from(other.global_block()) + def list_vars(self): for each_block in self.blocks: for each_var in each_block.vars.itervalues(): diff --git a/python/paddle/v2/fluid/initializer.py b/python/paddle/v2/fluid/initializer.py index ded144ecd5db83ce50ca0dc6243fdc52ac0b7a2f..d3f648f8460814a3f251d7aa9560d748af85235c 100644 --- a/python/paddle/v2/fluid/initializer.py +++ b/python/paddle/v2/fluid/initializer.py @@ -1,10 +1,7 @@ -import paddle.v2.fluid.framework as framework +import framework import numpy as np -__all__ = [ - 'ConstantInitializer', 'UniformInitializer', 'NormalInitializer', - 'XavierInitializer' -] +__all__ = ['Constant', 'Uniform', 'Normal', 'Xavier'] class Initializer(object): @@ -93,7 +90,7 @@ class ConstantInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "value": self._value }) var.op = op @@ -140,7 +137,7 @@ class UniformInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "min": self._low, "max": self._high, "seed": self._seed @@ -188,7 +185,7 @@ class NormalInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "mean": self._mean, "std": self._std_dev, "seed": self._seed @@ -265,7 +262,7 @@ class XavierInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "min": -limit, "max": limit, "seed": self._seed @@ -278,10 +275,109 @@ class XavierInitializer(Initializer): outputs={"Out": var}, attrs={ "shape": var.shape, - "data_type": int(var.data_type), + "dtype": int(var.dtype), "mean": 0.0, "std": std, "seed": self._seed }) var.op = op return op + + +class MSRAInitializer(Initializer): + """Implements the MSRA initializer a.k.a. Kaiming Initializer + + This class implements the weight initialization from the paper + Delving Deep into Rectifiers: Surpassing Human-Level Performance on + ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren + and Jian Sun. This is a robust initialization method that particularly + considers the rectifier nonlinearities. In case of Uniform distribution, + the range is [-x, x], where x = sqrt(6 / fan_in). In case of Normal + distribution, the mean is 0 and the standard deviation + is sqrt(2/ fan_in). + + References: + [1] Delving Deep into Rectifiers: Surpassing Human-Level Performance + on ImageNet Classification + (https://arxiv.org/abs/1502.01852) + """ + + def __init__(self, uniform=True, fan_in=None, seed=0): + """Constructor for MSRAInitializer + + Args: + uniform: whether to use uniform or normal distribution + fan_in: fan_in for MSRAInitializer. If None, it is + inferred from the variable. + seed: random seed + + Note: It is recommended to set fan_in to None for most cases. + """ + assert uniform is not None + assert seed is not None + super(MSRAInitializer, self).__init__() + self._uniform = uniform + self._fan_in = fan_in + self._seed = seed + + def __call__(self, var, block): + """Add MSRA initialization ops for a variable + + Args: + var: Variable that needs to be initialized + block: The block in which initialization ops + should be added + + Returns: + the initialization op + """ + assert isinstance(var, framework.Variable) + assert isinstance(block, framework.Block) + f_in, f_out = self._compute_fans(var) + + # If fan_in is passed, use it + fan_in = f_in if self._fan_in is None else self._fan_in + + if self._uniform: + limit = np.sqrt(6.0 / float(fan_in)) + op = block.prepend_op( + type="uniform_random", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "dtype": int(var.dtype), + "min": -limit, + "max": limit, + "seed": self._seed + }) + + else: + std = np.sqrt(2.0 / float(fan_in)) + op = block.prepend_op( + type="gaussian_random", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "dtype": int(var.dtype), + "mean": 0.0, + "std": std, + "seed": self._seed + }) + var.op = op + return op + + +# We short the class name, since users will use the initializer with the package +# name. The sample code: +# +# import paddle.fluid as fluid +# +# hidden = fluid.layers.fc(..., +# param_attr=ParamAttr(fluid.initializer.Xavier())) +# +# It is no need to add an `Initializer` as the class suffix +Constant = ConstantInitializer +Uniform = UniformInitializer +Normal = NormalInitializer +Xavier = XavierInitializer +MSRA = MSRAInitializer diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py index 2d070814eef0b099ba71bef223596e30388ac48a..e5b2aa3b919df4cec1091c0bbd39b7e400cc6867 100644 --- a/python/paddle/v2/fluid/io.py +++ b/python/paddle/v2/fluid/io.py @@ -6,7 +6,8 @@ from paddle.v2.fluid.framework import Program, Parameter, g_main_program, \ __all__ = [ 'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params', - 'load_persistables', "save_inference_model", "load_inference_model" + 'load_persistables', "save_inference_model", "load_inference_model", + "get_inference_program" ] @@ -23,7 +24,7 @@ def _clone_var_in_block_(block, var): return block.create_var( name=var.name, shape=var.shape, - dtype=var.data_type, + dtype=var.dtype, type=var.type, lod_level=var.lod_level, persistable=True) @@ -151,6 +152,17 @@ def load_persistables(executor, dirname, main_program=None): predicate=is_persistable) +def get_inference_program(target_vars, main_program=None): + if main_program is None: + main_program = g_main_program + if not isinstance(target_vars, list): + target_vars = [target_vars] + + pruned_program = main_program.prune(targets=target_vars) + inference_program = pruned_program.inference_optimize() + return inference_program + + def save_inference_model(dirname, feeded_var_names, target_vars, @@ -177,13 +189,14 @@ def save_inference_model(dirname, if not os.path.isdir(dirname): os.makedirs(dirname) - pruned_program = main_program.prune(target_vars) + pruned_program = main_program.prune(targets=target_vars) + inference_program = pruned_program.inference_optimize() fetch_var_names = [v.name for v in target_vars] model_file_name = dirname + "/__model__" with open(model_file_name, "w") as f: pickle.dump({ - "program_desc_str": pruned_program.desc.serialize_to_string(), + "program_desc_str": inference_program.desc.serialize_to_string(), "feed_var_names": feeded_var_names, "fetch_var_names": fetch_var_names }, f, -1) diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py index a97e07982bd89be72386970f28a0dd049f82372d..5f8855551114a9a9b671d1630c9e8a3f0cb5c04b 100644 --- a/python/paddle/v2/fluid/layer_helper.py +++ b/python/paddle/v2/fluid/layer_helper.py @@ -1,10 +1,9 @@ import copy import itertools -from paddle.v2.fluid.framework import Variable, g_main_program, \ - g_startup_program, unique_name, Program -from paddle.v2.fluid.initializer import ConstantInitializer, \ - UniformInitializer, XavierInitializer +from framework import Variable, g_main_program, \ + g_startup_program, unique_name, dtype_is_floating +from paddle.v2.fluid.initializer import Constant, Xavier class LayerHelper(object): @@ -61,7 +60,7 @@ class LayerHelper(object): @property def param_attr(self): - default = {'name': None, 'initializer': XavierInitializer()} + default = {'name': None} actual = self.kwargs.get('param_attr', None) if actual is None: actual = default @@ -72,7 +71,7 @@ class LayerHelper(object): @property def bias_attr(self): - default = {'name': None, 'initializer': ConstantInitializer()} + default = {'name': None} bias_attr = self.kwargs.get('bias_attr', None) if bias_attr is None: bias_attr = default @@ -108,8 +107,8 @@ class LayerHelper(object): dtype = None for each in inputs: if dtype is None: - dtype = each.data_type - elif dtype != each.data_type: + dtype = each.dtype + elif dtype != each.dtype: raise ValueError("Data Type mismatch") return dtype @@ -119,12 +118,17 @@ class LayerHelper(object): attr_copy = copy.deepcopy(attr) if initializer is not None: attr_copy['initializer'] = initializer + else: + attr_copy['initializer'] = self._get_default_initializer(dtype) if attr_copy['name'] is None: attr_copy['name'] = unique_name(".".join([self.name, suffix])) self.startup_program.global_block().create_parameter( dtype=dtype, shape=shape, **attr_copy) return self.main_program.global_block().create_parameter( - name=attr_copy['name'], dtype=dtype, shape=shape) + name=attr_copy['name'], + dtype=dtype, + shape=shape, + trainable=attr_copy.get('trainable', True)) def create_tmp_variable(self, dtype): return self.main_program.current_block().create_var( @@ -144,18 +148,24 @@ class LayerHelper(object): self.startup_program.global_block().create_var( name=var.name, type=var.type, - dtype=var.data_type, + dtype=var.dtype, shape=var.shape, persistable=True, initializer=initializer) - def append_bias_op(self, input_var, dim_start=1, dim_end=None): + def append_bias_op(self, + input_var, + bias_initializer, + dim_start=1, + dim_end=None): """ Append bias operator and return its output. If the user does not set bias_attr, append_bias_op will return input_var - :param input_var: the input variable. The len(input_var.shape) is larger - or equal than 2. + :param input_var: the input variable. The len(input_var.shape) is + larger or equal than 2. + :bias_initializer: an instance of a subclass of Initializer used to + initialize the bias :param dim_start: :param dim_end: the shape of the bias will be input_var.shape[dim_start:dim_end]. The bias is broadcasted to other @@ -167,8 +177,12 @@ class LayerHelper(object): return input_var b = self.create_parameter( - attr=bias_attr, shape=size, dtype=input_var.data_type, suffix='b') - tmp = self.create_tmp_variable(dtype=input_var.data_type) + attr=bias_attr, + shape=size, + dtype=input_var.dtype, + suffix='b', + initializer=bias_initializer) + tmp = self.create_tmp_variable(dtype=input_var.dtype) self.append_op( type='elementwise_add', inputs={'X': [input_var], @@ -183,7 +197,7 @@ class LayerHelper(object): return input_var if isinstance(act, basestring): act = {'type': act} - tmp = self.create_tmp_variable(dtype=input_var.data_type) + tmp = self.create_tmp_variable(dtype=input_var.dtype) act_type = act.pop('type') self.append_op( type=act_type, @@ -191,3 +205,10 @@ class LayerHelper(object): outputs={"Y": [tmp]}, attrs=act) return tmp + + def _get_default_initializer(self, dtype): + if dtype is None or dtype_is_floating(dtype) is True: + return Xavier() + else: + # For integer and boolean types, initialize with all zeros + return Constant() diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py index 1789d2f82a8813331b3610fc69f8447925cd7501..28bc3d214b559a089efb2bb736eb49cb1ba4de25 100644 --- a/python/paddle/v2/fluid/layers.py +++ b/python/paddle/v2/fluid/layers.py @@ -1,9 +1,7 @@ -import paddle.v2.fluid.core as core -import paddle.v2.fluid.proto.framework_pb2 as framework_pb2 -from paddle.v2.fluid.framework import OpProtoHolder, Variable, Program, \ - Operator -from paddle.v2.fluid.initializer import ConstantInitializer, \ - NormalInitializer +from . import core +import proto.framework_pb2 as framework_pb2 +from framework import OpProtoHolder, Variable, Program, Operator +from initializer import Constant, Normal, Xavier from paddle.v2.fluid.layer_helper import LayerHelper, unique_name import re import cStringIO @@ -17,11 +15,13 @@ __all__ = [ def fc(input, size, + num_flatten_dims=1, param_attr=None, + param_initializer=None, bias_attr=None, - name=None, + bias_initializer=None, act=None, - num_flatten_dims=1, + name=None, main_program=None, startup_program=None): """ @@ -30,11 +30,15 @@ def fc(input, Args: input: The input tensor to the function size: The size of the layer + num_flatten_dims: Number of columns in input param_attr: The parameters/weights to the FC Layer + param_initializer: Initializer used for the weight/parameter. + If None, XavierInitializer() is used bias_attr: The bias parameter for the FC layer - name: Name/alias of the function + bias_initializer: Initializer used for the bias. + If None, then ConstantInitializer() is used act: Activation to be applied to the output of FC layer - num_flatten_dims: Number of columns in input + name: Name/alias of the function main_program: Name of the main program that calls this startup_program: Name of the startup program @@ -50,10 +54,23 @@ def fc(input, to the LayerHelper constructor. """ + + def _get_default_param_initializer(): + return Xavier() + + def _get_default_bias_initializer(): + return Constant() + helper = LayerHelper('fc', **locals()) dtype = helper.input_dtype() + if param_initializer is None: + param_initializer = _get_default_param_initializer() + + if bias_initializer is None: + bias_initializer = _get_default_bias_initializer() + mul_results = [] for input_var, param_attr in helper.iter_inputs_and_params(): input_shape = input_var.shape @@ -61,7 +78,10 @@ def fc(input, reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1) ] + [size] w = helper.create_parameter( - attr=param_attr, shape=param_shape, dtype=dtype) + attr=param_attr, + initializer=param_initializer, + shape=param_shape, + dtype=dtype) tmp = helper.create_tmp_variable(dtype) helper.append_op( type="mul", @@ -82,16 +102,17 @@ def fc(input, helper.append_op( type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias}) # add bias - pre_activation = helper.append_bias_op(pre_bias) + pre_activation = helper.append_bias_op(pre_bias, bias_initializer) # add activation return helper.append_activation(pre_activation) def embedding(input, size, - data_type='float32', is_sparse=False, + param_initializer=None, param_attr=None, + dtype='float32', main_program=None, startup_program=None): """ @@ -100,9 +121,9 @@ def embedding(input, Args: input: The input to the function size: The size of the layer - data_type: The type of data : float32, float_16, int etc is_sparse: A flag that decleares whether the input is sparse param_attr: Parameters for this layer + dtype: The type of data : float32, float_16, int etc main_program: Name of the main program that calls this startup_program: Name of the startup program @@ -114,10 +135,17 @@ def embedding(input, to the LayerHelper constructor. """ + + def _get_default_param_initializer(): + return Xavier() + helper = LayerHelper('embedding', **locals()) w = helper.create_parameter( - attr=helper.param_attr, shape=size, dtype=data_type) - tmp = helper.create_tmp_variable(data_type) + attr=helper.param_attr, + shape=size, + dtype=dtype, + initializer=param_initializer or _get_default_param_initializer()) + tmp = helper.create_tmp_variable(dtype) helper.append_op( type='lookup_table', inputs={'Ids': input, @@ -130,7 +158,6 @@ def embedding(input, # TODO(qijun): expose H0 and C0 def dynamic_lstm(input, size, - data_type='float32', param_attr=None, bias_attr=None, use_peepholes=True, @@ -138,22 +165,23 @@ def dynamic_lstm(input, gate_activation='sigmoid', cell_activation='tanh', candidate_activation='tanh', + dtype='float32', main_program=None, startup_program=None): helper = LayerHelper('lstm', **locals()) size = size / 4 weight = helper.create_parameter( - attr=helper.param_attr, shape=[size, 4 * size], dtype=data_type) + attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype) bias_size = [1, 7 * size] if not use_peepholes: bias_size[1] = 4 * size bias = helper.create_parameter( - attr=helper.bias_attr, shape=bias_size, dtype=data_type, suffix='b') + attr=helper.bias_attr, shape=bias_size, dtype=dtype, suffix='b') - hidden = helper.create_tmp_variable(data_type) - cell = helper.create_tmp_variable(data_type) - batch_gate = helper.create_tmp_variable(data_type) - batch_cell_pre_act = helper.create_tmp_variable(data_type) + hidden = helper.create_tmp_variable(dtype) + cell = helper.create_tmp_variable(dtype) + batch_gate = helper.create_tmp_variable(dtype) + batch_cell_pre_act = helper.create_tmp_variable(dtype) helper.append_op( type='lstm', @@ -178,9 +206,9 @@ def dynamic_lstm(input, def data(name, shape, - data_type='float32', - type=core.VarDesc.VarType.LOD_TENSOR, append_batch_size=True, + dtype='float32', + type=core.VarDesc.VarType.LOD_TENSOR, main_program=None, startup_program=None, stop_gradient=True): @@ -190,9 +218,9 @@ def data(name, Args: name: The name/alias of the function shape: Tuple declaring the shape. - data_type: The type of data : float32, float_16, int etc - type: The output type. By default it is LOD_TENSOR. append_batch_size: Whether or not to append the data as a batch. + dtype: The type of data : float32, float_16, int etc + type: The output type. By default it is LOD_TENSOR. main_program: Name of the main program that calls this startup_program: Name of the startup program stop_gradient: A boolean that mentions whether gradient should flow. @@ -221,12 +249,12 @@ def data(name, return helper.create_global_variable( name=name, shape=shape, - dtype=data_type, + dtype=dtype, type=type, stop_gradient=stop_gradient) -def create_tensor(dtype, name=None, main_program=None): +def create_tensor(dtype, name=None, main_program=None, startup_program=None): helper = LayerHelper("create_tensor", **locals()) return helper.create_variable(name=helper.name, dtype=dtype) @@ -332,9 +360,9 @@ def _create_op_func_(op_type): o_name = not_intermediate_outputs[0].name intermediate_output_names = [output.name for output in intermediate_outputs] - def infer_and_check_data_type(op_proto, **kwargs): + def infer_and_check_dtype(op_proto, **kwargs): """ - This function performs the sanity check for data_type and + This function performs the sanity check for dtype and instance type. """ dtype = None @@ -349,8 +377,8 @@ def _create_op_func_(op_type): op_type)) if dtype is None: - dtype = each.data_type - elif dtype != each.data_type: + dtype = each.dtype + elif dtype != each.dtype: raise ValueError( "operator {0} must input same dtype".format(op_type)) @@ -359,7 +387,7 @@ def _create_op_func_(op_type): def func(**kwargs): helper = LayerHelper(op_type, **kwargs) - dtype = infer_and_check_data_type(op_proto, **kwargs) + dtype = infer_and_check_dtype(op_proto, **kwargs) inputs = dict() for ipt in op_proto.inputs: @@ -388,45 +416,28 @@ def _create_op_func_(op_type): _create_op_func_('mean') _create_op_func_('mul') _create_op_func_('elementwise_add') +_create_op_func_('elementwise_div') _create_op_func_('dropout') _create_op_func_('reshape') -_create_op_func_('elementwise_add') _create_op_func_('sigmoid') _create_op_func_('scale') _create_op_func_('reshape') _create_op_func_('transpose') -def fill_constant(data_type, shape, value=None, program=None): - """ - This function creates a tensor , with shape as mentioned in the input and - specified data_type and fills this up with a constant value that - comes in the input. - """ - helper = LayerHelper('fill_constant', **locals()) - out = helper.create_tmp_variable(dtype=data_type) - helper.append_op( - type='fill_constant', - outputs={'Out': [out]}, - attrs={'data_type': data_type, - 'shape': shape, - 'value': value}) - return out - - -def cast(x, data_type, main_program=None): +def cast(x, dtype, main_program=None): """ - This function takes in the input with input_data_type - and casts it to the output_data_type as the output. + This function takes in the input with input_dtype + and casts it to the output_dtype as the output. """ helper = LayerHelper('cast', **locals()) - out = helper.create_tmp_variable(dtype=data_type) + out = helper.create_tmp_variable(dtype=dtype) helper.append_op( type='cast', inputs={'X': [x]}, outputs={'Out': [out]}, - attrs={'in_data_type': x.data_type, - 'out_data_type': out.data_type}) + attrs={'in_dtype': x.dtype, + 'out_dtype': out.dtype}) return out @@ -445,18 +456,54 @@ def concat(input, axis, main_program=None, startup_program=None): return out -def sums(input, main_program=None, startup_program=None): +def sums(input, out=None, main_program=None, startup_program=None): """ This function takes in the input and performs the sum operation on it and returns that as the output. """ helper = LayerHelper('sum', **locals()) - out = helper.create_tmp_variable(dtype=helper.input_dtype()) + if out is None: + out = helper.create_tmp_variable(dtype=helper.input_dtype()) helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out}) return out -def assign(input, output, main_program=None): +def linear_chain_crf(input, + label, + param_attr=None, + param_initializer=None, + main_program=None, + startup_program=None): + def _get_default_param_initializer(): + return Xavier() + + helper = LayerHelper('linear_chain_crf', **locals()) + size = input.shape[1] + transition = helper.create_parameter( + attr=helper.param_attr, + shape=[size + 2, size], + dtype=helper.input_dtype(), + initializer=param_initializer or _get_default_param_initializer()) + alpha = helper.create_tmp_variable(dtype=helper.input_dtype()) + emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype()) + transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype()) + log_likelihood = helper.create_tmp_variable(dtype=helper.input_dtype()) + helper.append_op( + type='linear_chain_crf', + inputs={"Emission": [input], + "Transition": transition, + "Label": label}, + outputs={ + "Alpha": [alpha], + "EmissionExps": [emission_exps], + "TransitionExps": transition_exps, + "LogLikelihood": log_likelihood + }) + + return log_likelihood + + +def assign(input, output, main_program=None, startup_program=None): helper = LayerHelper('assign', **locals()) helper.append_op( type='scale', @@ -468,12 +515,12 @@ def assign(input, output, main_program=None): def split_lod_tensor(input, mask, - level, + level=0, main_program=None, startup_program=None): helper = LayerHelper('split_lod_tensor', **locals()) - out_true = helper.create_tmp_variable(dtype=input.data_type) - out_false = helper.create_tmp_variable(dtype=input.data_type) + out_true = helper.create_tmp_variable(dtype=input.dtype) + out_false = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( type='split_lod_tensor', inputs={ @@ -490,11 +537,11 @@ def merge_lod_tensor(in_true, in_false, x, mask, - level, + level=0, main_program=None, startup_program=None): helper = LayerHelper('merge_lod_tensor', **locals()) - out = helper.create_tmp_variable(dtype=x.data_type) + out = helper.create_tmp_variable(dtype=in_true.dtype) helper.append_op( type='merge_lod_tensor', inputs={'X': x, @@ -512,9 +559,9 @@ def cos_sim(X, Y, **kwargs): X and Y and returns that as the output. """ helper = LayerHelper('cos_sim', **kwargs) - out = helper.create_tmp_variable(dtype=X.data_type) - xnorm = helper.create_tmp_variable(dtype=X.data_type) - ynorm = helper.create_tmp_variable(dtype=X.data_type) + out = helper.create_tmp_variable(dtype=X.dtype) + xnorm = helper.create_tmp_variable(dtype=X.dtype) + ynorm = helper.create_tmp_variable(dtype=X.dtype) helper.append_op( type='cos_sim', inputs={'X': [X], @@ -530,7 +577,7 @@ def cross_entropy(input, label, **kwargs): This function computes cross_entropy using the input and label. """ helper = LayerHelper('cross_entropy', **kwargs) - out = helper.create_tmp_variable(dtype=input.data_type) + out = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( type='cross_entropy', inputs={'X': [input], @@ -546,26 +593,26 @@ def square_error_cost(input, label, **kwargs): The output is appending the op to do the above. """ helper = LayerHelper('square_error_cost', **kwargs) - minus_out = helper.create_tmp_variable(dtype=input.data_type) + minus_out = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( type='elementwise_sub', inputs={'X': [input], 'Y': [label]}, outputs={'Out': [minus_out]}) - square_out = helper.create_tmp_variable(dtype=input.data_type) + square_out = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( type='square', inputs={'X': [minus_out]}, outputs={'Y': [square_out]}) return square_out -def accuracy(input, label, k=1, **kwargs): +def accuracy(input, label, k=1, correct=None, total=None, **kwargs): """ This function computes the accuracy using the input and label. The output is the top_k inputs and their indices. """ helper = LayerHelper("accuracy", **kwargs) - topk_out = helper.create_tmp_variable(dtype=input.data_type) + topk_out = helper.create_tmp_variable(dtype=input.dtype) topk_indices = helper.create_tmp_variable(dtype="int64") helper.append_op( type="top_k", @@ -573,10 +620,11 @@ def accuracy(input, label, k=1, **kwargs): outputs={"Out": [topk_out], "Indices": [topk_indices]}, attrs={"k": k}) - acc_out_dtype = kwargs.get("out_dtype", "float32") acc_out = helper.create_tmp_variable(dtype="float32") - correct = helper.create_tmp_variable(dtype="int64") - total = helper.create_tmp_variable(dtype="int64") + if correct is None: + correct = helper.create_tmp_variable(dtype="int64") + if total is None: + total = helper.create_tmp_variable(dtype="int64") helper.append_op( type="accuracy", inputs={ @@ -596,10 +644,12 @@ def sequence_conv(input, num_filters, filter_size=3, filter_stride=1, - act=None, padding=None, bias_attr=None, + bias_initializer=None, param_attr=None, + param_initializer=None, + act=None, main_program=None, startup_program=None): """ @@ -607,6 +657,13 @@ def sequence_conv(input, other convolutional configurations for the filters and stride as given in the input parameters to the function. """ + + def _get_default_bias_initializer(): + return Constant() + + def _get_default_param_initializer(): + return Xavier() + # FIXME(dzh) : want to unify the argument of python layer # function. So we ignore some unecessary attributes. # such as, padding_trainable, context_start. @@ -614,9 +671,17 @@ def sequence_conv(input, helper = LayerHelper('sequence_conv', **locals()) dtype = helper.input_dtype() + if param_initializer is None: + param_initializer = _get_default_param_initializer() + if bias_initializer is None: + bias_initializer = _get_default_bias_initializer() + filter_shape = [filter_size * input.shape[1], num_filters] filter = helper.create_parameter( - attr=helper.param_attr, shape=filter_shape, dtype=dtype) + attr=helper.param_attr, + shape=filter_shape, + dtype=dtype, + initializer=param_initializer) pre_bias = helper.create_tmp_variable(dtype) helper.append_op( @@ -631,20 +696,22 @@ def sequence_conv(input, 'contextStart': -int(filter_size / 2), 'contextLength': filter_size }) - pre_act = helper.append_bias_op(pre_bias) + pre_act = helper.append_bias_op(pre_bias, bias_initializer) return helper.append_activation(pre_act) def conv2d(input, num_filters, - name=None, - filter_size=[1, 1], - act=None, - groups=None, + filter_size, stride=[1, 1], padding=None, - bias_attr=None, + groups=None, param_attr=None, + param_initializer=None, + bias_attr=None, + bias_initializer=None, + act=None, + name=None, main_program=None, startup_program=None): """ @@ -654,6 +721,14 @@ def conv2d(input, This funciton can also append an activation on top of the conv-2d output, if mentioned in the input parameters. """ + + def _get_default_bias_initializer(): + return Constant() + + def _get_default_param_initializer(filter_size, num_channels): + std = (2.0 / (filter_size[0]**2 * num_channels))**0.5 + return Normal(0.0, std, 0) + helper = LayerHelper('conv2d', **locals()) dtype = helper.input_dtype() @@ -661,7 +736,7 @@ def conv2d(input, if groups is None: num_filter_channels = num_channels else: - if num_channels % groups is not 0: + if num_channels % groups != 0: raise ValueError("num_channels must be divisible by groups.") num_filter_channels = num_channels / groups @@ -675,12 +750,17 @@ def conv2d(input, input_shape = input.shape filter_shape = [num_filters, num_filter_channels] + filter_size - std = (2.0 / (filter_size[0]**2 * num_channels))**0.5 + if param_initializer is None: + param_initializer = _get_default_param_initializer(filter_size, + num_channels) + if bias_initializer is None: + bias_initializer = _get_default_bias_initializer() + filter = helper.create_parameter( attr=helper.param_attr, shape=filter_shape, dtype=dtype, - initializer=NormalInitializer(0.0, std, 0)) + initializer=param_initializer) pre_bias = helper.create_tmp_variable(dtype) helper.append_op( @@ -694,7 +774,8 @@ def conv2d(input, 'paddings': padding, 'groups': groups}) - pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2) + pre_act = helper.append_bias_op( + pre_bias, bias_initializer, dim_start=1, dim_end=2) return helper.append_activation(pre_act) @@ -795,22 +876,20 @@ def batch_norm(input, attr=helper.param_attr, shape=param_shape, dtype=dtype, - initializer=ConstantInitializer(1.0)) + initializer=Constant(1.0)) bias = helper.create_parameter( attr=helper.param_attr, shape=param_shape, dtype=dtype, - initializer=ConstantInitializer(0.0)) + initializer=Constant(0.0)) mean = helper.create_global_variable( - dtype=input.data_type, shape=param_shape, persistable=True) - helper.set_variable_initializer( - var=mean, initializer=ConstantInitializer(0.0)) + dtype=input.dtype, shape=param_shape, persistable=True) + helper.set_variable_initializer(var=mean, initializer=Constant(0.0)) variance = helper.create_global_variable( - dtype=input.data_type, shape=param_shape, persistable=True) - helper.set_variable_initializer( - var=variance, initializer=ConstantInitializer(1.0)) + dtype=input.dtype, shape=param_shape, persistable=True) + helper.set_variable_initializer(var=variance, initializer=Constant(1.0)) # create output # mean and mean_out share the same memory @@ -847,8 +926,8 @@ def batch_norm(input, def beam_search_decode(ids, scores, main_program=None, startup_program=None): helper = LayerHelper('beam_search_decode', **locals()) - sentence_ids = helper.create_tmp_variable(dtype=ids.data_type) - sentence_scores = helper.create_tmp_variable(dtype=ids.data_type) + sentence_ids = helper.create_tmp_variable(dtype=ids.dtype) + sentence_scores = helper.create_tmp_variable(dtype=ids.dtype) helper.append_op( type="beam_search_decode", @@ -986,7 +1065,7 @@ class StaticRNN(object): boot_var = parent_block.create_var( name=var_name, shape=shape, - dtype=batch_ref.data_type, + dtype=batch_ref.dtype, persistable=False) parent_block.append_op( @@ -996,7 +1075,7 @@ class StaticRNN(object): attrs={ 'value': init_value, 'shape': boot_var.shape, - 'data_type': boot_var.data_type, + 'dtype': boot_var.dtype, 'input_dim_idx': ref_batch_dim_idx, 'output_dim_idx': init_batch_dim_idx }) @@ -1005,7 +1084,7 @@ class StaticRNN(object): else: pre_mem = self.helper.create_variable( name=unique_name("@".join([self.helper.name, "mem"])), - dtype=init.data_type, + dtype=init.dtype, shape=init.shape) self.memories[pre_mem.name] = StaticRNNMemoryLink( init=init, pre_mem=pre_mem) @@ -1021,10 +1100,7 @@ class StaticRNN(object): raise ValueError("Static RNN only take fix seq_len input") ipt = self.helper.create_variable( - name=x.name, - dtype=x.data_type, - shape=list(x.shape[1:]), - type=x.type) + name=x.name, dtype=x.dtype, shape=list(x.shape[1:]), type=x.type) self.inputs.append(ipt) return ipt @@ -1033,17 +1109,17 @@ class StaticRNN(object): if not isinstance(o, Variable): raise TypeError("step output takes a Variable") - tmp_o = self.helper.create_tmp_variable(dtype=o.data_type) + tmp_o = self.helper.create_tmp_variable(dtype=o.dtype) self.helper.append_op( type='rnn_memory_helper', inputs={'X': [o]}, outputs={'Out': tmp_o}, - attrs={'data_type': o.data_type}) + attrs={'dtype': o.dtype}) out_var = self.parent_block().create_var( name=tmp_o.name, shape=[self.seq_len] + list(tmp_o.shape), - dtype=tmp_o.data_type) + dtype=tmp_o.dtype) self.outputs.append(out_var) @@ -1115,13 +1191,13 @@ class StaticRNN(object): pre_memories.append(mem.pre_mem.name) mem_var = rnn_block.var(mem.mem.name) assert isinstance(mem_var, Variable) - new_mem = self.helper.create_tmp_variable(dtype=mem_var.data_type) + new_mem = self.helper.create_tmp_variable(dtype=mem_var.dtype) rnn_block.append_op( type='rnn_memory_helper', inputs={'X': [mem_var]}, outputs={'Out': [new_mem]}, - attrs={'data_type': mem_var.data_type}) + attrs={'dtype': mem_var.dtype}) memories.append(new_mem.name) @@ -1171,7 +1247,7 @@ class While(object): if not isinstance(cond, Variable): raise TypeError("condition should be a variable") assert isinstance(cond, Variable) - if cond.data_type != core.DataType.BOOL: + if cond.dtype != core.DataType.BOOL: raise TypeError("condition should be a bool variable") if reduce(lambda a, b: a * b, cond.shape, 1) != 1: raise TypeError("condition should be a bool scalar") @@ -1243,9 +1319,9 @@ def lstm(x, main_program=main_program, startup_program=startup_program) - data_type = x.data_type - c = helper.create_tmp_variable(data_type) - h = helper.create_tmp_variable(data_type) + dtype = x.dtype + c = helper.create_tmp_variable(dtype) + h = helper.create_tmp_variable(dtype) helper.append_op( type='lstm_unit', @@ -1278,6 +1354,33 @@ def lod_rank_table(x, level=0, main_program=None): return table +def max_sequence_len(rank_table, main_program=None): + """ + This function creates an operator to calculate the length of + max seqence through input rank_table(should be a lod_rank_table) + """ + helper = LayerHelper("max_seqence_len", **locals()) + res = helper.create_tmp_variable(dtype="int64") + helper.append_op( + type="max_sequence_len", + inputs={"RankTable": rank_table}, + outputs={"Out": res}) + return res + + +def topk(input, k, main_program=None, startup_program=None): + helper = LayerHelper('topk', **locals()) + topk_out = helper.create_tmp_variable(dtype=input.data_type) + topk_indices = helper.create_tmp_variable(dtype='int64') + helper.append_op( + type='top_k', + inputs={'X': [input]}, + outputs={'Out': [topk_out], + 'Indices': [topk_indices]}, + attrs={'k': k}) + return topk_out, topk_indices + + def lod_tensor_to_array(x, table, main_program=None): """ This function creates an operator to convert an LOD_Tensor to @@ -1287,7 +1390,7 @@ def lod_tensor_to_array(x, table, main_program=None): array = helper.create_variable( name=unique_name("lod_tensor_to_array"), type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - dtype=x.data_type) + dtype=x.dtype) helper.append_op( type='lod_tensor_to_array', inputs={'X': x, @@ -1302,7 +1405,7 @@ def array_to_lod_tensor(x, table, main_program=None): LOD_Tensor. """ helper = LayerHelper("array_to_lod_tensor", **locals()) - tmp = helper.create_tmp_variable(dtype=x.data_type) + tmp = helper.create_tmp_variable(dtype=x.dtype) helper.append_op( type="array_to_lod_tensor", inputs={'X': x, @@ -1311,22 +1414,51 @@ def array_to_lod_tensor(x, table, main_program=None): return tmp -def fill_constant(shape, dtype, value, main_program=None): +def fill_constant(shape, + dtype, + value, + out=None, + main_program=None, + startup_program=None): """ This function creates a tensor , with shape as mentioned in the input and - specified data_type and fills this up with a constant value that + specified dtype and fills this up with a constant value that comes in the input. It also sets the stop_gradient to be True. """ helper = LayerHelper("fill_constant", **locals()) - out = helper.create_tmp_variable(dtype=dtype) + if out is None: + out = helper.create_tmp_variable(dtype=dtype) helper.append_op( type='fill_constant', inputs={}, outputs={'Out': [out]}, + attrs={'shape': shape, + 'dtype': out.dtype, + 'value': float(value)}) + out.stop_gradient = True + return out + + +def fill_constant_batch_size_like(input, + shape, + dtype, + value, + input_dim_idx=0, + output_dim_idx=0, + main_program=None, + startup_program=None): + helper = LayerHelper("fill_constant_batch_size_like", **locals()) + out = helper.create_tmp_variable(dtype=dtype) + helper.append_op( + type='fill_constant_batch_size_like', + inputs={'Input': input}, + outputs={'Out': [out]}, attrs={ 'shape': shape, - 'data_type': out.data_type, - 'value': float(value) + 'dtype': out.dtype, + 'value': float(value), + 'input_dim_idx': input_dim_idx, + 'output_dim_idx': output_dim_idx }) out.stop_gradient = True return out @@ -1356,7 +1488,7 @@ def increment(x, value=1.0, in_place=True, main_program=None): """ helper = LayerHelper("increment", **locals()) if not in_place: - out = helper.create_tmp_variable(dtype=x.data_type) + out = helper.create_tmp_variable(dtype=x.dtype) else: out = x helper.append_op( @@ -1377,7 +1509,7 @@ def array_write(x, i, array=None, main_program=None): array = helper.create_variable( name="{0}.out".format(helper.name), type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, - dtype=x.data_type) + dtype=x.dtype) helper.append_op( type='write_to_array', inputs={'X': [x], @@ -1394,7 +1526,7 @@ def create_array(dtype, main_program=None): dtype=dtype) -def less_than(x, y, cond=None, main_program=None): +def less_than(x, y, cond=None, main_program=None, **ignored): helper = LayerHelper("less_than", **locals()) if cond is None: cond = helper.create_tmp_variable(dtype='bool') @@ -1416,7 +1548,7 @@ def array_read(array, i, main_program=None): array, Variable) or array.type != core.VarDesc.VarType.LOD_TENSOR_ARRAY: raise TypeError("array should be tensor array vairable") - out = helper.create_tmp_variable(dtype=array.data_type) + out = helper.create_tmp_variable(dtype=array.dtype) helper.append_op( type='read_from_array', inputs={'X': [array], @@ -1431,7 +1563,7 @@ def shrink_memory(x, i, table, main_program=None): as mentioned in the input parameter. """ helper = LayerHelper('shrink_memory', **locals()) - out = helper.create_tmp_variable(dtype=x.data_type) + out = helper.create_tmp_variable(dtype=x.dtype) helper.append_op( type='shrink_rnn_memory', inputs={'X': [x], @@ -1472,13 +1604,20 @@ class ConditionalBlockGuard(BlockGuard): class ConditionalBlock(object): - def __init__(self, inputs, name=None, main_program=None): + def __init__(self, + inputs, + name=None, + main_program=None, + startup_program=None): for each_input in inputs: if not isinstance(each_input, Variable): raise TypeError("Each input should be variable") self.inputs = inputs self.helper = LayerHelper( - 'conditional_block', name=name, main_program=main_program) + 'conditional_block', + name=name, + main_program=main_program, + startup_program=startup_program) def block(self): return ConditionalBlockGuard(self) @@ -1523,3 +1662,148 @@ class ConditionalBlock(object): outputs={'Out': out_list, 'Scope': [step_scope]}, attrs={'block': inside_block}) + + +class IfElseBlockGuard(object): + def __init__(self, is_true, ifelse): + if not isinstance(ifelse, IfElse): + raise TypeError("ifelse must be an instance of IfElse class") + + if ifelse.status != IfElse.OUT_IF_ELSE_BLOCKS: + raise ValueError("You cannot invoke IfElse.block() inside a block") + + self.is_true = is_true + self.ie = ifelse + if is_true: + self.cond_block = ifelse.conditional_true_block + else: + self.cond_block = ifelse.conditional_false_block + + if not isinstance(self.cond_block, ConditionalBlock): + raise TypeError("Unexpected situation") + + self.cond_block = self.cond_block.block() + + def __enter__(self): + self.ie.status = IfElse.IN_IF_ELSE_TRUE_BLOCKS if self.is_true else IfElse.IN_IF_ELSE_FALSE_BLOCKS + self.cond_block.__enter__() + + def __exit__(self, exc_type, exc_val, exc_tb): + if not self.cond_block.__exit__(exc_type, exc_val, exc_tb): + # re-raise inside exception + return False + if len(self.ie.output_table[1 if self.is_true else 0]) == 0: + raise ValueError("Must set output inside block") + self.ie.status = IfElse.OUT_IF_ELSE_BLOCKS + + +class IfElse(object): + OUT_IF_ELSE_BLOCKS = 0 + IN_IF_ELSE_TRUE_BLOCKS = 1 + IN_IF_ELSE_FALSE_BLOCKS = 2 + + def __init__(self, cond, name=None, main_program=None, + startup_program=None): + if not isinstance(cond, Variable): + raise TypeError("cond must be a Variable") + self.helper = LayerHelper( + 'ifelse', + name=name, + main_program=main_program, + startup_program=startup_program) + self.cond = cond + self.input_table = {} + self.status = IfElse.OUT_IF_ELSE_BLOCKS + self.conditional_true_block = ConditionalBlock(inputs=[self.cond]) + self.conditional_false_block = ConditionalBlock(inputs=[self.cond]) + self.output_table = ([], []) # (true_outs, false_outs) + + def input(self, x): + if self.status == IfElse.OUT_IF_ELSE_BLOCKS: + raise ValueError("input must in true/false blocks") + if id(x) not in self.input_table: + parent_block = self.parent_block() + out_true = parent_block.create_var( + name=unique_name('ifelse_input' + self.helper.name), + dtype=x.dtype) + + out_false = parent_block.create_var( + name=unique_name('ifelse_input' + self.helper.name), + dtype=x.dtype) + parent_block.append_op( + type='split_lod_tensor', + inputs={ + 'X': x, + 'Mask': self.cond, + }, + outputs={'OutTrue': out_true, + 'OutFalse': out_false}, + attrs={'level': 0}) + self.input_table[id(x)] = (out_true, out_false) + else: + out_true, out_false = self.input_table[id(x)] + + if self.status == IfElse.IN_IF_ELSE_TRUE_BLOCKS: + return out_true + else: + return out_false + + def parent_block(self): + current_block = self.helper.main_program.current_block() + return self.helper.main_program.block(current_block.parent_idx) + + def true_block(self): + return IfElseBlockGuard(True, self) + + def false_block(self): + return IfElseBlockGuard(False, self) + + def output(self, *outs): + if self.status == self.OUT_IF_ELSE_BLOCKS: + raise ValueError("output can only be invoked in the sub-block") + + out_table = self.output_table[1 if self.status == + self.IN_IF_ELSE_TRUE_BLOCKS else 0] + parent_block = self.parent_block() + for each_out in outs: + if not isinstance(each_out, Variable): + raise TypeError("Each output should be a variable") + # create outside tensor + outside_out = parent_block.create_var( + name=unique_name("_".join([self.helper.name, 'output'])), + dtype=each_out.dtype) + out_table.append(outside_out) + + # assign local var to outside + assign( + input=each_out, + output=outside_out, + main_program=self.helper.main_program, + startup_program=self.helper.startup_program) + + def __call__(self): + if self.status != self.OUT_IF_ELSE_BLOCKS: + raise ValueError("IfElse::__call__ must be out of sub-block") + false_len, true_len = map(len, self.output_table) + if false_len == 0 and true_len == 0: + raise ValueError("Must invoke true_block/false_block before " + "__call__") + elif false_len != true_len and false_len != 0 and true_len != 0: + raise ValueError("The output side must be same") + elif false_len == 0 or true_len == 0: + return self.output_table[0 if false_len != 0 else 1] + + # else none of false_len/true_len is zero + # merge together + rlist = [] + for false_var, true_var in zip(*self.output_table): + rlist.append( + merge_lod_tensor( + in_true=true_var, + in_false=false_var, + mask=self.cond, + x=self.cond, + level=0, + main_program=self.helper.main_program, + startup_program=self.helper.startup_program)) + return rlist diff --git a/python/paddle/v2/fluid/nets.py b/python/paddle/v2/fluid/nets.py index 5e14ca594bc7965dc29039ba57bb7b26b1ce6871..05728ad75a5bd1e87aa3c75ffcc4eac34b6b956c 100644 --- a/python/paddle/v2/fluid/nets.py +++ b/python/paddle/v2/fluid/nets.py @@ -1,4 +1,4 @@ -import paddle.v2.fluid.layers as layers +import layers __all__ = ["simple_img_conv_pool", "sequence_conv_pool"] diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py index d2841df6af7a0d860c239db952c767c995d30ba4..934e024742fd00bf05cc0d7caaaa870c18a68074 100644 --- a/python/paddle/v2/fluid/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -1,16 +1,13 @@ from collections import defaultdict -import paddle.v2.fluid.framework as framework -from paddle.v2.fluid.framework import unique_name, Program -from paddle.v2.fluid.backward import append_backward_ops -from paddle.v2.fluid.initializer import ConstantInitializer -from paddle.v2.fluid.regularizer import append_regularization_ops -from paddle.v2.fluid.layer_helper import LayerHelper +import framework +from backward import append_backward_ops +from framework import unique_name +from initializer import Constant +from layer_helper import LayerHelper +from regularizer import append_regularization_ops -__all__ = [ - 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', - 'AdamaxOptimizer', 'DecayedAdagradOptimizer' -] +__all__ = ['SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad'] class Optimizer(object): @@ -48,7 +45,7 @@ class Optimizer(object): persistable=True) param_lr = param_lr * self._learning_rate self.helper.set_variable_initializer( - var=param_lr_var, initializer=ConstantInitializer(param_lr)) + var=param_lr_var, initializer=Constant(param_lr)) return param_lr_var def _create_accumulators(self, block, parameters): @@ -92,11 +89,11 @@ class Optimizer(object): var = self.helper.create_global_variable( name=unique_name(name), persistable=True, - dtype=dtype or param.data_type, + dtype=dtype or param.dtype, type=param.type, shape=param.shape) self.helper.set_variable_initializer( - var, initializer=ConstantInitializer(value=float(fill_value))) + var, initializer=Constant(value=float(fill_value))) self._accumulators[name][param.name] = var def _get_accumulator(self, name, param): @@ -170,7 +167,8 @@ class Optimizer(object): optimize_ops = [] for param_and_grad in parameters_and_grads: - if param_and_grad[1] is not None: + if param_and_grad[0].trainable is True and param_and_grad[ + 1] is not None: optimize_op = self._append_optimize_op(loss.block, param_and_grad) optimize_ops.append(optimize_op) @@ -201,7 +199,7 @@ class Optimizer(object): """ params_grads = append_backward_ops(loss, parameter_list, no_grad_set or set()) - # Add regularization if any + # Add regularization if any params_grads = append_regularization_ops(params_grads) optimize_ops = self.create_optimization_pass(params_grads, loss, startup_program) @@ -359,7 +357,7 @@ class AdamOptimizer(Optimizer): lod_level=0, persistable=True) self.helper.set_variable_initializer( - self._beta1_pow_acc, initializer=ConstantInitializer(self._beta1)) + self._beta1_pow_acc, initializer=Constant(self._beta1)) self._beta2_pow_acc = self.helper.create_global_variable( name=unique_name('beta2_pow_acc'), @@ -369,7 +367,7 @@ class AdamOptimizer(Optimizer): persistable=True) self.helper.set_variable_initializer( - self._beta2_pow_acc, initializer=ConstantInitializer(self._beta2)) + self._beta2_pow_acc, initializer=Constant(self._beta2)) # Create accumulator tensors for first and second moments for p in parameters: @@ -461,7 +459,7 @@ class AdamaxOptimizer(Optimizer): lod_level=0, persistable=True) self.helper.set_variable_initializer( - self._beta1_pow_acc, initializer=ConstantInitializer(self._beta1)) + self._beta1_pow_acc, initializer=Constant(self._beta1)) # Create accumulator tensors for first moment and infinity norm for p in parameters: @@ -558,3 +556,19 @@ class DecayedAdagradOptimizer(Optimizer): attrs={"epsilon": self._epsilon}) return decayed_adagrad_op + + +# We short the class name, since users will use the optimizer with the package +# name. The sample code: +# +# import paddle.fluid as fluid +# +# sgd = fluid.optimizer.SGD(...) +# +# It is no need to add an `Optimizer` as the class suffix +SGD = SGDOptimizer +Momentum = MomentumOptimizer +Adagrad = AdagradOptimizer +Adam = AdamOptimizer +Adamax = AdamaxOptimizer +DecayedAdagrad = DecayedAdagradOptimizer diff --git a/python/paddle/v2/fluid/regularizer.py b/python/paddle/v2/fluid/regularizer.py index 098cd0dd6439554f49e429ab75fb11bfa2c9d28c..c2c18e1951234f7160ff9f92d6dd6922a56683dd 100644 --- a/python/paddle/v2/fluid/regularizer.py +++ b/python/paddle/v2/fluid/regularizer.py @@ -1,8 +1,6 @@ -import paddle.v2.fluid.framework as framework +import framework -__all__ = [ - 'append_regularization_ops', 'L2DecayRegularizer', 'L1DecayRegularizer' -] +__all__ = ['append_regularization_ops', 'L1Decay', 'L2Decay'] def append_regularization_ops(parameters_and_grads): @@ -139,3 +137,16 @@ class L1DecayRegularizer(WeightDecayRegularizer): attrs={"scale": self._regularization_coeff}) return decay + + +# We short the class name, since users will use the regulaizer with the package +# name. The sample code: +# +# import paddle.fluid as fluid +# +# hidden = fluid.layers.fc(..., +# param_attr=ParamAttr(fluid.regularizer.Xavier())) +# +# It is no need to add a `Regularizer` as the class suffix +L1Decay = L1DecayRegularizer +L2Decay = L2DecayRegularizer diff --git a/python/paddle/v2/fluid/tests/.gitignore b/python/paddle/v2/fluid/tests/.gitignore index fcc52c04886865d96c1bfe1597a9dc99c181de1f..a648f2b387c2c7b9422eea6749e43e7b8871f60f 100644 --- a/python/paddle/v2/fluid/tests/.gitignore +++ b/python/paddle/v2/fluid/tests/.gitignore @@ -1,2 +1,3 @@ image/ fit_a_line.model/ +tmp diff --git a/python/paddle/v2/fluid/tests/book/CMakeLists.txt b/python/paddle/v2/fluid/tests/book/CMakeLists.txt index 4d7664469e481344cf9eea84688f068b4fb99dee..a35abe3e0c436be4eaed01c9b9183344c6d3b275 100644 --- a/python/paddle/v2/fluid/tests/book/CMakeLists.txt +++ b/python/paddle/v2/fluid/tests/book/CMakeLists.txt @@ -1,5 +1,11 @@ file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") + +list(REMOVE_ITEM TEST_OPS test_image_classification_train) +py_test(test_image_classification_train_resnet SRCS test_image_classification_train.py ARGS resnet) +py_test(test_image_classification_train_vgg SRCS test_image_classification_train.py ARGS vgg) + +# default test foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() diff --git a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py index a7f3bfc0caf76302674a00c80c2bd9ebf834f872..9f98493adb21a03b8efde0f88c490e77c9d303e7 100644 --- a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py @@ -1,23 +1,18 @@ import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.core as core -import paddle.v2.fluid.framework as framework -import paddle.v2.fluid.layers as layers -from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.io import save_persistables, load_persistables -from paddle.v2.fluid.optimizer import SGDOptimizer +import paddle.v2.fluid as fluid -x = layers.data(name='x', shape=[13], data_type='float32') +x = fluid.layers.data(name='x', shape=[13], dtype='float32') -y_predict = layers.fc(input=x, size=1, act=None) +y_predict = fluid.layers.fc(input=x, size=1, act=None) -y = layers.data(name='y', shape=[1], data_type='float32') +y = fluid.layers.data(name='y', shape=[1], dtype='float32') -cost = layers.square_error_cost(input=y_predict, label=y) -avg_cost = layers.mean(x=cost) +cost = fluid.layers.square_error_cost(input=y_predict, label=y) +avg_cost = fluid.layers.mean(x=cost) -sgd_optimizer = SGDOptimizer(learning_rate=0.001) -opts = sgd_optimizer.minimize(avg_cost) +sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) +sgd_optimizer.minimize(avg_cost) BATCH_SIZE = 20 @@ -26,32 +21,24 @@ train_reader = paddle.batch( paddle.dataset.uci_housing.train(), buf_size=500), batch_size=BATCH_SIZE) -place = core.CPUPlace() -exe = Executor(place) +place = fluid.CPUPlace() +exe = fluid.Executor(place) -exe.run(framework.default_startup_program()) +exe.run(fluid.default_startup_program()) PASS_NUM = 100 for pass_id in range(PASS_NUM): - save_persistables(exe, "./fit_a_line.model/") - load_persistables(exe, "./fit_a_line.model/") + fluid.io.save_persistables(exe, "./fit_a_line.model/") + fluid.io.load_persistables(exe, "./fit_a_line.model/") for data in train_reader(): - x_data = np.array(map(lambda x: x[0], data)).astype("float32") - y_data = np.array(map(lambda x: x[1], data)).astype("float32") - - tensor_x = core.LoDTensor() - tensor_x.set(x_data, place) - # print tensor_x.get_dims() - - tensor_y = core.LoDTensor() - tensor_y.set(y_data, place) - # print tensor_y.get_dims() - outs = exe.run(framework.default_main_program(), - feed={'x': tensor_x, - 'y': tensor_y}, - fetch_list=[avg_cost]) - out = np.array(outs[0]) - - if out[0] < 10.0: + x_data = np.array(map(lambda _: _[0], data)).astype("float32") + y_data = np.array(map(lambda _: _[1], data)).astype("float32") + + avg_loss_value, = exe.run(fluid.default_main_program(), + feed={'x': x_data, + 'y': y_data}, + fetch_list=[avg_cost]) + + if avg_loss_value[0] < 10.0: exit(0) # if avg cost less than 10.0, we think our code is good. exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py index b8506125501b6e533c4594b37943ec36ca8e7d30..cc45b10b90868858c61334a3a43acf65c3d4eaf5 100644 --- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py +++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py @@ -1,17 +1,14 @@ +from __future__ import print_function + import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.core as core -import paddle.v2.fluid.framework as framework -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.nets as nets -from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.initializer import XavierInitializer -from paddle.v2.fluid.optimizer import AdamOptimizer +import paddle.v2.fluid as fluid +import sys def resnet_cifar10(input, depth=32): def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'): - tmp = layers.conv2d( + tmp = fluid.layers.conv2d( input=input, filter_size=filter_size, num_filters=ch_out, @@ -19,12 +16,11 @@ def resnet_cifar10(input, depth=32): padding=padding, act=None, bias_attr=False) - return layers.batch_norm(input=tmp, act=act) + return fluid.layers.batch_norm(input=tmp, act=act) - def shortcut(input, ch_in, ch_out, stride, program, init_program): + def shortcut(input, ch_in, ch_out, stride): if ch_in != ch_out: - return conv_bn_layer(input, ch_out, 1, stride, 0, None, program, - init_program) + return conv_bn_layer(input, ch_out, 1, stride, 0, None) else: return input @@ -32,7 +28,7 @@ def resnet_cifar10(input, depth=32): tmp = conv_bn_layer(input, ch_out, 3, stride, 1) tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None) short = shortcut(input, ch_in, ch_out, stride) - return layers.elementwise_add(x=tmp, y=short, act='relu') + return fluid.layers.elementwise_add(x=tmp, y=short, act='relu') def layer_warp(block_func, input, ch_in, ch_out, count, stride): tmp = block_func(input, ch_in, ch_out, stride) @@ -47,14 +43,14 @@ def resnet_cifar10(input, depth=32): res1 = layer_warp(basicblock, conv1, 16, 16, n, 1) res2 = layer_warp(basicblock, res1, 16, 32, n, 2) res3 = layer_warp(basicblock, res2, 32, 64, n, 2) - pool = layers.pool2d( + pool = fluid.layers.pool2d( input=res3, pool_size=8, pool_type='avg', pool_stride=1) return pool def vgg16_bn_drop(input): def conv_block(input, num_filter, groups, dropouts): - return nets.img_conv_group( + return fluid.nets.img_conv_group( input=input, pool_size=2, pool_stride=2, @@ -71,44 +67,43 @@ def vgg16_bn_drop(input): conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) - drop = layers.dropout(x=conv5, dropout_prob=0.5) - fc1 = layers.fc(input=drop, - size=512, - act=None, - param_attr={"initializer": XavierInitializer()}) - reshape1 = layers.reshape(x=fc1, shape=list(fc1.shape + (1, 1))) - bn = layers.batch_norm(input=reshape1, act='relu') - drop2 = layers.dropout(x=bn, dropout_prob=0.5) - fc2 = layers.fc(input=drop2, - size=512, - act=None, - param_attr={"initializer": XavierInitializer()}) + drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5) + fc1 = fluid.layers.fc(input=drop, size=512, act=None) + reshape1 = fluid.layers.reshape(x=fc1, shape=list(fc1.shape + (1, 1))) + bn = fluid.layers.batch_norm(input=reshape1, act='relu') + drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5) + fc2 = fluid.layers.fc(input=drop2, size=512, act=None) return fc2 classdim = 10 data_shape = [3, 32, 32] -images = layers.data(name='pixel', shape=data_shape, data_type='float32') -label = layers.data(name='label', shape=[1], data_type='int64') +images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') +label = fluid.layers.data(name='label', shape=[1], dtype='int64') -# Add neural network config -# option 1. resnet -# net = resnet_cifar10(images, 32) -# option 2. vgg -net = vgg16_bn_drop(images) +net_type = "vgg" +if len(sys.argv) >= 2: + net_type = sys.argv[1] -# print(program) +if net_type == "vgg": + print("train vgg net") + net = vgg16_bn_drop(images) +elif net_type == "resnet": + print("train resnet") + net = resnet_cifar10(images, 32) +else: + raise ValueError("%s network is not supported" % net_type) -predict = layers.fc(input=net, size=classdim, act='softmax') -cost = layers.cross_entropy(input=predict, label=label) -avg_cost = layers.mean(x=cost) -accuracy = layers.accuracy(input=predict, label=label) +predict = fluid.layers.fc(input=net, size=classdim, act='softmax') +cost = fluid.layers.cross_entropy(input=predict, label=label) +avg_cost = fluid.layers.mean(x=cost) -# optimizer = SGDOptimizer(learning_rate=0.001) -optimizer = AdamOptimizer(learning_rate=0.001) +optimizer = fluid.optimizer.Adam(learning_rate=0.001) opts = optimizer.minimize(avg_cost) +accuracy = fluid.evaluator.Accuracy(input=predict, label=label) + BATCH_SIZE = 128 PASS_NUM = 1 @@ -117,13 +112,13 @@ train_reader = paddle.batch( paddle.dataset.cifar.train10(), buf_size=128 * 10), batch_size=BATCH_SIZE) -place = core.CPUPlace() -exe = Executor(place) +place = fluid.CPUPlace() +exe = fluid.Executor(place) -exe.run(framework.default_startup_program()) +exe.run(fluid.default_startup_program()) for pass_id in range(PASS_NUM): - batch_id = 0 + accuracy.reset(exe) for data in train_reader(): img_data = np.array(map(lambda x: x[0].reshape(data_shape), data)).astype("float32") @@ -133,23 +128,13 @@ for pass_id in range(PASS_NUM): batch_size = batch_size * i y_data = y_data.reshape([batch_size, 1]) - tensor_img = core.LoDTensor() - tensor_y = core.LoDTensor() - tensor_img.set(img_data, place) - tensor_y.set(y_data, place) - - outs = exe.run(framework.default_main_program(), - feed={"pixel": tensor_img, - "label": tensor_y}, - fetch_list=[avg_cost, accuracy]) - - loss = np.array(outs[0]) - acc = np.array(outs[1]) - print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) + - " loss:" + str(loss) + " acc:" + str(acc)) - batch_id = batch_id + 1 - - if batch_id > 1: - # this model is slow, so if we can train two mini batch, we think it works properly. - exit(0) + loss, acc = exe.run(fluid.default_main_program(), + feed={"pixel": img_data, + "label": y_data}, + fetch_list=[avg_cost] + accuracy.metrics) + pass_acc = accuracy.eval(exe) + print("loss:" + str(loss) + " acc:" + str(acc) + " pass_acc:" + str( + pass_acc)) + # this model is slow, so if we can train two mini batch, we think it works properly. + exit(0) exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py new file mode 100644 index 0000000000000000000000000000000000000000..93987a2b80dc9ca304a708d4799bc38b448a68c4 --- /dev/null +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -0,0 +1,188 @@ +import numpy as np +import paddle.v2 as paddle +import paddle.v2.dataset.conll05 as conll05 +import paddle.v2.fluid as fluid + +word_dict, verb_dict, label_dict = conll05.get_dict() +word_dict_len = len(word_dict) +label_dict_len = len(label_dict) +pred_len = len(verb_dict) + +mark_dict_len = 2 +word_dim = 32 +mark_dim = 5 +hidden_dim = 512 +depth = 8 +mix_hidden_lr = 1e-3 + +IS_SPARSE = True +PASS_NUM = 10 +BATCH_SIZE = 20 + +embedding_name = 'emb' + + +def load_parameter(file_name, h, w): + with open(file_name, 'rb') as f: + f.read(16) # skip header. + return np.fromfile(f, dtype=np.float32).reshape(h, w) + + +def db_lstm(): + # 8 features + word = fluid.layers.data(name='word_data', shape=[1], dtype='int64') + predicate = fluid.layers.data(name='verb_data', shape=[1], dtype='int64') + ctx_n2 = fluid.layers.data(name='ctx_n2_data', shape=[1], dtype='int64') + ctx_n1 = fluid.layers.data(name='ctx_n1_data', shape=[1], dtype='int64') + ctx_0 = fluid.layers.data(name='ctx_0_data', shape=[1], dtype='int64') + ctx_p1 = fluid.layers.data(name='ctx_p1_data', shape=[1], dtype='int64') + ctx_p2 = fluid.layers.data(name='ctx_p2_data', shape=[1], dtype='int64') + mark = fluid.layers.data(name='mark_data', shape=[1], dtype='int64') + + predicate_embedding = fluid.layers.embedding( + input=predicate, + size=[pred_len, word_dim], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr={'name': 'vemb'}) + + mark_embedding = fluid.layers.embedding( + input=mark, + size=[mark_dict_len, mark_dim], + dtype='float32', + is_sparse=IS_SPARSE) + + word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] + emb_layers = [ + fluid.layers.embedding( + size=[word_dict_len, word_dim], + input=x, + param_attr={'name': embedding_name, + 'trainable': False}) for x in word_input + ] + emb_layers.append(predicate_embedding) + emb_layers.append(mark_embedding) + + hidden_0_layers = [ + fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers + ] + + hidden_0 = fluid.layers.sums(input=hidden_0_layers) + + lstm_0 = fluid.layers.dynamic_lstm( + input=hidden_0, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid') + + # stack L-LSTM and R-LSTM with direct edges + input_tmp = [hidden_0, lstm_0] + + for i in range(1, depth): + mix_hidden = fluid.layers.sums(input=[ + fluid.layers.fc(input=input_tmp[0], size=hidden_dim), + fluid.layers.fc(input=input_tmp[1], size=hidden_dim) + ]) + + lstm = fluid.layers.dynamic_lstm( + input=mix_hidden, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid', + is_reverse=((i % 2) == 1)) + + input_tmp = [mix_hidden, lstm] + + feature_out = fluid.layers.sums(input=[ + fluid.layers.fc(input=input_tmp[0], size=label_dict_len), + fluid.layers.fc(input=input_tmp[1], size=label_dict_len) + ]) + + return feature_out + + +def to_lodtensor(data, place): + seq_lens = [len(seq) for seq in data] + cur_len = 0 + lod = [cur_len] + for l in seq_lens: + cur_len += l + lod.append(cur_len) + flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + res = fluid.LoDTensor() + res.set(flattened_data, place) + res.set_lod([lod]) + return res + + +def main(): + # define network topology + feature_out = db_lstm() + target = fluid.layers.data(name='target', shape=[1], dtype='int64') + crf_cost = fluid.layers.linear_chain_crf( + input=feature_out, + label=target, + param_attr={"name": 'crfw', + "learning_rate": mix_hidden_lr}) + avg_cost = fluid.layers.mean(x=crf_cost) + # TODO(qiao) + # 1. add crf_decode_layer and evaluator + # 2. use other optimizer and check why out will be NAN + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001) + sgd_optimizer.minimize(avg_cost) + + train_data = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.conll05.test(), buf_size=8192), + batch_size=BATCH_SIZE) + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + exe.run(fluid.default_startup_program()) + + embedding_param = fluid.g_scope.find_var(embedding_name).get_tensor() + embedding_param.set( + load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place) + + batch_id = 0 + for pass_id in xrange(PASS_NUM): + for data in train_data(): + word_data = to_lodtensor(map(lambda x: x[0], data), place) + ctx_n2_data = to_lodtensor(map(lambda x: x[1], data), place) + ctx_n1_data = to_lodtensor(map(lambda x: x[2], data), place) + ctx_0_data = to_lodtensor(map(lambda x: x[3], data), place) + ctx_p1_data = to_lodtensor(map(lambda x: x[4], data), place) + ctx_p2_data = to_lodtensor(map(lambda x: x[5], data), place) + verb_data = to_lodtensor(map(lambda x: x[6], data), place) + mark_data = to_lodtensor(map(lambda x: x[7], data), place) + target = to_lodtensor(map(lambda x: x[8], data), place) + + outs = exe.run(fluid.default_main_program(), + feed={ + 'word_data': word_data, + 'ctx_n2_data': ctx_n2_data, + 'ctx_n1_data': ctx_n1_data, + 'ctx_0_data': ctx_0_data, + 'ctx_p1_data': ctx_p1_data, + 'ctx_p2_data': ctx_p2_data, + 'verb_data': verb_data, + 'mark_data': mark_data, + 'target': target + }, + fetch_list=[avg_cost]) + avg_cost_val = np.array(outs[0]) + + if batch_id % 10 == 0: + print("avg_cost=" + str(avg_cost_val)) + + # exit early for CI + exit(0) + + batch_id = batch_id + 1 + + +if __name__ == '__main__': + main() diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py index 75fbaf83e8f3e62eb0d0abef9cfa267b65e72973..ba686b56f8603834c12f5ed24e0ef7308c78585d 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py @@ -1,23 +1,18 @@ +from __future__ import print_function import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.core as core -import paddle.v2.fluid.evaluator as evaluator -import paddle.v2.fluid.framework as framework -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.nets as nets -from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.optimizer import AdamOptimizer +import paddle.v2.fluid as fluid -images = layers.data(name='pixel', shape=[1, 28, 28], data_type='float32') -label = layers.data(name='label', shape=[1], data_type='int64') -conv_pool_1 = nets.simple_img_conv_pool( +images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype='float32') +label = fluid.layers.data(name='label', shape=[1], dtype='int64') +conv_pool_1 = fluid.nets.simple_img_conv_pool( input=images, filter_size=5, num_filters=20, pool_size=2, pool_stride=2, act="relu") -conv_pool_2 = nets.simple_img_conv_pool( +conv_pool_2 = fluid.nets.simple_img_conv_pool( input=conv_pool_1, filter_size=5, num_filters=50, @@ -25,13 +20,13 @@ conv_pool_2 = nets.simple_img_conv_pool( pool_stride=2, act="relu") -predict = layers.fc(input=conv_pool_2, size=10, act="softmax") -cost = layers.cross_entropy(input=predict, label=label) -avg_cost = layers.mean(x=cost) -optimizer = AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999) -opts = optimizer.minimize(avg_cost) +predict = fluid.layers.fc(input=conv_pool_2, size=10, act="softmax") +cost = fluid.layers.cross_entropy(input=predict, label=label) +avg_cost = fluid.layers.mean(x=cost) +optimizer = fluid.optimizer.Adam(learning_rate=0.01) +optimizer.minimize(avg_cost) -accuracy, acc_out = evaluator.accuracy(input=predict, label=label) +accuracy = fluid.evaluator.Accuracy(input=predict, label=label) BATCH_SIZE = 50 PASS_NUM = 3 @@ -40,13 +35,12 @@ train_reader = paddle.batch( paddle.dataset.mnist.train(), buf_size=500), batch_size=BATCH_SIZE) -place = core.CPUPlace() -exe = Executor(place) +place = fluid.CPUPlace() +exe = fluid.Executor(place) -exe.run(framework.default_startup_program()) +exe.run(fluid.default_startup_program()) for pass_id in range(PASS_NUM): - count = 0 accuracy.reset(exe) for data in train_reader(): img_data = np.array(map(lambda x: x[0].reshape([1, 28, 28]), @@ -54,25 +48,19 @@ for pass_id in range(PASS_NUM): y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = y_data.reshape([BATCH_SIZE, 1]) - tensor_img = core.LoDTensor() - tensor_y = core.LoDTensor() - tensor_img.set(img_data, place) - tensor_y.set(y_data, place) - - outs = exe.run(framework.default_main_program(), - feed={"pixel": tensor_img, - "label": tensor_y}, - fetch_list=[avg_cost, acc_out]) - loss = np.array(outs[0]) - acc = np.array(outs[1]) + loss, acc = exe.run(fluid.default_main_program(), + feed={"pixel": img_data, + "label": y_data}, + fetch_list=[avg_cost] + accuracy.metrics) pass_acc = accuracy.eval(exe) - print "pass id : ", pass_id, pass_acc + print("pass_id=" + str(pass_id) + " acc=" + str(acc) + " pass_acc=" + + str(pass_acc)) # print loss, acc - if loss < 10.0 and acc > 0.9: + if loss < 10.0 and pass_acc > 0.9: # if avg cost less than 10.0 and accuracy is larger than 0.9, we think our code is good. exit(0) pass_acc = accuracy.eval(exe) - print "pass id : ", pass_id, pass_acc + print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc)) exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py index cf10b1942e6a8243b18b0ae4586fdd7ec1a665fb..8ca45134dc01ec21e720ca46c8ad020128aa6e04 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py @@ -1,69 +1,97 @@ +from __future__ import print_function import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.core as core -import paddle.v2.fluid.framework as framework -import paddle.v2.fluid.layers as layers -from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.initializer import UniformInitializer -from paddle.v2.fluid.optimizer import MomentumOptimizer -from paddle.v2.fluid.regularizer import L2DecayRegularizer +import paddle.v2.fluid as fluid BATCH_SIZE = 128 -image = layers.data(name='x', shape=[784], data_type='float32') +image = fluid.layers.data(name='x', shape=[784], dtype='float32') param_attr = { 'name': None, - 'initializer': UniformInitializer( - low=-1.0, high=1.0), - 'regularization': L2DecayRegularizer(0.0005 * BATCH_SIZE) + 'regularization': fluid.regularizer.L2Decay(0.0005 * BATCH_SIZE) } -hidden1 = layers.fc(input=image, size=128, act='relu', param_attr=param_attr) -hidden2 = layers.fc(input=hidden1, size=64, act='relu', param_attr=param_attr) +hidden1 = fluid.layers.fc(input=image, + size=128, + act='relu', + param_attr=param_attr) +hidden2 = fluid.layers.fc(input=hidden1, + size=64, + act='relu', + param_attr=param_attr) -predict = layers.fc(input=hidden2, - size=10, - act='softmax', - param_attr=param_attr) +predict = fluid.layers.fc(input=hidden2, + size=10, + act='softmax', + param_attr=param_attr) -label = layers.data(name='y', shape=[1], data_type='int64') +label = fluid.layers.data(name='y', shape=[1], dtype='int64') -cost = layers.cross_entropy(input=predict, label=label) -avg_cost = layers.mean(x=cost) -accuracy = layers.accuracy(input=predict, label=label) +cost = fluid.layers.cross_entropy(input=predict, label=label) +avg_cost = fluid.layers.mean(x=cost) -optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) +optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9) opts = optimizer.minimize(avg_cost) +accuracy = fluid.evaluator.Accuracy(input=predict, label=label) + +inference_program = fluid.default_main_program().clone() +test_accuracy = fluid.evaluator.Accuracy( + input=predict, label=label, main_program=inference_program) +test_target = [avg_cost] + test_accuracy.metrics + test_accuracy.states +inference_program = fluid.io.get_inference_program( + test_target, main_program=inference_program) + train_reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), batch_size=BATCH_SIZE) -place = core.CPUPlace() -exe = Executor(place) +test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128) -exe.run(framework.default_startup_program()) +place = fluid.CPUPlace() +exe = fluid.Executor(place) + +exe.run(fluid.default_startup_program()) PASS_NUM = 100 for pass_id in range(PASS_NUM): + accuracy.reset(exe) for data in train_reader(): x_data = np.array(map(lambda x: x[0], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.expand_dims(y_data, axis=1) - tensor_x = core.LoDTensor() + tensor_x = fluid.LoDTensor() tensor_x.set(x_data, place) - tensor_y = core.LoDTensor() + tensor_y = fluid.LoDTensor() tensor_y.set(y_data, place) - outs = exe.run(framework.default_main_program(), + outs = exe.run(fluid.default_main_program(), feed={'x': tensor_x, 'y': tensor_y}, - fetch_list=[avg_cost, accuracy]) + fetch_list=[avg_cost] + accuracy.metrics) out = np.array(outs[0]) acc = np.array(outs[1]) - if out[0] < 5.0: - exit(0) # if avg cost less than 5.0, we think our code is good. + pass_acc = accuracy.eval(exe) + + test_accuracy.reset(exe) + for data in test_reader(): + x_data = np.array(map(lambda x: x[0], data)).astype("float32") + y_data = np.array(map(lambda x: x[1], data)).astype("int64") + y_data = np.expand_dims(y_data, axis=1) + + out, acc = exe.run(inference_program, + feed={'x': x_data, + 'y': y_data}, + fetch_list=[avg_cost] + test_accuracy.metrics) + + test_pass_acc = test_accuracy.eval(exe) + print("pass_id=" + str(pass_id) + " train_cost=" + str( + out) + " train_acc=" + str(acc) + " train_pass_acc=" + str(pass_acc) + + " test_acc=" + str(test_pass_acc)) + + if test_pass_acc > 0.7: + exit(0) exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py index 55ded3aed3a23c8cd7795f915dc1cbd512c6d945..f8dc1518579d5a9d7a8d0498dcc5fd8a6d1692c4 100644 --- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py @@ -18,11 +18,11 @@ def get_usr_combined_features(): USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 - uid = layers.data(name='user_id', shape=[1], data_type='int64') + uid = layers.data(name='user_id', shape=[1], dtype='int64') usr_emb = layers.embedding( input=uid, - data_type='float32', + dtype='float32', size=[USR_DICT_SIZE, 32], param_attr={'name': 'user_table'}, is_sparse=IS_SPARSE) @@ -31,7 +31,7 @@ def get_usr_combined_features(): USR_GENDER_DICT_SIZE = 2 - usr_gender_id = layers.data(name='gender_id', shape=[1], data_type='int64') + usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') usr_gender_emb = layers.embedding( input=usr_gender_id, @@ -42,7 +42,7 @@ def get_usr_combined_features(): usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) - usr_age_id = layers.data(name='age_id', shape=[1], data_type="int64") + usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") usr_age_emb = layers.embedding( input=usr_age_id, @@ -53,7 +53,7 @@ def get_usr_combined_features(): usr_age_fc = layers.fc(input=usr_age_emb, size=16) USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 - usr_job_id = layers.data(name='job_id', shape=[1], data_type="int64") + usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") usr_job_emb = layers.embedding( input=usr_job_id, @@ -75,11 +75,11 @@ def get_mov_combined_features(): MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 - mov_id = layers.data(name='movie_id', shape=[1], data_type='int64') + mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') mov_emb = layers.embedding( input=mov_id, - data_type='float32', + dtype='float32', size=[MOV_DICT_SIZE, 32], param_attr={'name': 'movie_table'}, is_sparse=IS_SPARSE) @@ -88,7 +88,7 @@ def get_mov_combined_features(): CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) - category_id = layers.data(name='category_id', shape=[1], data_type='int64') + category_id = layers.data(name='category_id', shape=[1], dtype='int64') mov_categories_emb = layers.embedding( input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) @@ -98,7 +98,7 @@ def get_mov_combined_features(): MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) - mov_title_id = layers.data(name='movie_title', shape=[1], data_type='int64') + mov_title_id = layers.data(name='movie_title', shape=[1], dtype='int64') mov_title_emb = layers.embedding( input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) @@ -126,7 +126,7 @@ def model(): # need cos sim inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features) - label = layers.data(name='score', shape=[1], data_type='float32') + label = layers.data(name='score', shape=[1], dtype='float32') square_cost = layers.square_error_cost(input=inference, label=label) diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py index e69b915a9cfaf9e06075991975563a1fc1196661..be875a952b7086ee64984525d70ffd3f1ecb5fae 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py @@ -1,39 +1,35 @@ +from __future__ import print_function import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.core as core -import paddle.v2.fluid.framework as framework -import paddle.v2.fluid.layers as layers -import paddle.v2.fluid.nets as nets -from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.optimizer import AdamOptimizer +import paddle.v2.fluid as fluid def convolution_net(input_dim, class_dim=2, emb_dim=32, hid_dim=32): - data = layers.data(name="words", shape=[1], data_type="int64") - label = layers.data(name="label", shape=[1], data_type="int64") + data = fluid.layers.data(name="words", shape=[1], dtype="int64") + label = fluid.layers.data(name="label", shape=[1], dtype="int64") - emb = layers.embedding(input=data, size=[input_dim, emb_dim]) - conv_3 = nets.sequence_conv_pool( + emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim]) + conv_3 = fluid.nets.sequence_conv_pool( input=emb, num_filters=hid_dim, filter_size=3, act="tanh", pool_type="sqrt") - conv_4 = nets.sequence_conv_pool( + conv_4 = fluid.nets.sequence_conv_pool( input=emb, num_filters=hid_dim, filter_size=4, act="tanh", pool_type="sqrt") - prediction = layers.fc(input=[conv_3, conv_4], - size=class_dim, - act="softmax") - cost = layers.cross_entropy(input=prediction, label=label) - avg_cost = layers.mean(x=cost) - adam_optimizer = AdamOptimizer(learning_rate=0.002) - opts = adam_optimizer.minimize(avg_cost) - acc = layers.accuracy(input=prediction, label=label) - return avg_cost, acc + prediction = fluid.layers.fc(input=[conv_3, conv_4], + size=class_dim, + act="softmax") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) + adam_optimizer.minimize(avg_cost) + accuracy = fluid.evaluator.Accuracy(input=prediction, label=label) + return avg_cost, accuracy, accuracy.metrics[0] def to_lodtensor(data, place): @@ -45,7 +41,7 @@ def to_lodtensor(data, place): lod.append(cur_len) flattened_data = np.concatenate(data, axis=0).astype("int64") flattened_data = flattened_data.reshape([len(flattened_data), 1]) - res = core.LoDTensor() + res = fluid.LoDTensor() res.set(flattened_data, place) res.set_lod([lod]) return res @@ -59,36 +55,38 @@ def main(): dict_dim = len(word_dict) class_dim = 2 - cost, acc = convolution_net(input_dim=dict_dim, class_dim=class_dim) + cost, accuracy, acc_out = convolution_net( + input_dim=dict_dim, class_dim=class_dim) train_data = paddle.batch( paddle.reader.shuffle( paddle.dataset.imdb.train(word_dict), buf_size=1000), batch_size=BATCH_SIZE) - place = core.CPUPlace() - exe = Executor(place) + place = fluid.CPUPlace() + exe = fluid.Executor(place) - exe.run(framework.default_startup_program()) + exe.run(fluid.default_startup_program()) for pass_id in xrange(PASS_NUM): + accuracy.reset(exe) for data in train_data(): tensor_words = to_lodtensor(map(lambda x: x[0], data), place) label = np.array(map(lambda x: x[1], data)).astype("int64") label = label.reshape([BATCH_SIZE, 1]) - tensor_label = core.LoDTensor() + tensor_label = fluid.LoDTensor() tensor_label.set(label, place) - outs = exe.run(framework.default_main_program(), - feed={"words": tensor_words, - "label": tensor_label}, - fetch_list=[cost, acc]) - cost_val = np.array(outs[0]) - acc_val = np.array(outs[1]) - - print("cost=" + str(cost_val) + " acc=" + str(acc_val)) - if cost_val < 1.0 and acc_val > 0.7: + cost_val, acc_val = exe.run( + fluid.default_main_program(), + feed={"words": tensor_words, + "label": tensor_label}, + fetch_list=[cost, acc_out]) + pass_acc = accuracy.eval(exe) + print("cost=" + str(cost_val) + " acc=" + str(acc_val) + + " pass_acc=" + str(pass_acc)) + if cost_val < 1.0 and pass_acc > 0.8: exit(0) exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py index 65d44542501e6531fc1912cbc726a1d903b9c031..094a3cdcda12eaee351476e99a388c44b3c81cd6 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py @@ -1,10 +1,6 @@ import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.core as core -import paddle.v2.fluid.framework as framework -import paddle.v2.fluid.layers as layers -from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.optimizer import AdamOptimizer +import paddle.v2.fluid as fluid def stacked_lstm_net(input_dim, @@ -13,36 +9,36 @@ def stacked_lstm_net(input_dim, hid_dim=512, stacked_num=3): assert stacked_num % 2 == 1 - data = layers.data(name="words", shape=[1], data_type="int64") - label = layers.data(name="label", shape=[1], data_type="int64") + data = fluid.layers.data(name="words", shape=[1], dtype="int64") + label = fluid.layers.data(name="label", shape=[1], dtype="int64") - emb = layers.embedding(input=data, size=[input_dim, emb_dim]) + emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim]) # add bias attr # TODO(qijun) linear act - fc1 = layers.fc(input=emb, size=hid_dim) - lstm1, cell1 = layers.dynamic_lstm(input=fc1, size=hid_dim) + fc1 = fluid.layers.fc(input=emb, size=hid_dim) + lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim) inputs = [fc1, lstm1] for i in range(2, stacked_num + 1): - fc = layers.fc(input=inputs, size=hid_dim) - lstm, cell = layers.dynamic_lstm( + fc = fluid.layers.fc(input=inputs, size=hid_dim) + lstm, cell = fluid.layers.dynamic_lstm( input=fc, size=hid_dim, is_reverse=(i % 2) == 0) inputs = [fc, lstm] - fc_last = layers.sequence_pool(input=inputs[0], pool_type='max') - lstm_last = layers.sequence_pool(input=inputs[1], pool_type='max') + fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max') + lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max') - prediction = layers.fc(input=[fc_last, lstm_last], - size=class_dim, - act='softmax') - cost = layers.cross_entropy(input=prediction, label=label) - avg_cost = layers.mean(x=cost) - adam_optimizer = AdamOptimizer(learning_rate=0.002) - opts = adam_optimizer.minimize(avg_cost) - acc = layers.accuracy(input=prediction, label=label) - return avg_cost, acc + prediction = fluid.layers.fc(input=[fc_last, lstm_last], + size=class_dim, + act='softmax') + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) + adam_optimizer.minimize(avg_cost) + accuracy = fluid.evaluator.Accuracy(input=prediction, label=label) + return avg_cost, accuracy, accuracy.metrics[0] def to_lodtensor(data, place): @@ -54,7 +50,7 @@ def to_lodtensor(data, place): lod.append(cur_len) flattened_data = np.concatenate(data, axis=0).astype("int64") flattened_data = flattened_data.reshape([len(flattened_data), 1]) - res = core.LoDTensor() + res = fluid.LoDTensor() res.set(flattened_data, place) res.set_lod([lod]) return res @@ -69,36 +65,38 @@ def main(): dict_dim = len(word_dict) class_dim = 2 - cost, acc = stacked_lstm_net(input_dim=dict_dim, class_dim=class_dim) + cost, accuracy, acc_out = stacked_lstm_net( + input_dim=dict_dim, class_dim=class_dim) train_data = paddle.batch( paddle.reader.shuffle( paddle.dataset.imdb.train(word_dict), buf_size=1000), batch_size=BATCH_SIZE) - place = core.CPUPlace() - exe = Executor(place) + place = fluid.CPUPlace() + exe = fluid.Executor(place) - exe.run(framework.default_startup_program()) + exe.run(fluid.default_startup_program()) for pass_id in xrange(PASS_NUM): + accuracy.reset(exe) for data in train_data(): tensor_words = to_lodtensor(map(lambda x: x[0], data), place) label = np.array(map(lambda x: x[1], data)).astype("int64") label = label.reshape([BATCH_SIZE, 1]) - tensor_label = core.LoDTensor() + tensor_label = fluid.LoDTensor() tensor_label.set(label, place) - outs = exe.run(framework.default_main_program(), - feed={"words": tensor_words, - "label": tensor_label}, - fetch_list=[cost, acc]) - cost_val = np.array(outs[0]) - acc_val = np.array(outs[1]) - - print("cost=" + str(cost_val) + " acc=" + str(acc_val)) - if cost_val < 1.0 and acc_val > 0.7: + cost_val, acc_val = exe.run( + fluid.default_main_program(), + feed={"words": tensor_words, + "label": tensor_label}, + fetch_list=[cost, acc_out]) + pass_acc = accuracy.eval(exe) + print("cost=" + str(cost_val) + " acc=" + str(acc_val) + + " pass_acc=" + str(pass_acc)) + if cost_val < 1.0 and acc_val > 0.8: exit(0) exit(1) diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py index 280f6e902c34512735a27586221c2be68963ef2b..b2479320330bde5771c3d4a8e2923b5ab1eecf2e 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py @@ -1,40 +1,39 @@ import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.core as core -import paddle.v2.fluid.framework as framework -import paddle.v2.fluid.layers as layers -from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.optimizer import AdamOptimizer +import paddle.v2.fluid as fluid def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): - data = layers.data( + data = fluid.layers.data( name="words", shape=[seq_len * batch_size, 1], append_batch_size=False, - data_type="int64") - label = layers.data( + dtype="int64") + label = fluid.layers.data( name="label", shape=[batch_size, 1], append_batch_size=False, - data_type="int64") + dtype="int64") - emb = layers.embedding(input=data, size=[dict_dim, emb_dim]) - emb = layers.reshape(x=emb, shape=[batch_size, seq_len, emb_dim]) - emb = layers.transpose(x=emb, axis=[1, 0, 2]) + emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim]) + emb = fluid.layers.reshape(x=emb, shape=[batch_size, seq_len, emb_dim]) + emb = fluid.layers.transpose(x=emb, axis=[1, 0, 2]) - c_pre_init = layers.fill_constant( - dtype=emb.data_type, shape=[batch_size, emb_dim], value=0.0) - layer_1_out = layers.lstm(emb, c_pre_init=c_pre_init, hidden_dim=emb_dim) - layer_1_out = layers.transpose(x=layer_1_out, axis=[1, 0, 2]) + c_pre_init = fluid.layers.fill_constant( + dtype=emb.dtype, shape=[batch_size, emb_dim], value=0.0) + layer_1_out = fluid.layers.lstm( + emb, c_pre_init=c_pre_init, hidden_dim=emb_dim) + layer_1_out = fluid.layers.transpose(x=layer_1_out, axis=[1, 0, 2]) - prediction = layers.fc(input=layer_1_out, size=class_dim, act="softmax") - cost = layers.cross_entropy(input=prediction, label=label) + prediction = fluid.layers.fc(input=layer_1_out, + size=class_dim, + act="softmax") + cost = fluid.layers.cross_entropy(input=prediction, label=label) - avg_cost = layers.mean(x=cost) - adam_optimizer = AdamOptimizer(learning_rate=0.002) - opts = adam_optimizer.minimize(avg_cost) - acc = layers.accuracy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) + adam_optimizer.minimize(avg_cost) + acc = fluid.layers.accuracy(input=prediction, label=label) return avg_cost, acc @@ -48,57 +47,65 @@ def to_lodtensor(data, place): lod.append(cur_len) flattened_data = np.concatenate(data, axis=0).astype("int64") flattened_data = flattened_data.reshape([len(flattened_data), 1]) - res = core.LoDTensor() + res = fluid.LoDTensor() res.set(flattened_data, place) res.set_lod([lod]) return res -def chop_data(data, chop_len=80, batch_len=50): +def chop_data(data, chop_len=80, batch_size=50): data = [(x[0][:chop_len], x[1]) for x in data if len(x[0]) >= chop_len] - return data[:batch_len] + return data[:batch_size] def prepare_feed_data(data, place): tensor_words = to_lodtensor(map(lambda x: x[0], data), place) label = np.array(map(lambda x: x[1], data)).astype("int64") - label = label.reshape([50, 1]) - tensor_label = core.LoDTensor() + label = label.reshape([len(label), 1]) + tensor_label = fluid.LoDTensor() tensor_label.set(label, place) return tensor_words, tensor_label def main(): + BATCH_SIZE = 100 + PASS_NUM = 5 + word_dict = paddle.dataset.imdb.word_dict() - cost, acc = lstm_net(dict_dim=len(word_dict), class_dim=2) + print "load word dict successfully" + dict_dim = len(word_dict) + class_dim = 2 + + cost, acc = lstm_net(dict_dim=dict_dim, class_dim=class_dim) - batch_size = 100 train_data = paddle.batch( - paddle.reader.buffered( - paddle.dataset.imdb.train(word_dict), size=batch_size * 10), - batch_size=batch_size) - - data = chop_data(next(train_data())) - - place = core.CPUPlace() - tensor_words, tensor_label = prepare_feed_data(data, place) - exe = Executor(place) - exe.run(framework.default_startup_program()) - - while True: - outs = exe.run(framework.default_main_program(), - feed={"words": tensor_words, - "label": tensor_label}, - fetch_list=[cost, acc]) - cost_val = np.array(outs[0]) - acc_val = np.array(outs[1]) - - print("cost=" + str(cost_val) + " acc=" + str(acc_val)) - if acc_val > 0.9: - break + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=BATCH_SIZE * 10), + batch_size=BATCH_SIZE) + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + exe.run(fluid.default_startup_program()) + + for pass_id in xrange(PASS_NUM): + for data in train_data(): + chopped_data = chop_data(data) + tensor_words, tensor_label = prepare_feed_data(chopped_data, place) + + outs = exe.run(fluid.default_main_program(), + feed={"words": tensor_words, + "label": tensor_label}, + fetch_list=[cost, acc]) + cost_val = np.array(outs[0]) + acc_val = np.array(outs[1]) + + print("cost=" + str(cost_val) + " acc=" + str(acc_val)) + if acc_val > 0.7: + exit(0) + exit(1) if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py index afa7b285198e0349317e123e4bd98e8336217afa..b0cd1a518cd1be60474df126470573a5a5b81b70 100644 --- a/python/paddle/v2/fluid/tests/book/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -1,10 +1,6 @@ import numpy as np import paddle.v2 as paddle -import paddle.v2.fluid.core as core -import paddle.v2.fluid.framework as framework -import paddle.v2.fluid.layers as layers -from paddle.v2.fluid.executor import Executor -from paddle.v2.fluid.optimizer import SGDOptimizer +import paddle.v2.fluid as fluid PASS_NUM = 100 EMBED_SIZE = 32 @@ -16,57 +12,57 @@ IS_SPARSE = True word_dict = paddle.dataset.imikolov.build_dict() dict_size = len(word_dict) -first_word = layers.data(name='firstw', shape=[1], data_type='int64') -second_word = layers.data(name='secondw', shape=[1], data_type='int64') -third_word = layers.data(name='thirdw', shape=[1], data_type='int64') -forth_word = layers.data(name='forthw', shape=[1], data_type='int64') -next_word = layers.data(name='nextw', shape=[1], data_type='int64') +first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') +second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64') +third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') +forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') +next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') -embed_first = layers.embedding( +embed_first = fluid.layers.embedding( input=first_word, size=[dict_size, EMBED_SIZE], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'shared_w'}) -embed_second = layers.embedding( +embed_second = fluid.layers.embedding( input=second_word, size=[dict_size, EMBED_SIZE], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'shared_w'}) -embed_third = layers.embedding( +embed_third = fluid.layers.embedding( input=third_word, size=[dict_size, EMBED_SIZE], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'shared_w'}) -embed_forth = layers.embedding( +embed_forth = fluid.layers.embedding( input=forth_word, size=[dict_size, EMBED_SIZE], - data_type='float32', + dtype='float32', is_sparse=IS_SPARSE, param_attr={'name': 'shared_w'}) -concat_embed = layers.concat( +concat_embed = fluid.layers.concat( input=[embed_first, embed_second, embed_third, embed_forth], axis=1) -hidden1 = layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid') -predict_word = layers.fc(input=hidden1, size=dict_size, act='softmax') -cost = layers.cross_entropy(input=predict_word, label=next_word) -avg_cost = layers.mean(x=cost) -sgd_optimizer = SGDOptimizer(learning_rate=0.001) -opts = sgd_optimizer.minimize(avg_cost) +hidden1 = fluid.layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid') +predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax') +cost = fluid.layers.cross_entropy(input=predict_word, label=next_word) +avg_cost = fluid.layers.mean(x=cost) +sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) +sgd_optimizer.minimize(avg_cost) train_reader = paddle.batch( paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) -place = core.CPUPlace() -exe = Executor(place) +place = fluid.CPUPlace() +exe = fluid.Executor(place) # fix https://github.com/PaddlePaddle/Paddle/issues/5434 then remove # below exit line. exit(0) -exe.run(framework.default_startup_program()) +exe.run(fluid.default_startup_program()) for pass_id in range(PASS_NUM): for data in train_reader(): @@ -74,36 +70,15 @@ for pass_id in range(PASS_NUM): input_data = map(lambda x: np.array(x).astype("int64"), input_data) input_data = map(lambda x: np.expand_dims(x, axis=1), input_data) - first_data = input_data[0] - first_tensor = core.LoDTensor() - first_tensor.set(first_data, place) - - second_data = input_data[1] - second_tensor = core.LoDTensor() - second_tensor.set(second_data, place) - - third_data = input_data[2] - third_tensor = core.LoDTensor() - third_tensor.set(third_data, place) - - forth_data = input_data[3] - forth_tensor = core.LoDTensor() - forth_tensor.set(forth_data, place) - - next_data = input_data[4] - next_tensor = core.LoDTensor() - next_tensor.set(next_data, place) - - outs = exe.run(framework.default_main_program(), - feed={ - 'firstw': first_tensor, - 'secondw': second_tensor, - 'thirdw': third_tensor, - 'forthw': forth_tensor, - 'nextw': next_tensor - }, - fetch_list=[avg_cost]) - out = np.array(outs[0]) - if out[0] < 10.0: + avg_cost_np = exe.run(fluid.default_main_program(), + feed={ + 'firstw': input_data[0], + 'secondw': input_data[1], + 'thirdw': input_data[2], + 'forthw': input_data[3], + 'nextw': input_data[4] + }, + fetch_list=[avg_cost]) + if avg_cost_np[0] < 10.0: exit(0) # if avg cost less than 10.0, we think our code is good. exit(1) diff --git a/python/paddle/v2/fluid/tests/op_test.py b/python/paddle/v2/fluid/tests/op_test.py index 90269e308a31d2606b23d741ce0d0fa91a0a6aeb..e83c4a0622013cbfebdf39434ef252412697acb1 100644 --- a/python/paddle/v2/fluid/tests/op_test.py +++ b/python/paddle/v2/fluid/tests/op_test.py @@ -261,7 +261,10 @@ class OpTest(unittest.TestCase): feed_map = self.feed_var(inputs, place) exe = Executor(place) - outs = exe.run(program, feed=feed_map, fetch_list=fetch_list) + outs = exe.run(program, + feed=feed_map, + fetch_list=fetch_list, + return_numpy=False) for out_name, out_dup in Operator.get_op_outputs(self.op_type): if out_name not in self.outputs: @@ -458,7 +461,7 @@ class OpTest(unittest.TestCase): mean_inputs = map(block.var, output_names) if len(mean_inputs) == 1: - loss = block.create_var(dtype=mean_inputs[0].data_type, shape=[1]) + loss = block.create_var(dtype=mean_inputs[0].dtype, shape=[1]) op = block.append_op( inputs={"X": mean_inputs}, outputs={"Out": loss}, type='mean') op.desc.infer_var_type(block.desc) @@ -466,8 +469,7 @@ class OpTest(unittest.TestCase): else: avg_sum = [] for cur_loss in mean_inputs: - cur_avg_loss = block.create_var( - dtype=cur_loss.data_type, shape=[1]) + cur_avg_loss = block.create_var(dtype=cur_loss.dtype, shape=[1]) op = block.append_op( inputs={"X": [cur_loss]}, outputs={"Out": [cur_avg_loss]}, @@ -476,13 +478,13 @@ class OpTest(unittest.TestCase): op.desc.infer_shape(block.desc) avg_sum.append(cur_avg_loss) - loss_sum = block.create_var(dtype=avg_sum[0].data_type, shape=[1]) + loss_sum = block.create_var(dtype=avg_sum[0].dtype, shape=[1]) op_sum = block.append_op( inputs={"X": avg_sum}, outputs={"Out": loss_sum}, type='sum') op_sum.desc.infer_var_type(block.desc) op_sum.desc.infer_shape(block.desc) - loss = block.create_var(dtype=loss_sum.data_type, shape=[1]) + loss = block.create_var(dtype=loss_sum.dtype, shape=[1]) op_loss = block.append_op( inputs={"X": loss_sum}, outputs={"Out": loss}, @@ -501,5 +503,6 @@ class OpTest(unittest.TestCase): fetch_list = [g for p, g in param_grad_list] executor = Executor(place) - result = executor.run(prog, feed_dict, fetch_list) - return map(np.array, result) + return map( + np.array, + executor.run(prog, feed_dict, fetch_list, return_numpy=False)) diff --git a/python/paddle/v2/fluid/tests/test_activation_op.py b/python/paddle/v2/fluid/tests/test_activation_op.py index 7649e60a3833e34523d87cb963af3888c3cef65d..bd52bef2605874d26e880fb09e589891fc1934d5 100644 --- a/python/paddle/v2/fluid/tests/test_activation_op.py +++ b/python/paddle/v2/fluid/tests/test_activation_op.py @@ -152,6 +152,49 @@ class TestAbs(OpTest): self.check_grad(['X'], 'Y', max_relative_error=0.007) +class TestCeil(OpTest): + def setUp(self): + self.op_type = "ceil" + x = np.random.uniform(-1, 1, [4, 4]).astype("float32") + self.inputs = {'X': x} + self.outputs = {'Y': np.ceil(self.inputs['X'])} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y', max_relative_error=0.007) + + +class TestFloor(OpTest): + def setUp(self): + self.op_type = "floor" + x = np.random.uniform(-1, 1, [4, 4]).astype("float32") + self.inputs = {'X': x} + # numpy floor need +1 + self.outputs = {'Y': np.floor(self.inputs['X']) + 1.0} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y', max_relative_error=0.007) + + +class TestRound(OpTest): + def setUp(self): + self.op_type = "round" + x = np.random.uniform(-1, 1, [4, 4]).astype("float32") + self.inputs = {'X': x} + self.outputs = {'Y': np.round(self.inputs['X'])} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y', max_relative_error=0.007) + + class TestRelu(OpTest): def setUp(self): self.op_type = "relu" diff --git a/python/paddle/v2/fluid/tests/test_array_read_write_op.py b/python/paddle/v2/fluid/tests/test_array_read_write_op.py index e019a4e15f0e25deaedf30911b44e576c8f89013..b7790b01062d480cbd6c9e1a626d318385b4f61e 100644 --- a/python/paddle/v2/fluid/tests/test_array_read_write_op.py +++ b/python/paddle/v2/fluid/tests/test_array_read_write_op.py @@ -52,15 +52,13 @@ class TestArrayReadWrite(unittest.TestCase): exe = Executor(cpu) - tensor = core.LoDTensor() - tensor.set(numpy.random.random(size=(100, 100)).astype('float32'), cpu) - - outs = map(numpy.array, - exe.run(feed={'x0': tensor, - 'x1': tensor, - 'x2': tensor}, - fetch_list=[a_sum, x_sum], - scope=scope)) + tensor = numpy.random.random(size=(100, 100)).astype('float32') + + outs = exe.run(feed={'x0': tensor, + 'x1': tensor, + 'x2': tensor}, + fetch_list=[a_sum, x_sum], + scope=scope) self.assertEqual(outs[0], outs[1]) total_sum = layers.sums(input=[a_sum, x_sum]) @@ -72,12 +70,11 @@ class TestArrayReadWrite(unittest.TestCase): [each_x.name + "@GRAD" for each_x in x]) g_out = [ item.sum() - for item in map( - numpy.array, - exe.run(feed={'x0': tensor, - 'x1': tensor, - 'x2': tensor}, - fetch_list=g_vars)) + for item in exe.run( + feed={'x0': tensor, + 'x1': tensor, + 'x2': tensor}, + fetch_list=g_vars) ] g_out_sum = numpy.array(g_out).sum() diff --git a/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py index 8a11820d2aba2dd4d17d925f0e0fe9f324100418..5fad7d8cce5af3677aa77dc0abb64f1ecd380419 100644 --- a/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py +++ b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py @@ -35,15 +35,15 @@ class TestBeamSearchDecodeOp(unittest.TestCase): self.append_lod_tensor( scores, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]], np.array( - [1, 2, 3, 4, 5, 6], dtype="float32")) + [1, 2, 3, 4, 5, 6], dtype="float64")) self.append_lod_tensor( scores, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]], np.array( - [0, 1, 2, 3, 4, 5], dtype="float32")) + [0, 1, 2, 3, 4, 5], dtype="float64")) self.append_lod_tensor( scores, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]], np.array( - [0, 1, 2, 3, 4], dtype="float32")) + [0, 1, 2, 3, 4], dtype="float64")) sentence_ids = self.scope.var("sentence_ids").get_tensor() sentence_scores = self.scope.var("sentence_scores").get_tensor() diff --git a/python/paddle/v2/fluid/tests/test_cast_op.py b/python/paddle/v2/fluid/tests/test_cast_op.py index 0c4b6310652e84d3dd7f281a8b98ae0435072afb..4e431bb88da6070718d64a68467be20ca87f8fb9 100644 --- a/python/paddle/v2/fluid/tests/test_cast_op.py +++ b/python/paddle/v2/fluid/tests/test_cast_op.py @@ -10,8 +10,8 @@ class TestCastOp(op_test.OpTest): self.inputs = {'X': ipt.astype('float32')} self.outputs = {'Out': ipt.astype('float64')} self.attrs = { - 'in_data_type': int(core.DataType.FP32), - 'out_data_type': int(core.DataType.FP64) + 'in_dtype': int(core.DataType.FP32), + 'out_dtype': int(core.DataType.FP64) } self.op_type = 'cast' diff --git a/python/paddle/v2/fluid/tests/test_conditional_block.py b/python/paddle/v2/fluid/tests/test_conditional_block.py index 293803f004a1513611fba30634d5552e1da84fef..d953ee7ddc37d150d87cbd680379410a4d16f6b1 100644 --- a/python/paddle/v2/fluid/tests/test_conditional_block.py +++ b/python/paddle/v2/fluid/tests/test_conditional_block.py @@ -9,7 +9,7 @@ import numpy class ConditionalBlock(unittest.TestCase): def test_forward(self): - data = layers.data(name='X', shape=[1], data_type='float32') + data = layers.data(name='X', shape=[1], dtype='float32') data.stop_gradient = False cond = layers.ConditionalBlock(inputs=[data]) out = layers.create_tensor(dtype='float32') @@ -21,18 +21,15 @@ class ConditionalBlock(unittest.TestCase): exe = Executor(cpu) exe.run(g_startup_program) - x = core.LoDTensor() - x.set(numpy.random.random(size=(10, 1)).astype('float32'), cpu) + x = numpy.random.random(size=(10, 1)).astype('float32') - outs = map(numpy.array, exe.run(feed={'X': x}, fetch_list=[out]))[0] + outs = exe.run(feed={'X': x}, fetch_list=[out])[0] print outs loss = layers.mean(x=out) append_backward_ops(loss=loss) - outs = map(numpy.array, - exe.run(feed={'X': x}, - fetch_list=[ - g_main_program.block(0).var(data.name + "@GRAD") - ]))[0] + outs = exe.run( + feed={'X': x}, + fetch_list=[g_main_program.block(0).var(data.name + "@GRAD")])[0] print outs diff --git a/python/paddle/v2/fluid/tests/test_conv2d_op.py b/python/paddle/v2/fluid/tests/test_conv2d_op.py index 2240dc73cdd31f320fed174dd811e93c6640137f..e82e3ab0c9c0bc75a13a8948fda925bc4f0b6512 100644 --- a/python/paddle/v2/fluid/tests/test_conv2d_op.py +++ b/python/paddle/v2/fluid/tests/test_conv2d_op.py @@ -16,8 +16,8 @@ def conv2d_forward_naive(input, filter, group, conv_param): out_w = 1 + (in_w + 2 * pad[1] - (dilation[1] * (f_w - 1) + 1)) / stride[1] out = np.zeros((in_n, out_c, out_h, out_w)) - d_bolck_w = (dilation[0] * (f_h - 1) + 1) - d_bolck_h = (dilation[1] * (f_w - 1) + 1) + d_bolck_h = (dilation[0] * (f_h - 1) + 1) + d_bolck_w = (dilation[1] * (f_w - 1) + 1) input_pad = np.pad(input, ((0, ), (0, ), (pad[0], ), (pad[1], )), mode='constant', @@ -167,27 +167,27 @@ class TestWithDilation(TestConv2dOp): #----------------Conv2dCudnn---------------- class TestCudnn(TestConv2dOp): def init_op_type(self): - self.op_type = "conv_cudnn" + self.op_type = "conv2d_cudnn" class TestCudnnWithPad(TestWithPad): def init_op_type(self): - self.op_type = "conv_cudnn" + self.op_type = "conv2d_cudnn" class TestCudnnWithStride(TestWithStride): def init_op_type(self): - self.op_type = "conv_cudnn" + self.op_type = "conv2d_cudnn" class TestCudnnWithGroup(TestWithGroup): def init_op_type(self): - self.op_type = "conv_cudnn" + self.op_type = "conv2d_cudnn" class TestCudnnWith1x1(TestWith1x1): def init_op_type(self): - self.op_type = "conv_cudnn" + self.op_type = "conv2d_cudnn" # cudnn v5 does not support dilation conv. diff --git a/python/paddle/v2/fluid/tests/test_conv3d_op.py b/python/paddle/v2/fluid/tests/test_conv3d_op.py index 934ea46437d67b78309a86a2779e0c6577399136..8593dff20b5c283d5862206dfb0c0d2501039d07 100644 --- a/python/paddle/v2/fluid/tests/test_conv3d_op.py +++ b/python/paddle/v2/fluid/tests/test_conv3d_op.py @@ -169,5 +169,31 @@ class TestWithDilation(TestConv3dOp): self.groups = 3 +class TestCudnn(TestConv3dOp): + def init_op_type(self): + self.op_type = "conv3d_cudnn" + + +class TestWithGroup1Cudnn(TestWithGroup1): + def init_op_type(self): + self.op_type = "conv3d_cudnn" + + +class TestWithGroup2Cudnn(TestWithGroup2): + def init_op_type(self): + self.op_type = "conv3d_cudnn" + + +class TestWith1x1Cudnn(TestWith1x1): + def init_op_type(self): + self.op_type = "conv3d_cudnn" + + +# FIXME(typhoonzero): find a way to determine if +# using cudnn > 6 in python +# class TestWithDilationCudnn(TestWithDilation): +# def init_op_type(self): +# self.op_type = "conv3d_cudnn" + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_dropout_op.py b/python/paddle/v2/fluid/tests/test_dropout_op.py index b14a366fcad7f4bf6968b6013c6cfbb57090071d..4f5ea836b44102e5599a2302efd669291ebe920b 100644 --- a/python/paddle/v2/fluid/tests/test_dropout_op.py +++ b/python/paddle/v2/fluid/tests/test_dropout_op.py @@ -7,7 +7,7 @@ class TestDropoutOp(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 0.0, 'is_training': True} + self.attrs = {'dropout_prob': 0.0, 'is_test': False} self.outputs = { 'Out': self.inputs['X'], 'Mask': np.ones((32, 64)).astype('float32') @@ -24,7 +24,7 @@ class TestDropoutOp2(TestDropoutOp): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 1.0, 'is_training': True} + self.attrs = {'dropout_prob': 1.0, 'is_test': False} self.outputs = { 'Out': np.zeros((32, 64)).astype('float32'), 'Mask': np.zeros((32, 64)).astype('float32') @@ -35,7 +35,7 @@ class TestDropoutOp3(TestDropoutOp): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64, 2)).astype("float32")} - self.attrs = {'dropout_prob': 0.0, 'is_training': True} + self.attrs = {'dropout_prob': 0.0, 'is_test': False} self.outputs = { 'Out': self.inputs['X'], 'Mask': np.ones((32, 64, 2)).astype('float32') @@ -46,7 +46,7 @@ class TestDropoutOp4(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 0.35, 'is_training': False} + self.attrs = {'dropout_prob': 0.35, 'is_test': True} self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']} def test_check_output(self): @@ -57,7 +57,7 @@ class TestDropoutOp5(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64, 3)).astype("float32")} - self.attrs = {'dropout_prob': 0.75, 'is_training': False} + self.attrs = {'dropout_prob': 0.75, 'is_test': True} self.outputs = {'Out': self.inputs['X'] * self.attrs['dropout_prob']} def test_check_output(self): diff --git a/python/paddle/v2/fluid/tests/test_dynamic_recurrent_op.py b/python/paddle/v2/fluid/tests/test_dynamic_recurrent_op.py deleted file mode 100644 index c2d8b48ea944ae40a451492b8e9fad38dda0835c..0000000000000000000000000000000000000000 --- a/python/paddle/v2/fluid/tests/test_dynamic_recurrent_op.py +++ /dev/null @@ -1,171 +0,0 @@ -import logging -import paddle.v2.fluid.core as core -import unittest -from paddle.v2.fluid.op import Operator, DynamicRecurrentOp -import numpy as np - -# for siplicity, just one level LoD -lod_py = [[0, 4, 7, 9, 10]] -input_dim = 30 -num_sents = len(lod_py[0]) - 1 -weight_dim = 15 - - -def create_tensor(scope, name, shape, np_data): - tensor = scope.var(name).get_tensor() - tensor.set_dims(shape) - tensor.set(np_data, core.CPUPlace()) - return tensor - - -class PyRNNStep(object): - def __init__(self): - - self.x = np.random.normal(size=(lod_py[0][-1], - input_dim)).astype("float32") - self.W = np.random.normal(size=(input_dim, input_dim)).astype("float32") - self.U = np.random.normal(size=(input_dim, input_dim)).astype("float32") - self.h_boot = np.random.normal(size=(num_sents, - input_dim)).astype("float32") - - -class DynamicRecurrentOpTest(unittest.TestCase): - ''' - Test RNNOp - - equation: - h_t = \sigma (W x_t + U h_{t-1}) - weights: - - W - - U - vars: - - x - states: - - h - outputs: - - h - ''' - - py = PyRNNStep() - - def forward(self): - self.scope = core.Scope() - self.create_global_variables() - self.create_rnn_op() - self.create_step_net() - ctx = core.DeviceContext.create(core.CPUPlace()) - self.rnnop.run(self.scope, ctx) - state = self.rnnop.get_state("h@state") - print 'state size: ', state.size() - - step_inputs = self.rnnop.get_step_input("x") - print "x size ", step_inputs.size() - for i in range(step_inputs.size()): - print "x %d" % i, np.array(step_inputs.read(i).get_dims()) - step_outputs = self.rnnop.get_step_output('h@state') - print 'step_outputs.size ', step_outputs.size() - output = self.scope.find_var("h@state").get_tensor() - print 'output', np.array(output).shape - - def create_global_variables(self): - # create inlink - x_tensor = create_tensor(self.scope, "x", [num_sents, input_dim], - self.py.x) - x_tensor.set_lod(lod_py) - create_tensor(self.scope, "W", [input_dim, input_dim], self.py.W) - create_tensor(self.scope, "U", [input_dim, input_dim], self.py.U) - create_tensor(self.scope, "h_boot", [num_sents, input_dim], - self.py.h_boot) - self.scope.var("step_scopes") - self.scope.var("h@state") - - def create_rnn_op(self): - # create RNNOp - self.rnnop = DynamicRecurrentOp( - # inputs - inputs=["x"], - initial_states=["h_boot"], - step_net="step_unit", - # outputs - outputs=["h@state"], - step_scopes="step_scopes", - # attributes - ex_states=["h@pre"], - states=["h@state"]) - - def create_step_net(self): - step_unit = core.Net.create() - x_fc_op = Operator("mul", X="x", Y="W", Out="Wx") - h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") - sum_op = Operator("sum", X=["Wx", "Uh"], Out="sum") - sig_op = Operator("sigmoid", X="sum", Y="h@state") - - for op in [x_fc_op, h_fc_op, sum_op, sig_op]: - step_unit.append_op(op) - step_unit.complete_add_op(True) - self.rnnop.set_step_unit(step_unit) - - def test_forward(self): - print 'test recurrent op forward' - pd_output = self.forward() - print 'pd_output', pd_output - - -class RecurrentGradientOpTest(unittest.TestCase): - py = PyRNNStep() - - def create_forward_op(self): - # create RNNOp - self.forward_op = DynamicRecurrentOp( - # inputs - inputs=["x"], - initial_states=["h_boot"], - step_net="step_unit", - # outputs - outputs=["h@state"], - step_scopes="step_scopes", - # attributes - ex_states=["h@pre"], - states=["h@state"]) - - def create_gradient_op(self): - a = set() - backward_op = core.DynamicRecurrentOp.backward(self.forward_op, a) - - def create_step_net(self): - step_unit = core.Net.create() - x_fc_op = Operator("mul", X="x", Y="W", Out="Wx") - h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") - sum_op = Operator("sum", X=["Wx", "Uh"], Out="sum") - sig_op = Operator("sigmoid", X="sum", Y="h@state") - - for op in [x_fc_op, h_fc_op, sum_op, sig_op]: - step_unit.append_op(op) - step_unit.complete_add_op(True) - self.forward_op.set_step_unit(step_unit) - - def create_global_variables(self): - # create inlink - x_tensor = create_tensor(self.scope, "x", [num_sents, input_dim], - self.py.x) - x_tensor.set_lod(lod_py) - create_tensor(self.scope, "W", [input_dim, input_dim], self.py.W) - create_tensor(self.scope, "U", [input_dim, input_dim], self.py.U) - create_tensor(self.scope, "h_boot", [num_sents, input_dim], - self.py.h_boot) - self.scope.var("step_scopes") - self.scope.var("h@state") - - def test_grad(self): - self.scope = core.Scope() - self.create_forward_op() - self.create_global_variables() - self.create_step_net() - self.create_gradient_op() - - -if __name__ == '__main__': - exit( - 0 - ) # FIXME(qijun): https://github.com/PaddlePaddle/Paddle/issues/5101#issuecomment-339814957 - unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_executor_and_mul.py b/python/paddle/v2/fluid/tests/test_executor_and_mul.py index 709250d0c86dde84ac22c37d8e2385ca4a80a40a..558273e30dff7fb74f78751f4fe569f79a453d0d 100644 --- a/python/paddle/v2/fluid/tests/test_executor_and_mul.py +++ b/python/paddle/v2/fluid/tests/test_executor_and_mul.py @@ -1,5 +1,5 @@ import unittest -from paddle.v2.fluid.layers import mul, data +from paddle.v2.fluid.layers import mul, data, sequence_pool import paddle.v2.fluid.core as core from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.framework import g_main_program @@ -8,26 +8,22 @@ import numpy class TestExecutor(unittest.TestCase): def test_mul(self): - a = data(name='a', shape=[784], data_type='float32') + a = data(name='a', shape=[784], dtype='float32') b = data( name='b', shape=[784, 100], - data_type='float32', + dtype='float32', append_batch_size=False) out = mul(x=a, y=b) place = core.CPUPlace() a_np = numpy.random.random((100, 784)).astype('float32') - tensor_a = core.LoDTensor() - tensor_a.set(a_np, place) b_np = numpy.random.random((784, 100)).astype('float32') - tensor_b = core.LoDTensor() - tensor_b.set(b_np, place) exe = Executor(place) outs = exe.run(g_main_program, - feed={'a': tensor_a, - 'b': tensor_b}, + feed={'a': a_np, + 'b': b_np}, fetch_list=[out]) - out = numpy.array(outs[0]) + out = outs[0] self.assertEqual((100, 100), out.shape) self.assertTrue(numpy.allclose(out, numpy.dot(a_np, b_np))) diff --git a/python/paddle/v2/fluid/tests/test_ftrl_op.py b/python/paddle/v2/fluid/tests/test_ftrl_op.py new file mode 100644 index 0000000000000000000000000000000000000000..f77ac4659a9b877829f7ae52dd005d9dd11dac07 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_ftrl_op.py @@ -0,0 +1,62 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestFTRLOp(OpTest): + def setUp(self): + self.op_type = "ftrl" + w = np.random.random((102, 105)).astype("float32") + g = np.random.random((102, 105)).astype("float32") + sq_accum = np.full((102, 105), 0.1).astype("float32") + linear_accum = np.full((102, 105), 0.1).astype("float32") + lr = np.array([0.01]).astype("float32") + l1 = 0.1 + l2 = 0.2 + lr_power = -0.5 + + self.inputs = { + 'Param': w, + 'SquaredAccumulator': sq_accum, + 'LinearAccumulator': linear_accum, + 'Grad': g, + 'LearningRate': lr + } + self.attrs = { + 'l1': l1, + 'l2': l2, + 'lr_power': lr_power, + 'learning_rate': lr + } + new_accum = sq_accum + g * g + if lr_power == -0.5: + linear_out = linear_accum + g - ( + (np.sqrt(new_accum) - np.sqrt(sq_accum)) / lr) * w + else: + linear_out = linear_accum + g - ((np.power( + new_accum, -lr_power) - np.power(sq_accum, -lr_power)) / lr) * w + + x = (l1 * np.sign(linear_out) - linear_out) + if lr_power == -0.5: + y = (np.sqrt(new_accum) / lr) + (2 * l2) + pre_shrink = x / y + param_out = np.where(np.abs(linear_out) > l1, pre_shrink, 0.0) + else: + y = (np.power(new_accum, -lr_power) / lr) + (2 * l2) + pre_shrink = x / y + param_out = np.where(np.abs(linear_out) > l1, pre_shrink, 0.0) + + sq_accum_out = sq_accum + g * g + + self.outputs = { + 'ParamOut': param_out, + 'SquaredAccumOut': sq_accum_out, + 'LinearAccumOut': linear_out + } + + def test_check_output(self): + self.check_output() + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_gru_unit_op.py b/python/paddle/v2/fluid/tests/test_gru_unit_op.py index f356f6e9ec0da2d3e1fb67638d81e8d54c544f53..501d5aa5797d6def708338692f0861657f951ef7 100644 --- a/python/paddle/v2/fluid/tests/test_gru_unit_op.py +++ b/python/paddle/v2/fluid/tests/test_gru_unit_op.py @@ -28,8 +28,8 @@ def relu(x): class TestGRUUnitOp(OpTest): - batch_size = 3 - frame_size = 5 + batch_size = 5 + frame_size = 10 activate = { GRUActivationType.identity: identity, GRUActivationType.sigmoid: sigmoid, @@ -77,7 +77,7 @@ class TestGRUUnitOp(OpTest): c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) + g[:, frame_size * 2:]) g = np.hstack((u_r, c)) - h = u * h_p + (1 - u) * c + h = u * c + (1 - u) * h_p self.outputs = { 'Gate': g.astype('float64'), 'ResetHiddenPrev': r_h_p.astype('float64'), @@ -92,10 +92,7 @@ class TestGRUUnitOp(OpTest): self.check_output() def test_check_grad(self): - self.check_grad( - ['Input', 'HiddenPrev', 'Weight'], - ['Hidden', 'ResetHiddenPrev', 'Gate'], - max_relative_error=0.007) + self.check_grad(['Input', 'HiddenPrev', 'Weight'], ['Hidden']) class TestGRUUnitOpWithBias(TestGRUUnitOp): @@ -104,18 +101,20 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp): frame_size = self.frame_size super(TestGRUUnitOpWithBias, self).set_inputs() self.inputs['Bias'] = np.random.uniform( - -0.1, 0.1, (1, frame_size * 3)).astype('float32') + -0.1, 0.1, (1, frame_size * 3)).astype('float64') self.attrs = { 'activation': GRUActivationType.identity, 'gate_activation': GRUActivationType.sigmoid } def test_check_grad(self): + self.check_grad(['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden']) + + def test_check_grad_ingore_input(self): self.check_grad( - ['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden'], - max_relative_error=0.007) + ['HiddenPrev', 'Weight', 'Bias'], ['Hidden'], + no_grad_set=set('Input')) if __name__ == '__main__': - exit(0) # FIXME(yuyang18): This unittest is not pass. Fix it later unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_image_classification_layer.py b/python/paddle/v2/fluid/tests/test_image_classification_layer.py index bf5444107fa1609e67b09823b82e5fb92234b0a4..8e8e1b0a8c07a60cb1404462f976d10fe26e87f6 100644 --- a/python/paddle/v2/fluid/tests/test_image_classification_layer.py +++ b/python/paddle/v2/fluid/tests/test_image_classification_layer.py @@ -32,7 +32,7 @@ class TestLayer(unittest.TestCase): images = layers.data( name='pixel', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program) layers.batch_norm( input=images, @@ -47,7 +47,7 @@ class TestLayer(unittest.TestCase): images = layers.data( name='pixel', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program) layers.dropout( x=images, @@ -64,7 +64,7 @@ class TestLayer(unittest.TestCase): images = layers.data( name='pixel', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program, startup_program=startup_program) conv1 = conv_block(images, 64, 2, [0.3, 0], main_program, @@ -80,13 +80,13 @@ class TestLayer(unittest.TestCase): image1 = layers.data( name='pixel1', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program, startup_program=startup_program) image2 = layers.data( name='pixel2', shape=[3, 48, 48], - data_type='float32', + dtype='float32', main_program=main_program, startup_program=startup_program) out = layers.elementwise_add( diff --git a/python/paddle/v2/fluid/tests/test_inference_model_io.py b/python/paddle/v2/fluid/tests/test_inference_model_io.py index 98b95713b73e8eba93bd6a58eaaed603cfae7952..60aed62ead83dedbeb9438c431ec292558d88ce5 100644 --- a/python/paddle/v2/fluid/tests/test_inference_model_io.py +++ b/python/paddle/v2/fluid/tests/test_inference_model_io.py @@ -1,13 +1,13 @@ -import paddle.v2 as paddle -import paddle.v2.fluid.layers as layers +import unittest + +import numpy as np import paddle.v2.fluid.core as core -import paddle.v2.fluid.optimizer as optimizer +import paddle.v2.fluid.executor as executor +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.optimizer as optimizer from paddle.v2.fluid.framework import Program from paddle.v2.fluid.io import save_inference_model, load_inference_model -import paddle.v2.fluid.executor as executor -import unittest -import numpy as np class TestBook(unittest.TestCase): @@ -19,13 +19,13 @@ class TestBook(unittest.TestCase): x = layers.data( name='x', shape=[2], - data_type='float32', + dtype='float32', main_program=program, startup_program=init_program) y = layers.data( name='y', shape=[1], - data_type='float32', + dtype='float32', main_program=program, startup_program=init_program) @@ -44,7 +44,7 @@ class TestBook(unittest.TestCase): x=cost, main_program=program, startup_program=init_program) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) - opts = sgd_optimizer.minimize(avg_cost, init_program) + sgd_optimizer.minimize(avg_cost, init_program) place = core.CPUPlace() exe = executor.Executor(place) @@ -52,25 +52,20 @@ class TestBook(unittest.TestCase): exe.run(init_program, feed={}, fetch_list=[]) for i in xrange(100): - x_data = np.array( + tensor_x = np.array( [[1, 1], [1, 2], [3, 4], [5, 2]]).astype("float32") - y_data = np.array([[-2], [-3], [-7], [-7]]).astype("float32") + tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32") - tensor_x = core.LoDTensor() - tensor_x.set(x_data, place) - tensor_y = core.LoDTensor() - tensor_y.set(y_data, place) exe.run(program, feed={'x': tensor_x, 'y': tensor_y}, fetch_list=[avg_cost]) save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program) - outs = exe.run(program, - feed={'x': tensor_x, - 'y': tensor_y}, - fetch_list=[avg_cost]) - expected = np.array(outs[0]) + expected = exe.run(program, + feed={'x': tensor_x, + 'y': tensor_y}, + fetch_list=[avg_cost])[0] reload(executor) # reload to build a new scope exe = executor.Executor(place) @@ -83,7 +78,7 @@ class TestBook(unittest.TestCase): feed={feed_var_names[0]: tensor_x, feed_var_names[1]: tensor_y}, fetch_list=fetch_vars) - actual = np.array(outs[0]) + actual = outs[0] self.assertEqual(feed_var_names, ["x", "y"]) self.assertEqual(len(fetch_vars), 1) diff --git a/python/paddle/v2/fluid/tests/test_initializer.py b/python/paddle/v2/fluid/tests/test_initializer.py index f2eb79b209627f5814847db6d96c0a17300d9b5a..6c20203f8eca02b3f68ed2aa8664bed29551c070 100644 --- a/python/paddle/v2/fluid/tests/test_initializer.py +++ b/python/paddle/v2/fluid/tests/test_initializer.py @@ -223,5 +223,109 @@ class TestXavierInitializer(unittest.TestCase): self.assertEqual(init_op.attr('seed'), 134) +class TestMSRAInitializer(unittest.TestCase): + def test_uniform_msra_initializer(self): + """Test MSRA initializer with uniform distribution on + for matrix multiply. + """ + program = framework.Program() + block = program.global_block() + param = block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer()) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'uniform_random') + limit = np.sqrt(6.0 / param.shape[0]) + self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) + self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_uniform_msra_initializer_conv(self): + """Test MSRA initializer with uniform distribution on + for convolutions. + """ + program = framework.Program() + block = program.global_block() + param = block.create_parameter( + dtype="float32", + shape=[5, 10, 15, 20], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer()) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'uniform_random') + receptive_field_size = float(15 * 20) + limit = np.sqrt(6.0 / (param.shape[1] * receptive_field_size)) + self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) + self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_normal_msra_initializer(self): + """Test MSRA initializer with normal distribution on + for matrix multiply. + """ + program = framework.Program() + block = program.global_block() + param = block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer(uniform=False)) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'gaussian_random') + std = np.sqrt(2.0 / param.shape[0]) + self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA) + self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_normal_msra_initializer_conv(self): + """Test MSRA initializer with normal distribution on + for convolutions. + """ + program = framework.Program() + block = program.global_block() + param = block.create_parameter( + dtype="float32", + shape=[5, 10, 15, 20], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer(uniform=False)) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'gaussian_random') + receptive_field_size = float(15 * 20) + std = np.sqrt(2.0 / (param.shape[1] * receptive_field_size)) + self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA) + self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_msra_initializer_supplied_arguments(self): + """Test the MSRA initializer with supplied arguments + """ + program = framework.Program() + block = program.global_block() + block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer( + fan_in=12, seed=134)) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'uniform_random') + limit = np.sqrt(6.0 / 12) + self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) + self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 134) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py index 3d18e7ce3a4dc6c6b917a1000de39fca71f6ac18..87dc6d1a6270e0f8425b56601d04049450c73380 100644 --- a/python/paddle/v2/fluid/tests/test_layers.py +++ b/python/paddle/v2/fluid/tests/test_layers.py @@ -1,25 +1,26 @@ +import unittest + import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets from paddle.v2.fluid.framework import Program -import paddle.v2.fluid.core as core -import unittest class TestBook(unittest.TestCase): def test_fit_a_line(self): program = Program() x = layers.data( - name='x', shape=[13], data_type='float32', main_program=program) + name='x', shape=[13], dtype='float32', main_program=program) y_predict = layers.fc(input=x, size=1, act=None, main_program=program) y = layers.data( - name='y', shape=[1], data_type='float32', main_program=program) + name='y', shape=[1], dtype='float32', main_program=program) cost = layers.square_error_cost( input=y_predict, label=y, main_program=program) avg_cost = layers.mean(x=cost, main_program=program) self.assertIsNotNone(avg_cost) program.append_backward(avg_cost) + print str(program) def test_recognize_digits_mlp(self): @@ -27,12 +28,9 @@ class TestBook(unittest.TestCase): # Change g_program, so the rest layers use `g_program` images = layers.data( - name='pixel', - shape=[784], - data_type='float32', - main_program=program) + name='pixel', shape=[784], dtype='float32', main_program=program) label = layers.data( - name='label', shape=[1], data_type='int32', main_program=program) + name='label', shape=[1], dtype='int32', main_program=program) hidden1 = layers.fc(input=images, size=128, act='relu', @@ -49,6 +47,7 @@ class TestBook(unittest.TestCase): input=predict, label=label, main_program=program) avg_cost = layers.mean(x=cost, main_program=program) self.assertIsNotNone(avg_cost) + print str(program) def test_simple_conv2d(self): @@ -56,7 +55,7 @@ class TestBook(unittest.TestCase): images = layers.data( name='pixel', shape=[3, 48, 48], - data_type='int32', + dtype='int32', main_program=program) layers.conv2d( input=images, @@ -72,10 +71,10 @@ class TestBook(unittest.TestCase): images = layers.data( name='pixel', shape=[1, 28, 28], - data_type='float32', + dtype='float32', main_program=program) label = layers.data( - name='label', shape=[1], data_type='int32', main_program=program) + name='label', shape=[1], dtype='int32', main_program=program) conv_pool_1 = nets.simple_img_conv_pool( input=images, filter_size=5, @@ -110,39 +109,39 @@ class TestBook(unittest.TestCase): dict_size = 10000 embed_size = 32 first_word = layers.data( - name='firstw', shape=[1], data_type='int64', main_program=program) + name='firstw', shape=[1], dtype='int64', main_program=program) second_word = layers.data( - name='secondw', shape=[1], data_type='int64', main_program=program) + name='secondw', shape=[1], dtype='int64', main_program=program) third_word = layers.data( - name='thirdw', shape=[1], data_type='int64', main_program=program) + name='thirdw', shape=[1], dtype='int64', main_program=program) forth_word = layers.data( - name='forthw', shape=[1], data_type='int64', main_program=program) + name='forthw', shape=[1], dtype='int64', main_program=program) next_word = layers.data( - name='nextw', shape=[1], data_type='int64', main_program=program) + name='nextw', shape=[1], dtype='int64', main_program=program) embed_first = layers.embedding( input=first_word, size=[dict_size, embed_size], - data_type='float32', + dtype='float32', param_attr={'name': 'shared_w'}, main_program=program) embed_second = layers.embedding( input=second_word, size=[dict_size, embed_size], - data_type='float32', + dtype='float32', param_attr={'name': 'shared_w'}, main_program=program) embed_third = layers.embedding( input=third_word, size=[dict_size, embed_size], - data_type='float32', + dtype='float32', param_attr={'name': 'shared_w'}, main_program=program) embed_forth = layers.embedding( input=forth_word, size=[dict_size, embed_size], - data_type='float32', + dtype='float32', param_attr={'name': 'shared_w'}, main_program=program) @@ -166,6 +165,20 @@ class TestBook(unittest.TestCase): print str(program) + def test_linear_chain_crf(self): + program = Program() + + # Change g_program, so the rest layers use `g_program` + images = layers.data( + name='pixel', shape=[784], dtype='float32', main_program=program) + label = layers.data( + name='label', shape=[1], dtype='int32', main_program=program) + hidden = layers.fc(input=images, size=128, main_program=program) + crf = layers.linear_chain_crf( + input=hidden, label=label, main_program=program) + + print str(program) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py b/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py index 6f06a66c825b37ee91214efc0a29a58f0b9057f9..c26634ff20c46e484d600c758be386ec8327d1c1 100644 --- a/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py +++ b/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py @@ -104,7 +104,7 @@ class TestLinearChainCrfOp(OpTest): transition_exps = np.exp(transition) labels = np.random.randint( - low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32") + low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64") self.inputs = { "Emission": (emission, lod), diff --git a/python/paddle/v2/fluid/tests/test_lod_array_length_op.py b/python/paddle/v2/fluid/tests/test_lod_array_length_op.py index a01ae83772185df218b8c453557dc0cac719673b..8a4be545eda841dbda33b7c8cae9f91a4199f2f8 100644 --- a/python/paddle/v2/fluid/tests/test_lod_array_length_op.py +++ b/python/paddle/v2/fluid/tests/test_lod_array_length_op.py @@ -13,7 +13,7 @@ class TestLoDArrayLength(unittest.TestCase): arr_len = layers.array_length(arr) cpu = core.CPUPlace() exe = Executor(cpu) - result = numpy.array(exe.run(fetch_list=[arr_len])[0]) + result = exe.run(fetch_list=[arr_len])[0] self.assertEqual(11, result[0]) diff --git a/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py index b18cb6b49fa41f26e1b6de1128690507c5a2f099..0a916a55bc3d097e17fb504b0d6b2f2818f030c9 100644 --- a/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py +++ b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py @@ -18,7 +18,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): tensor.set_lod([[0, 3, 9, 10]]) expect = map(lambda x: numpy.array(x).astype('int32'), [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]]) - self.main(tensor=tensor, expect_array=expect, expect_lod=[] * 6) + self.main( + tensor=tensor, + expect_array=expect, + expect_lod=[] * 6, + expect_max_len=6) def test_lod_tensor_to_array_level_0_empty_seq(self): tensor = core.LoDTensor() @@ -27,7 +31,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): tensor.set_lod([[0, 3, 9, 9, 10]]) expect = map(lambda x: numpy.array(x).astype('int32'), [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]]) - self.main(tensor=tensor, expect_array=expect, expect_lod=[] * 6) + self.main( + tensor=tensor, + expect_array=expect, + expect_lod=[] * 6, + expect_max_len=6) def test_lod_tensor_to_array_level_1(self): tensor = core.LoDTensor() @@ -44,7 +52,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ] lod = [[[0, 2, 5]], [[0, 6, 12]], [[0, 3]]] - self.main(tensor=tensor, expect_array=expect, expect_lod=lod) + self.main( + tensor=tensor, + expect_array=expect, + expect_lod=lod, + expect_max_len=3) def test_lod_tensor_to_array_level_1_empty_seq(self): tensor = core.LoDTensor() @@ -63,7 +75,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ] lod = [[[0, 5, 8, 8, 15]], [[0, 2, 6, 7, 8]], [[0, 2, 6]], [[0, 2]]] - self.main(tensor=tensor, expect_array=expect, expect_lod=lod) + self.main( + tensor=tensor, + expect_array=expect, + expect_lod=lod, + expect_max_len=4) def test_lod_tensor_to_array_level_2(self): tensor = core.LoDTensor() @@ -80,7 +96,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ] lod = [[[0, 1, 3, 4], [0, 1, 4, 8, 12]], [[0, 4, 7], [0, 1, 5, 9, 17, 21, 27, 31]], [[0, 2], [0, 6, 7]]] - self.main(tensor=tensor, expect_array=expect, expect_lod=lod) + self.main( + tensor=tensor, + expect_array=expect, + expect_lod=lod, + expect_max_len=3) def test_lod_tensor_to_array_level_2_skip_level(self): tensor = core.LoDTensor() @@ -88,14 +108,21 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): numpy.arange(50).reshape(50, 1).astype('int32'), self.place()) tensor.set_lod([[0, 2, 5, 6], [0, 2, 5, 6, 10, 12, 13], [0, 3, 7, 11, 17, 21, 22, 23, 27, 31, 39, 45, 46, 50]]) - self.main(tensor=tensor, expect_array=None, expect_lod=None, level=1) - - def main(self, tensor, expect_array, expect_lod, level=0): + self.main( + tensor=tensor, + expect_array=None, + expect_lod=None, + expect_max_len=4, + level=1) + + def main(self, tensor, expect_array, expect_lod, expect_max_len, level=0): place = self.place() program = Program() x = layers.data(name='x', shape=[10], main_program=program) x.persistable = True table = layers.lod_rank_table(x, level=level, main_program=program) + max_len = layers.max_sequence_len(table, main_program=program) + max_len.persistable = True array = layers.lod_tensor_to_array(x, table, main_program=program) array.persistable = True @@ -110,6 +137,10 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): self.check_array_same(array, expect_array, expect_lod) self.check_tensor_same(scope.find_var(result.name).get_tensor(), tensor) + self.assertEqual( + numpy.array(scope.find_var(max_len.name).get_tensor())[0], + expect_max_len) + def check_array_same(self, array, expect_tensor, expect_lod): self.assertEqual(len(expect_tensor), len(array)) for i, exp in enumerate(zip(expect_tensor, expect_lod)): @@ -132,7 +163,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase): x = layers.data( name='x', shape=[1], - data_type='float32', + dtype='float32', main_program=program, stop_gradient=False) table = layers.lod_rank_table(x, level=0, main_program=program) @@ -151,10 +182,11 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase): exe = Executor(place) g_out = [ - item.sum() - for item in map( - numpy.array, - exe.run(program, feed={'x': tensor}, fetch_list=[g_vars])) + numpy.array(item).sum() + for item in exe.run(program, + feed={'x': tensor}, + fetch_list=[g_vars], + return_numpy=False) ] g_out_sum = numpy.array(g_out).sum() diff --git a/python/paddle/v2/fluid/tests/test_maxout_op.py b/python/paddle/v2/fluid/tests/test_maxout_op.py new file mode 100644 index 0000000000000000000000000000000000000000..5fbed43e254b811d38e441e946a73c24f87373de --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_maxout_op.py @@ -0,0 +1,37 @@ +import unittest +import numpy as np +from op_test import OpTest + + +def maxout_forward_naive(input, groups): + s0, s1, s2, s3 = input.shape + return np.ndarray([s0, s1 / groups, groups, s2, s3], \ + buffer = input, dtype=input.dtype).max(axis=(2)) + + +class TestMaxOutOp(OpTest): + def setUp(self): + self.op_type = "maxout" + self.init_test_case() + input = np.random.random(self.shape).astype("float32") + output = self.MaxOut_forward_naive(input, self.groups).astype("float32") + + self.inputs = {'X': input} + self.attrs = {'groups': self.groups} + + self.outputs = {'Out': output.astype('float32')} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + def init_test_case(self): + self.MaxOut_forward_naive = maxout_forward_naive + self.shape = [100, 6, 2, 2] + self.groups = 2 + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py new file mode 100644 index 0000000000000000000000000000000000000000..50fcc4a72ddbd6d7a3d3b73434c6ac8de5a006e2 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py @@ -0,0 +1,138 @@ +import paddle.v2.fluid.layers as layers +from paddle.v2.fluid.framework import Program +from paddle.v2.fluid.executor import Executor +from paddle.v2.fluid.optimizer import MomentumOptimizer +import paddle.v2.fluid.core as core +import paddle.v2 as paddle +import unittest +import numpy as np + + +class TestMNISTIfElseOp(unittest.TestCase): + def test_raw_api(self): + kwargs = {'startup_program': Program(), 'main_program': Program()} + image = layers.data(name='x', shape=[784], dtype='float32', **kwargs) + + label = layers.data(name='y', shape=[1], dtype='int64', **kwargs) + + limit = layers.fill_constant_batch_size_like( + input=label, dtype='int64', shape=[1], value=5.0, **kwargs) + + cond = layers.less_than(x=label, y=limit, **kwargs) + true_image, false_image = layers.split_lod_tensor( + input=image, mask=cond, **kwargs) + + true_out = layers.create_tensor(dtype='float32', **kwargs) + true_cond = layers.ConditionalBlock([true_image], **kwargs) + + with true_cond.block(): + hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs) + prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) + layers.assign(input=prob, output=true_out, **kwargs) + + false_out = layers.create_tensor(dtype='float32', **kwargs) + false_cond = layers.ConditionalBlock([false_image], **kwargs) + + with false_cond.block(): + hidden = layers.fc(input=false_image, + size=200, + act='tanh', + **kwargs) + prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) + layers.assign(input=prob, output=false_out, **kwargs) + + prob = layers.merge_lod_tensor( + in_true=true_out, in_false=false_out, mask=cond, x=image, **kwargs) + loss = layers.cross_entropy(input=prob, label=label, **kwargs) + avg_loss = layers.mean(x=loss, **kwargs) + + optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) + optimizer.minimize(avg_loss, kwargs['startup_program']) + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=200) + + place = core.CPUPlace() + exe = Executor(place) + + exe.run(kwargs['startup_program']) + PASS_NUM = 100 + for pass_id in range(PASS_NUM): + for data in train_reader(): + x_data = np.array(map(lambda x: x[0], data)).astype("float32") + y_data = np.array(map(lambda x: x[1], data)).astype("int64") + y_data = np.expand_dims(y_data, axis=1) + + outs = exe.run(kwargs['main_program'], + feed={'x': x_data, + 'y': y_data}, + fetch_list=[avg_loss]) + print outs[0] + if outs[0] < 1.0: + return + self.assertFalse(True) + + def test_ifelse(self): + kwargs = {'startup_program': Program(), 'main_program': Program()} + image = layers.data(name='x', shape=[784], dtype='float32', **kwargs) + + label = layers.data(name='y', shape=[1], dtype='int64', **kwargs) + + limit = layers.fill_constant_batch_size_like( + input=label, dtype='int64', shape=[1], value=5.0, **kwargs) + + cond = layers.less_than(x=label, y=limit, **kwargs) + + ie = layers.IfElse(cond, **kwargs) + + with ie.true_block(): + true_image = ie.input(image) + hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs) + prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) + ie.output(prob) + + with ie.false_block(): + false_image = ie.input(image) + hidden = layers.fc(input=false_image, + size=200, + act='tanh', + **kwargs) + prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) + ie.output(prob) + + prob = ie() + loss = layers.cross_entropy(input=prob[0], label=label, **kwargs) + avg_loss = layers.mean(x=loss, **kwargs) + + optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) + optimizer.minimize(avg_loss, kwargs['startup_program']) + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=200) + + place = core.CPUPlace() + exe = Executor(place) + + exe.run(kwargs['startup_program']) + PASS_NUM = 100 + for pass_id in range(PASS_NUM): + for data in train_reader(): + x_data = np.array(map(lambda x: x[0], data)).astype("float32") + y_data = np.array(map(lambda x: x[1], data)).astype("int64") + y_data = y_data.reshape((y_data.shape[0], 1)) + + outs = exe.run(kwargs['main_program'], + feed={'x': x_data, + 'y': y_data}, + fetch_list=[avg_loss]) + print outs[0] + if outs[0] < 1.0: + return + self.assertFalse(True) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_nccl_init_op.py b/python/paddle/v2/fluid/tests/test_nccl_init_op.py deleted file mode 100644 index a536800ccd81fdc2f3b7c8320cede4f8ecf3a8cb..0000000000000000000000000000000000000000 --- a/python/paddle/v2/fluid/tests/test_nccl_init_op.py +++ /dev/null @@ -1,39 +0,0 @@ -import unittest, os -import numpy as np -import paddle.v2 as paddle -from paddle.v2.fluid.op import Operator -import paddle.v2.fluid.core as core -from op_test import OpTest, create_op, set_input - -if not core.is_compile_gpu(): - exit(0) - -gpu_count = core.get_cuda_device_count() - -if gpu_count <= 1: - exit(0) - -g_scope = core.Scope() -g_ctx = core.DeviceContext.create(core.CPUPlace()) - - -class TestNCCLInit(unittest.TestCase): - def test_init(self): - self.op_type = "ncclInit" - self.gpus = range(gpu_count) - - self.inputs = {} - self.attrs = {"gpus": self.gpus} - g_scope.var("Communicator").get_communicator() - self.outputs = {"Communicator": g_scope.find_var("Communicator")} - nccl_init = create_op( - g_scope, - op_type=self.op_type, - inputs=self.inputs, - outputs=self.outputs, - attrs=self.attrs) - nccl_init.run(g_scope, g_ctx) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_parameter.py b/python/paddle/v2/fluid/tests/test_parameter.py index a633d22c2b1db2728b6eb767078ce4aec6cce163..13f6278ad8b7244e7980b32463f29d7a824b4572 100644 --- a/python/paddle/v2/fluid/tests/test_parameter.py +++ b/python/paddle/v2/fluid/tests/test_parameter.py @@ -20,11 +20,11 @@ class TestParameter(unittest.TestCase): self.assertIsNotNone(param) self.assertEqual('fc.w', param.name) self.assertEqual((784, 100), param.shape) - self.assertEqual(core.DataType.FP32, param.data_type) + self.assertEqual(core.DataType.FP32, param.dtype) self.assertEqual(0, param.block.idx) exe = Executor(core.CPUPlace()) p = exe.run(g_main_program, fetch_list=[param])[0] - self.assertTrue(np.allclose(np.array(p), np.ones(shape) * val)) + self.assertTrue(np.allclose(p, np.ones(shape) * val)) p = io.get_parameter_value_by_name('fc.w', exe, g_main_program) self.assertTrue(np.allclose(np.array(p), np.ones(shape) * val)) diff --git a/python/paddle/v2/fluid/tests/test_pool2d_op.py b/python/paddle/v2/fluid/tests/test_pool2d_op.py index ac3fa6aa87835b3cd6fb9bbf6fe66b1d0c577ca2..5dff6270f455395ce6ca8ae2428236f630467095 100644 --- a/python/paddle/v2/fluid/tests/test_pool2d_op.py +++ b/python/paddle/v2/fluid/tests/test_pool2d_op.py @@ -3,8 +3,7 @@ import numpy as np from op_test import OpTest -def max_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): - +def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): N, C, H, W = x.shape if global_pool == 1: ksize = [H, W] @@ -23,8 +22,7 @@ def max_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): return out -def avg_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): - +def avg_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): N, C, H, W = x.shape if global_pool == 1: ksize = [H, W] @@ -47,6 +45,7 @@ def avg_pool2D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): class TestPool2d_Op(OpTest): def setUp(self): self.init_test_case() + self.init_global_pool() self.init_op_type() self.init_pool_type() if self.global_pool: @@ -75,8 +74,6 @@ class TestPool2d_Op(OpTest): self.check_grad(set(['X']), 'Out', max_relative_error=0.07) def init_test_case(self): - self.global_pool = True - self.pool2D_forward_naive = avg_pool2D_forward_naive self.shape = [2, 3, 5, 5] self.ksize = [3, 3] self.strides = [1, 1] @@ -87,12 +84,14 @@ class TestPool2d_Op(OpTest): def init_pool_type(self): self.pool_type = "avg" + self.pool2D_forward_naive = avg_pool2D_forward_naive + + def init_global_pool(self): + self.global_pool = True class TestCase1(TestPool2d_Op): def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = avg_pool2D_forward_naive self.shape = [2, 3, 7, 7] self.ksize = [3, 3] self.strides = [1, 1] @@ -103,12 +102,14 @@ class TestCase1(TestPool2d_Op): def init_pool_type(self): self.pool_type = "avg" + self.pool2D_forward_naive = avg_pool2D_forward_naive + + def init_global_pool(self): + self.global_pool = False class TestCase2(TestPool2d_Op): def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = avg_pool2D_forward_naive self.shape = [2, 3, 7, 7] self.ksize = [3, 3] self.strides = [1, 1] @@ -119,152 +120,69 @@ class TestCase2(TestPool2d_Op): def init_pool_type(self): self.pool_type = "avg" + self.pool2D_forward_naive = avg_pool2D_forward_naive + def init_global_pool(self): + self.global_pool = False -class TestCase3(TestPool2d_Op): - def init_test_case(self): - self.global_pool = True - self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] +class TestCase3(TestPool2d_Op): def init_op_type(self): self.op_type = "pool2d" def init_pool_type(self): self.pool_type = "max" - - -class TestCase4(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] + +class TestCase4(TestCase1): def init_op_type(self): self.op_type = "pool2d" def init_pool_type(self): self.pool_type = "max" - - -class TestCase5(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [1, 1] + +class TestCase5(TestCase2): def init_op_type(self): self.op_type = "pool2d" def init_pool_type(self): self.pool_type = "max" + self.pool2D_forward_naive = max_pool2D_forward_naive #--------------------test pool2d_cudnn-------------------- -class TestCaseCudnn1(TestPool2d_Op): - def init_test_case(self): - self.global_pool = True - self.pool2D_forward_naive = avg_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] - +class TestCudnnCase1(TestPool2d_Op): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "avg" - - -class TestCaseCudnn2(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = avg_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] +class TestCudnnCase2(TestCase1): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "avg" - - -class TestCaseCudnn3(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = avg_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [1, 1] +class TestCudnnCase3(TestCase2): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "avg" - - -class TestCaseCudnn4(TestPool2d_Op): - def init_test_case(self): - self.global_pool = True - self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] +class TestCudnnCase4(TestCase3): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "max" - - -class TestCaseCudnn5(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [0, 0] +class TestCudnnCase5(TestCase4): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "max" - - -class TestCaseCudnn6(TestPool2d_Op): - def init_test_case(self): - self.global_pool = False - self.pool2D_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 7, 7] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [1, 1] +class TestCudnnCase6(TestCase5): def init_op_type(self): self.op_type = "pool2d_cudnn" - def init_pool_type(self): - self.pool_type = "max" - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_pool3d_op.py b/python/paddle/v2/fluid/tests/test_pool3d_op.py index 87483ae5e568c01141ff789f37e84069cb8e827d..2ba86665a7d207e61159c02643fa40daca3be080 100644 --- a/python/paddle/v2/fluid/tests/test_pool3d_op.py +++ b/python/paddle/v2/fluid/tests/test_pool3d_op.py @@ -3,8 +3,7 @@ import numpy as np from op_test import OpTest -def max_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): - +def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): N, C, D, H, W = x.shape if global_pool == 1: ksize = [D, H, W] @@ -27,8 +26,7 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): return out -def avg_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): - +def avg_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): N, C, D, H, W = x.shape if global_pool == 1: ksize = [D, H, W] @@ -55,6 +53,10 @@ def avg_pool3D_forward_naive(x, ksize, strides, paddings=[0, 0], global_pool=0): class TestPool3d_Op(OpTest): def setUp(self): self.init_test_case() + self.init_global_pool() + self.init_op_type() + self.init_pool_type() + if self.global_pool: self.paddings = [0 for _ in range(len(self.paddings))] input = np.random.random(self.shape).astype("float32") @@ -81,74 +83,115 @@ class TestPool3d_Op(OpTest): self.check_grad(set(['X']), 'Out', max_relative_error=0.07) def init_test_case(self): - self.global_pool = True - self.op_type = "pool3d" - self.pool_type = "avg" - self.pool3D_forward_naive = avg_pool3D_forward_naive self.shape = [2, 3, 5, 5, 5] self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [0, 0, 0] + def init_op_type(self): + self.op_type = "pool3d" + + def init_pool_type(self): + self.pool_type = "avg" + self.pool3D_forward_naive = avg_pool3D_forward_naive + + def init_global_pool(self): + self.global_pool = True + class TestCase1(TestPool3d_Op): def init_test_case(self): - self.global_pool = False self.op_type = "pool3d" - self.pool_type = "avg" - self.pool3D_forward_naive = avg_pool3D_forward_naive self.shape = [2, 3, 7, 7, 7] self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [0, 0, 0] - -class TestCase2(TestPool3d_Op): - def init_test_case(self): - self.global_pool = False + def init_op_type(self): self.op_type = "pool3d" + + def init_pool_type(self): self.pool_type = "avg" self.pool3D_forward_naive = avg_pool3D_forward_naive + + def init_global_pool(self): + self.global_pool = False + + +class TestCase2(TestPool3d_Op): + def init_test_case(self): self.shape = [2, 3, 7, 7, 7] self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [1, 1, 1] + def init_op_type(self): + self.op_type = "pool3d" + + def init_pool_type(self): + self.pool_type = "avg" + self.pool3D_forward_naive = avg_pool3D_forward_naive + + def init_global_pool(self): + self.global_pool = False + class TestCase3(TestPool3d_Op): - def init_test_case(self): - self.global_pool = True + def init_op_type(self): self.op_type = "pool3d" + + def init_pool_type(self): self.pool_type = "max" self.pool3D_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 5, 5, 5] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [0, 0, 0] -class TestCase4(TestPool3d_Op): - def init_test_case(self): - self.global_pool = False +class TestCase4(TestCase1): + def init_op_type(self): self.op_type = "pool3d" + + def init_pool_type(self): self.pool_type = "max" self.pool3D_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 7, 7, 7] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [0, 0, 0] -class TestCase5(TestPool3d_Op): - def init_test_case(self): - self.global_pool = False +class TestCase5(TestCase2): + def init_op_type(self): self.op_type = "pool3d" + + def init_pool_type(self): self.pool_type = "max" self.pool3D_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 7, 7, 7] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [1, 1, 1] + + +#--------------------test pool3d_cudnn-------------------- +class TestCudnnCase1(TestPool3d_Op): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase2(TestCase1): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase3(TestCase2): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase4(TestCase3): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase5(TestCase4): + def init_op_type(self): + self.op_type = "pool3d_cudnn" + + +class TestCudnnCase6(TestCase5): + def init_op_type(self): + self.op_type = "pool3d_cudnn" if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_pool_max_op.py b/python/paddle/v2/fluid/tests/test_pool_max_op.py index 04843a28ac19e076e097d1aa1034bcf9378aa495..9d2d61c43868701392e90542f3b7fb2c4ea07548 100644 --- a/python/paddle/v2/fluid/tests/test_pool_max_op.py +++ b/python/paddle/v2/fluid/tests/test_pool_max_op.py @@ -3,11 +3,13 @@ import numpy as np from op_test import OpTest -def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): +def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=False): N, C, D, H, W = x.shape - if global_pool == 1: + if global_pool: ksize = [D, H, W] + paddings = [0, 0, 0] + D_out = (D - ksize[0] + 2 * paddings[0]) / strides[0] + 1 H_out = (H - ksize[1] + 2 * paddings[1]) / strides[1] + 1 W_out = (W - ksize[2] + 2 * paddings[2]) / strides[2] + 1 @@ -40,11 +42,13 @@ def max_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0): return out, mask -def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): +def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=False): N, C, H, W = x.shape - if global_pool == 1: + if global_pool: ksize = [H, W] + paddings = [0, 0] + H_out = (H - ksize[0] + 2 * paddings[0]) / strides[0] + 1 W_out = (W - ksize[1] + 2 * paddings[1]) / strides[1] + 1 out = np.zeros((N, C, H_out, W_out)) @@ -74,13 +78,13 @@ def max_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0): class TestMaxPoolWithIndex_Op(OpTest): def setUp(self): self.init_test_case() - if self.global_pool: - self.paddings = [0 for _ in range(len(self.paddings))] + self.init_global() + input = np.random.random(self.shape).astype("float32") output, mask = self.pool_forward_naive(input, self.ksize, self.strides, self.paddings, self.global_pool) output = output.astype("float32") - mask = mask.astype("float32") + mask = mask.astype("int32") self.attrs = { 'strides': self.strides, @@ -99,41 +103,24 @@ class TestMaxPoolWithIndex_Op(OpTest): # self.check_grad(set(['X']), ['Out'], max_relative_error=0.07) def init_test_case(self): - self.global_pool = True - self.index = "max_pool3d_with_index" - self.op_type = "%s" % self.index + self.op_type = "max_pool3d_with_index" self.pool_forward_naive = max_pool3D_forward_naive self.shape = [2, 3, 5, 5, 5] self.ksize = [3, 3, 3] self.strides = [1, 1, 1] self.paddings = [1, 1, 1] + def init_global(self): + self.global_pool = False + class TestCase1(TestMaxPoolWithIndex_Op): - def init_test_case(self): + def init_global(self): self.global_pool = True - self.op_type = "max_pool3d_with_index" - self.pool_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 5, 5, 5] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [1, 1, 1] class TestCase2(TestMaxPoolWithIndex_Op): def init_test_case(self): - self.global_pool = False - self.op_type = "max_pool3d_with_index" - self.pool_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 7, 7, 7] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [1, 1, 1] - - -class TestCase3(TestMaxPoolWithIndex_Op): - def init_test_case(self): - self.global_pool = False self.op_type = "max_pool3d_with_index" self.pool_forward_naive = max_pool3D_forward_naive self.shape = [2, 3, 7, 7, 7] @@ -141,32 +128,18 @@ class TestCase3(TestMaxPoolWithIndex_Op): self.strides = [2, 2, 2] self.paddings = [0, 0, 0] - -class TestCase4(TestMaxPoolWithIndex_Op): - def init_test_case(self): + def init_global(self): self.global_pool = True - self.op_type = "max_pool3d_with_index" - self.pool_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 5, 5, 5] - self.ksize = [3, 3, 3] - self.strides = [1, 1, 1] - self.paddings = [1, 1, 1] -class TestCase5(TestMaxPoolWithIndex_Op): - def init_test_case(self): - self.global_pool = True - self.op_type = "max_pool3d_with_index" - self.pool_forward_naive = max_pool3D_forward_naive - self.shape = [2, 3, 5, 5, 5] - self.ksize = [3, 3, 3] - self.strides = [2, 2, 2] - self.paddings = [0, 0, 0] +class TestCase3(TestCase2): + def init_global(self): + self.global_pool = False -class TestCase6(TestMaxPoolWithIndex_Op): +#----------------max_pool2d_with_index---------------- +class TestCase4(TestMaxPoolWithIndex_Op): def init_test_case(self): - self.global_pool = False self.op_type = "max_pool2d_with_index" self.pool_forward_naive = max_pool2D_forward_naive self.shape = [2, 3, 7, 7] @@ -174,10 +147,17 @@ class TestCase6(TestMaxPoolWithIndex_Op): self.strides = [1, 1] self.paddings = [1, 1] + def init_global(self): + self.global_pool = True + -class TestCase7(TestMaxPoolWithIndex_Op): - def init_test_case(self): +class TestCase5(TestCase4): + def init_global(self): self.global_pool = False + + +class TestCase6(TestMaxPoolWithIndex_Op): + def init_test_case(self): self.op_type = "max_pool2d_with_index" self.pool_forward_naive = max_pool2D_forward_naive self.shape = [2, 3, 7, 7] @@ -185,27 +165,13 @@ class TestCase7(TestMaxPoolWithIndex_Op): self.strides = [2, 2] self.paddings = [0, 0] - -class TestCase8(TestMaxPoolWithIndex_Op): - def init_test_case(self): + def init_global(self): self.global_pool = True - self.op_type = "max_pool2d_with_index" - self.pool_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [1, 1] - self.paddings = [1, 1] -class TestCase9(TestMaxPoolWithIndex_Op): - def init_test_case(self): - self.global_pool = True - self.op_type = "max_pool2d_with_index" - self.pool_forward_naive = max_pool2D_forward_naive - self.shape = [2, 3, 5, 5] - self.ksize = [3, 3] - self.strides = [2, 2] - self.paddings = [0, 0] +class TestCase7(TestCase6): + def init_global(self): + self.global_pool = False if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_program.py b/python/paddle/v2/fluid/tests/test_program.py index e9bcefd21569aaa9225c676ea03b5c8e37d00333..15653a1dbf5b1a66edd3f768bee5a36be1bb7a7a 100644 --- a/python/paddle/v2/fluid/tests/test_program.py +++ b/python/paddle/v2/fluid/tests/test_program.py @@ -1,7 +1,9 @@ +from __future__ import print_function import unittest from paddle.v2.fluid.framework import Program from paddle.v2.fluid.framework import g_main_program +import paddle.v2.fluid.layers as layers class TestProgram(unittest.TestCase): @@ -48,8 +50,8 @@ class TestProgram(unittest.TestCase): # FIXME(yuyang18): We manual compare the output string, since the order # of variable could be changed. - print prog - print prog.clone() + print(prog) + print(prog.clone()) def test_parse_program_from_string(self): prog = Program() @@ -67,8 +69,8 @@ class TestProgram(unittest.TestCase): binary_str = prog.desc.serialize_to_string() prog_restored = Program.parse_from_string(binary_str) - print prog - print prog_restored + print(prog) + print(prog_restored) def test_append_backward(self): prog = Program() @@ -123,6 +125,20 @@ class TestProgram(unittest.TestCase): actual_ops.append(op.type) self.assertEqual(actual_ops, expect_ops) + def test_program_clone_with_parameter(self): + main_program = Program() + startup_program = Program() + kwargs = { + 'main_program': main_program, + 'startup_program': startup_program + } + d = layers.data(name='x', shape=[784], dtype='float32', **kwargs) + hidden = layers.fc(input=d, size=100, **kwargs) + layers.fc(input=hidden, size=100, **kwargs) + + new_program = main_program.clone() + self.assertNotEqual(0, len(new_program.blocks[0].all_parameters())) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_protobuf_descs.py b/python/paddle/v2/fluid/tests/test_protobuf_descs.py index 098a9802dfc6763ce2a2356b7267a439145b7939..d8abe17606c4ddb2ff51d5f918b1e5d7e110f7fa 100644 --- a/python/paddle/v2/fluid/tests/test_protobuf_descs.py +++ b/python/paddle/v2/fluid/tests/test_protobuf_descs.py @@ -101,13 +101,13 @@ class TestVarDesc(unittest.TestCase): self.assertEqual(src_shape, res_shape) self.assertEqual(core.VarDesc.VarType.SELECTED_ROWS, var.type()) - def test_data_type(self): + def test_dtype(self): program_desc = core.ProgramDesc() block = program_desc.block(0) var = block.var('my_var') var.set_type(core.VarDesc.VarType.LOD_TENSOR) - var.set_data_type(core.DataType.INT32) - self.assertEqual(core.DataType.INT32, var.data_type()) + var.set_dtype(core.DataType.INT32) + self.assertEqual(core.DataType.INT32, var.dtype()) self.assertEqual(core.VarDesc.VarType.LOD_TENSOR, var.type()) diff --git a/python/paddle/v2/fluid/tests/test_recurrent_op.py b/python/paddle/v2/fluid/tests/test_recurrent_op.py index b623d1231838faff9e91c9234befb1f647fe8ec2..84548847f76c6315da000e1b3d062deafe55a05e 100644 --- a/python/paddle/v2/fluid/tests/test_recurrent_op.py +++ b/python/paddle/v2/fluid/tests/test_recurrent_op.py @@ -118,14 +118,14 @@ class RecurrentOpTest1(unittest.TestCase): def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot = layers.data( shape=[self.input_dim], - data_type='float32', + dtype='float32', name='h_boot', **self.p_info) h_boot.stop_gradient = False @@ -156,7 +156,7 @@ class RecurrentOpTest1(unittest.TestCase): feed=self.feed_map, fetch_list=[self.output]) - return np.array(out[0]) + return out[0] def backward(self): self.feed_map = { @@ -171,7 +171,8 @@ class RecurrentOpTest1(unittest.TestCase): exe = Executor(self.place) return exe.run(self.main_program, feed=self.feed_map, - fetch_list=fetch_list) + fetch_list=fetch_list, + return_numpy=False) def test_backward(self): self.check_forward() @@ -251,14 +252,14 @@ class RecurrentOpTest2(RecurrentOpTest1): def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot = layers.data( shape=[self.input_dim], - data_type='float32', + dtype='float32', name='h_boot', **self.p_info) h_boot.stop_gradient = False @@ -350,21 +351,21 @@ class RecurrentOpMultipleMemoryTest(RecurrentOpTest1): def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot1 = layers.data( shape=[self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='h_boot1', append_batch_size=False, **self.p_info) h_boot1.stop_gradient = False h_boot2 = layers.data( shape=[self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='h_boot2', append_batch_size=False, **self.p_info) @@ -435,7 +436,7 @@ class RecurrentOpNoMemBootTest(RecurrentOpTest1): def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], - data_type='float32', + dtype='float32', name='x', append_batch_size=False, **self.p_info) diff --git a/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py b/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py index a3cba92504a28590083df57e69f7662a887d94a6..9999165ed509aa40f31f26aa676f381561bd0016 100644 --- a/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py +++ b/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py @@ -7,12 +7,6 @@ import numpy as np import paddle.v2.fluid.core as core -def create_tensor(np_data, place): - tensor = core.LoDTensor() - tensor.set(np_data, place) - return tensor - - class RNNMemoryHelperOpTest(unittest.TestCase): def setUp(self): self.program = Program() @@ -30,13 +24,13 @@ class RNNMemoryHelperOpTest(unittest.TestCase): def test_forward(self): x_np = np.random.normal(size=(2, 3)).astype("float32") - self.feed_map = {'X': create_tensor(x_np, self.place)} + self.feed_map = {'X': x_np} self.fetch_list = [self.Out] exe = Executor(self.place) out = exe.run(self.program, feed=self.feed_map, fetch_list=self.fetch_list) - np.isclose(np.array(out[0]), x_np, rtol=1e-5) + self.assertTrue(np.allclose(out[0], x_np, rtol=1e-5)) class RNNMemoryHelperGradOpTest(unittest.TestCase): @@ -66,8 +60,7 @@ class RNNMemoryHelperGradOpTest(unittest.TestCase): def test_backward(self): self.feed_map = { - name: create_tensor( - np.random.normal(size=(2, 3)).astype("float32"), self.place) + name: np.random.normal(size=(2, 3)).astype("float32") for name in self.input_names } self.fetch_list = [self.output_vars['X@GRAD']] @@ -76,7 +69,7 @@ class RNNMemoryHelperGradOpTest(unittest.TestCase): out = exe.run(self.program, feed=self.feed_map, fetch_list=self.fetch_list) - np.isclose(np.array(out[0]), self.feed_map['Out@GRAD'], rtol=1e-5) + np.isclose(out[0], self.feed_map['Out@GRAD'], rtol=1e-5) class RNNMemoryHelperGradOpWithoutInputTest(unittest.TestCase): @@ -110,8 +103,7 @@ class RNNMemoryHelperGradOpWithoutInputTest(unittest.TestCase): def test_backward(self): self.feed_map = { - name: create_tensor( - np.random.normal(size=(2, 3)).astype("float32"), self.place) + name: np.random.normal(size=(2, 3)).astype("float32") for name in ['X', 'Out'] } self.fetch_list = [self.output_vars['X@GRAD']] @@ -120,10 +112,9 @@ class RNNMemoryHelperGradOpWithoutInputTest(unittest.TestCase): out = exe.run(self.program, feed=self.feed_map, fetch_list=self.fetch_list) - np.isclose( - np.array(out[0]), - np.zeros(shape=(2, 3)).astype("float32"), - rtol=1e-5) + self.assertTrue( + np.allclose( + out[0], np.zeros(shape=(2, 3)).astype("float32"), rtol=1e-5)) if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_roi_pool_op.py b/python/paddle/v2/fluid/tests/test_roi_pool_op.py new file mode 100644 index 0000000000000000000000000000000000000000..a28d9c7f82d3735c410369eb61e350168c267cea --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_roi_pool_op.py @@ -0,0 +1,123 @@ +import unittest +import numpy as np +import math +import sys +from op_test import OpTest + + +class TestROIPoolOp(OpTest): + def set_data(self): + self.init_test_case() + self.make_rois() + self.calc_roi_pool() + + self.inputs = {'X': self.x, 'ROIs': self.rois} + + self.attrs = { + 'spatial_scale': self.spatial_scale, + 'pooled_height': self.pooled_height, + 'pooled_width': self.pooled_width + } + + self.outputs = {'Out': self.outs, 'Argmax': self.argmaxes} + + def init_test_case(self): + self.batch_size = 5 + self.channels = 3 + self.height = 6 + self.width = 4 + + # n, c, h, w + self.x_dim = (self.batch_size, self.channels, self.height, self.width) + + self.spatial_scale = 1.0 / 4.0 + self.pooled_height = 2 + self.pooled_width = 2 + self.rois_num = 2 + + self.x = np.random.random(self.x_dim).astype('float32') + + def calc_roi_pool(self): + out_data = np.zeros((self.rois_num, self.channels, self.pooled_height, + self.pooled_width)) + argmax_data = np.zeros((self.rois_num, self.channels, + self.pooled_height, self.pooled_width)) + + for i in range(self.rois_num): + roi = self.rois[i] + roi_batch_id = roi[0] + roi_start_w = int(round(roi[1] * self.spatial_scale)) + roi_start_h = int(round(roi[2] * self.spatial_scale)) + roi_end_w = int(round(roi[3] * self.spatial_scale)) + roi_end_h = int(round(roi[4] * self.spatial_scale)) + + roi_height = int(max(roi_end_h - roi_start_h + 1, 1)) + roi_width = int(max(roi_end_w - roi_start_w + 1, 1)) + + x_i = self.x[roi_batch_id] + + bin_size_h = float(roi_height) / float(self.pooled_height) + bin_size_w = float(roi_width) / float(self.pooled_width) + + for c in range(self.channels): + for ph in range(self.pooled_height): + for pw in range(self.pooled_width): + hstart = int(math.floor(ph * bin_size_h)) + wstart = int(math.floor(pw * bin_size_w)) + hend = int(math.ceil((ph + 1) * bin_size_h)) + wend = int(math.ceil((pw + 1) * bin_size_w)) + + hstart = min(max(hstart + roi_start_h, 0), self.height) + hend = min(max(hend + roi_start_h, 0), self.height) + wstart = min(max(wstart + roi_start_w, 0), self.width) + wend = min(max(wend + roi_start_w, 0), self.width) + + is_empty = (hend <= hstart) or (wend <= wstart) + if is_empty: + out_data[i, c, ph, pw] = 0 + else: + out_data[i, c, ph, pw] = -sys.float_info.max + + argmax_data[i, c, ph, pw] = -1 + + for h in range(hstart, hend): + for w in range(wstart, wend): + if x_i[c, h, w] > out_data[i, c, ph, pw]: + out_data[i, c, ph, pw] = x_i[c, h, w] + argmax_data[i, c, ph, pw] = h * \ + self.width + w + + self.outs = out_data.astype('float32') + self.argmaxes = argmax_data.astype('int64') + + def make_rois(self): + rois = [] + batch_ids = np.random.randint(0, self.batch_size, size=self.rois_num) + for i in range(self.rois_num): + x1 = np.random.random_integers( + 0, self.width / self.spatial_scale - self.pooled_width) + y1 = np.random.random_integers( + 0, self.height / self.spatial_scale - self.pooled_height) + + x2 = np.random.random_integers(x1 + self.pooled_width, + self.width / self.spatial_scale) + y2 = np.random.random_integers(y1 + self.pooled_height, + self.height / self.spatial_scale) + + roi = [batch_ids[i], x1, y1, x2, y2] + rois.append(roi) + self.rois = np.array(rois).astype("int64") + + def setUp(self): + self.op_type = "roi_pool" + self.set_data() + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_sequence_slice_op.py b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py new file mode 100644 index 0000000000000000000000000000000000000000..ccd9a05343b0c4aa05b258959665c0662f271512 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py @@ -0,0 +1,47 @@ +import unittest +import numpy as np +import sys +from op_test import OpTest + + +class TestSequenceSliceOp(OpTest): + def set_data(self): + self.init_test_case() + # only supprot one level LoD + x = np.random.random(self.x_dim).astype('float32') + lod = self.x_lod + offset = np.array(self.offset).astype("int64") + length = np.array(self.length).astype("int64") + + self.inputs = {'X': (x, lod), 'Offset': offset, 'Length': length} + outs = [] #np.zeros((100, 3, 2)).astype('float32') + out_lod = [[0]] + out_lod_offset = 0 + for i in range(len(offset)): + sub_x = x[lod[0][i] + offset[i, 0]:lod[0][i] + offset[i, 0] + + length[i, 0], :] + out_lod_offset = out_lod_offset + len(sub_x) + outs.append(sub_x) + out_lod[0].append(out_lod_offset) + outs = np.concatenate(outs, axis=0) + self.outputs = {'Out': (outs, out_lod)} + + def init_test_case(self): + self.x_dim = (100, 3, 2) + self.x_lod = [[0, 20, 40, 60, 80, 100]] + self.offset = [[1], [2], [3], [4], [5]] + self.length = [[10], [8], [6], [4], [2]] + + def setUp(self): + self.op_type = "sequence_slice" + self.set_data() + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py index 1a3b88e18e38b88d75ad17a0bb6a2965d1e60406..05f6a560644f18da6ff2e015911901cd73cc36c9 100644 --- a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py +++ b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py @@ -9,7 +9,7 @@ import numpy class TestShrinkRNNMemory(unittest.TestCase): def test_shrink_rnn_memory(self): - x = layers.data('x', shape=[100], data_type='float32') + x = layers.data('x', shape=[100], dtype='float32') x.stop_gradient = False table = layers.lod_rank_table(x=x) i = layers.zeros(dtype='int64', shape=[1]) @@ -27,19 +27,16 @@ class TestShrinkRNNMemory(unittest.TestCase): tensor_np = numpy.random.random(size=(3, 100)).astype('float32') tensor.set(tensor_np, cpu) exe = Executor(cpu) - outs = map(numpy.array, - exe.run(feed={'x': tensor}, fetch_list=[mem1, mem2, mem3])) + outs = exe.run(feed={'x': tensor}, fetch_list=[mem1, mem2, mem3]) self.assertTrue(numpy.allclose(tensor_np[0:3], outs[0])) self.assertTrue(numpy.allclose(tensor_np[0:2], outs[1])) self.assertTrue(numpy.allclose(tensor_np[0:1], outs[2])) mem3_mean = layers.mean(x=mem3) append_backward_ops(loss=mem3_mean) - x_grad = map(numpy.array, - exe.run(feed={'x': tensor}, - fetch_list=[ - g_main_program.global_block().var('x@GRAD') - ]))[0] + x_grad = exe.run( + feed={'x': tensor}, + fetch_list=[g_main_program.global_block().var('x@GRAD')])[0] self.assertAlmostEqual(1.0, x_grad.sum(), delta=0.1) diff --git a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py index 3aed83b2ea3418c54f9540279ae6e2e0045421fa..f5da4e408f0a83dbf6da530b478e91bbf9cd5ab2 100644 --- a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py +++ b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py @@ -98,7 +98,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): exe = Executor(place) scope = core.Scope() - exe.run(program, feed={'x': tensor, 'y': mask}, scope=scope) + exe.run(program, + feed={'x': tensor, + 'y': mask}, + scope=scope, + return_numpy=False) var_true = scope.find_var(out_true.name).get_tensor() @@ -123,13 +127,13 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): x = layers.data( name='x', shape=[1], - data_type='float32', + dtype='float32', main_program=program, stop_gradient=False) y = layers.data( name='y', shape=[1], - data_type='bool', + dtype='bool', main_program=program, stop_gradient=False) @@ -169,7 +173,8 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): feed={'x': tensor, 'y': mask}, fetch_list=[g_vars], - scope=scope)) + scope=scope, + return_numpy=False)) ] g_out_sum = np.array(g_out).sum() diff --git a/python/paddle/v2/fluid/tests/test_tensor_array.py b/python/paddle/v2/fluid/tests/test_tensor_array.py deleted file mode 100644 index d6929ba16e4dae0c57adcceb4f0e78c094eee55c..0000000000000000000000000000000000000000 --- a/python/paddle/v2/fluid/tests/test_tensor_array.py +++ /dev/null @@ -1,106 +0,0 @@ -import logging -import paddle.v2.fluid.core as core -import unittest -import numpy as np - - -class TestTensorArray(unittest.TestCase): - def setUp(self): - self.ta = core.TensorArray() - - self.batch_size = 10 - self.dim = 2 - - # create a LoDTensor - self.scope = core.Scope() - var = self.scope.var("test_tensor") - self.place = core.CPUPlace() - tensor = var.get_tensor() - tensor.set_dims([self.batch_size, self.dim]) - tensor.alloc_float(self.place) - tensor_array = np.array(tensor) - tensor_array[0, 0] = 0 - tensor_array[1, 0] = 1 - tensor_array[2, 0] = 2 - tensor_array[3, 0] = 3 - tensor_array[4, 0] = 4 - tensor_array[5, 0] = 5 - tensor_array[6, 0] = 6 - tensor_array[7, 0] = 7 - tensor_array[8, 0] = 8 - tensor_array[9, 0] = 9 - - lod_py = [[0, 2, 5, 10]] - lod_tensor = core.LoDTensor(lod_py) - lod_tensor.set(tensor_array, self.place) - - self.py_seq_meta = [[5, 10, 2], [2, 5, 1], [0, 2, 0]] - - self.tensor = lod_tensor - - def test_unstack(self): - self.ta.unstack(self.tensor) - self.assertEqual(self.tensor.get_dims()[0], self.ta.size()) - - def test_read(self): - self.ta.unstack(self.tensor) - for i in range(self.batch_size): - tensor = self.ta.read(i) - - def test_write(self): - self.ta.unstack(self.tensor) - - # create a tensor with shape of [1, self.dim] - var = self.scope.var("hell") - tensor = var.get_tensor() - tensor.set_dims([1, self.dim]) - tensor.alloc_float(self.place) - tensor_array = np.array(tensor) - for i in range(self.dim): - tensor_array[0, i] = i - tensor.set(tensor_array, self.place) - - self.ta.write(2, tensor) - - ta_tensor = self.ta.read(2) - ta_tensor_array = np.array(ta_tensor) - self.assertEqual(ta_tensor.get_dims(), [1, self.dim]) - self.assertTrue((tensor_array == ta_tensor_array).all()) - - def test_write_shared(self): - self.ta.unstack(self.tensor) - - # create a tensor with shape of [1, self.dim] - var = self.scope.var("hell") - tensor = var.get_tensor() - tensor.set_dims([1, self.dim]) - tensor.alloc_float(self.place) - tensor_array = np.array(tensor) - for i in range(self.dim): - tensor_array[0, i] = i - tensor.set(tensor_array, self.place) - - self.ta.write_shared(2, tensor) - - ta_tensor = self.ta.read(2) - ta_tensor_array = np.array(ta_tensor) - self.assertEqual(ta_tensor.get_dims(), [1, self.dim]) - self.assertTrue((tensor_array == ta_tensor_array).all()) - - def test_unpack(self): - meta = self.ta.unpack(self.tensor, 0, True) - self.assertEqual(self.ta.size(), 5) - self.assertEqual(meta, self.py_seq_meta) - - def test_pack(self): - meta = self.ta.unpack(self.tensor, 0, True) - print "meta", meta - tensor = self.ta.pack(0, meta, self.tensor.lod()) - print np.array(self.tensor) - print np.array(tensor) - self.assertTrue((np.array(self.tensor) == np.array(tensor)).all()) - self.assertTrue(tensor.lod(), self.tensor.lod()) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_variable.py b/python/paddle/v2/fluid/tests/test_variable.py index a3e60a751719666bdca56a3096b688125d09f4b2..92ffdceb6c84fb2669f8c1bb556c46fb1c03c411 100644 --- a/python/paddle/v2/fluid/tests/test_variable.py +++ b/python/paddle/v2/fluid/tests/test_variable.py @@ -1,5 +1,5 @@ import unittest -from paddle.v2.fluid.framework import Variable, g_main_program, Program +from paddle.v2.fluid.framework import g_main_program, Program, convert_np_dtype_to_dtype_ import paddle.v2.fluid.core as core import numpy as np @@ -7,7 +7,7 @@ import numpy as np class TestVariable(unittest.TestCase): def test_np_dtype_convert(self): DT = core.DataType - convert = Variable._convert_np_dtype_to_dtype_ + convert = convert_np_dtype_to_dtype_ self.assertEqual(DT.FP32, convert(np.float32)) self.assertEqual(DT.FP16, convert("float16")) self.assertEqual(DT.FP64, convert("float64")) @@ -22,13 +22,13 @@ class TestVariable(unittest.TestCase): w = b.create_var( dtype="float64", shape=[784, 100], lod_level=0, name="fc.w") self.assertNotEqual(str(w), "") - self.assertEqual(core.DataType.FP64, w.data_type) + self.assertEqual(core.DataType.FP64, w.dtype) self.assertEqual((784, 100), w.shape) self.assertEqual("fc.w", w.name) self.assertEqual(0, w.lod_level) w = b.create_var(name='fc.w') - self.assertEqual(core.DataType.FP64, w.data_type) + self.assertEqual(core.DataType.FP64, w.dtype) self.assertEqual((784, 100), w.shape) self.assertEqual("fc.w", w.name) self.assertEqual(0, w.lod_level) diff --git a/python/paddle/v2/fluid/tests/test_while_op.py b/python/paddle/v2/fluid/tests/test_while_op.py index 84b432333f950f754a97bc1a051b59c16fb22aed..033b03a4957131e1155c61e8ed2f10eefb23fda4 100644 --- a/python/paddle/v2/fluid/tests/test_while_op.py +++ b/python/paddle/v2/fluid/tests/test_while_op.py @@ -9,11 +9,11 @@ import numpy class TestWhileOp(unittest.TestCase): def test_simple_forward(self): d0 = layers.data( - "d0", shape=[10], append_batch_size=False, data_type='float32') + "d0", shape=[10], append_batch_size=False, dtype='float32') d1 = layers.data( - "d1", shape=[10], append_batch_size=False, data_type='float32') + "d1", shape=[10], append_batch_size=False, dtype='float32') d2 = layers.data( - "d2", shape=[10], append_batch_size=False, data_type='float32') + "d2", shape=[10], append_batch_size=False, dtype='float32') i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = True init = layers.zeros(shape=[10], dtype='float32') @@ -55,19 +55,10 @@ class TestWhileOp(unittest.TestCase): for i in xrange(3): d.append(numpy.random.random(size=[10]).astype('float32')) - d_tensor = [] - for item in d: - t = core.LoDTensor() - t.set(item, cpu) - d_tensor.append(t) - - outs = map(numpy.array, - exe.run(feed={ - 'd0': d_tensor[0], - 'd1': d_tensor[1], - 'd2': d_tensor[2] - }, - fetch_list=[sum_result])) + outs = exe.run(feed={'d0': d[0], + 'd1': d[1], + 'd2': d[2]}, + fetch_list=[sum_result]) self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01)