diff --git a/.travis.yml b/.travis.yml index 162bebba091d84b295f929527de9804e65df5a65..4fb2ca938795bb6a69f7d7991aee9f7386947bf2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ cache: - $HOME/third_party - $HOME/.ccache - $HOME/.cache/pip + - $HOME/Library/Caches/Homebrew sudo: required dist: trusty os: @@ -54,7 +55,9 @@ before_install: fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi - - pip install numpy wheel protobuf sphinx recommonmark sphinx_rtd_theme virtualenv pre-commit requests==2.9.2 LinkChecker + # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python + # protobuf version. + - pip install numpy wheel 'protobuf==3.1' sphinx recommonmark sphinx_rtd_theme virtualenv pre-commit requests==2.9.2 LinkChecker script: - paddle/scripts/travis/main.sh notifications: diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 26306f9849100d4463dde267acae5392cc81d7ac..235c95f017f2b6ef24195a0210ccafff36b6ed61 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -16,7 +16,8 @@ set(CBLAS_FOUND OFF) ## Find MKL First. -set(MKL_ROOT $ENV{MKLROOT} CACHE PATH "Folder contains MKL") +set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs") +set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL") find_path(MKL_INCLUDE_DIR mkl.h PATHS ${MKL_ROOT}/include) diff --git a/demo/mnist/api_train.py b/demo/mnist/api_train.py index 8573d8143a085b8d2e0bcf7df17b1abe177029df..ea1caa7dd9653a2cc2860ace736fe3d25a3767e0 100644 --- a/demo/mnist/api_train.py +++ b/demo/mnist/api_train.py @@ -6,25 +6,15 @@ passed to C++ side of Paddle. The user api could be simpler and carefully designed. 
""" -import py_paddle.swig_paddle as api -from py_paddle import DataProviderConverter -import paddle.trainer.PyDataProvider2 as dp -import numpy as np import random -from mnist_util import read_from_mnist -from paddle.trainer_config_helpers import * -import paddle.v2 +import numpy as np +import paddle.v2 as paddle_v2 +import py_paddle.swig_paddle as api +from paddle.trainer_config_helpers import * +from py_paddle import DataProviderConverter -def network_config(): - imgs = data_layer(name='pixel', size=784) - hidden1 = fc_layer(input=imgs, size=200) - hidden2 = fc_layer(input=hidden1, size=200) - inference = fc_layer(input=hidden2, size=10, act=SoftmaxActivation()) - cost = classification_cost( - input=inference, label=data_layer( - name='label', size=10)) - outputs(cost) +from mnist_util import read_from_mnist def init_parameter(network): @@ -67,7 +57,7 @@ def input_order_converter(generator): def main(): api.initPaddle("-use_gpu=false", "-trainer_count=4") # use 4 cpu cores - optimizer = paddle.v2.optimizer.Adam( + optimizer = paddle_v2.optimizer.Adam( learning_rate=1e-4, batch_size=1000, model_average=ModelAverage(average_window=0.5), @@ -79,8 +69,20 @@ def main(): updater = optimizer.create_local_updater() assert isinstance(updater, api.ParameterUpdater) + # define network + images = paddle_v2.layer.data( + name='pixel', type=paddle_v2.data_type.dense_vector(784)) + label = paddle_v2.layer.data( + name='label', type=paddle_v2.data_type.integer_value(10)) + hidden1 = paddle_v2.layer.fc(input=images, size=200) + hidden2 = paddle_v2.layer.fc(input=hidden1, size=200) + inference = paddle_v2.layer.fc(input=hidden2, + size=10, + act=paddle_v2.activation.Softmax()) + cost = paddle_v2.layer.classification_cost(input=inference, label=label) + # Create Simple Gradient Machine. - model_config = parse_network_config(network_config) + model_config = paddle_v2.layer.parse_network(cost) m = api.GradientMachine.createFromConfigProto(model_config, api.CREATE_MODE_NORMAL, optimizer.enable_types()) @@ -97,8 +99,7 @@ def main(): # DataProvider Converter is a utility convert Python Object to Paddle C++ # Input. The input format is as same as Paddle's DataProvider. 
- converter = DataProviderConverter( - input_types=[dp.dense_vector(784), dp.integer_value(10)]) + converter = DataProviderConverter(input_types=[images.type, label.type]) train_file = './data/raw_data/train' test_file = './data/raw_data/t10k' diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..6fc01ce58be57c77144c6558d039430b22d3a746 --- /dev/null +++ b/demo/mnist/api_train_v2.py @@ -0,0 +1,61 @@ +import numpy +import paddle.v2 as paddle + +import mnist_util + + +def train_reader(): + train_file = './data/raw_data/train' + generator = mnist_util.read_from_mnist(train_file) + for item in generator: + yield item + + +def main(): + paddle.init(use_gpu=False, trainer_count=1) + + # define network topology + images = paddle.layer.data( + name='pixel', type=paddle.data_type.dense_vector(784)) + label = paddle.layer.data( + name='label', type=paddle.data_type.integer_value(10)) + hidden1 = paddle.layer.fc(input=images, size=200) + hidden2 = paddle.layer.fc(input=hidden1, size=200) + inference = paddle.layer.fc(input=hidden2, + size=10, + act=paddle.activation.Softmax()) + cost = paddle.layer.classification_cost(input=inference, label=label) + + parameters = paddle.parameters.create(cost) + for param_name in parameters.keys(): + array = parameters.get(param_name) + array[:] = numpy.random.uniform(low=-1.0, high=1.0, size=array.shape) + parameters.set(parameter_name=param_name, value=array) + + adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01) + + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + para = parameters.get('___fc_2__.w0') + print "Pass %d, Batch %d, Cost %f, Weight Mean Of Fc 2 is %f" % ( + event.pass_id, event.batch_id, event.cost, para.mean()) + + else: + pass + + trainer = paddle.trainer.SGD(update_equation=adam_optimizer) + + trainer.train(train_data_reader=train_reader, + topology=cost, + parameters=parameters, + event_handler=event_handler, + batch_size=32, # batch size should be refactor in Data reader + data_types={ # data_types will be removed, It should be in + # network topology + 'pixel': images.type, + 'label': label.type + }) + + +if __name__ == '__main__': + main() diff --git a/demo/sentiment/dataprovider.py b/demo/sentiment/dataprovider.py index 00f72cecacb454a0dd1184fa2098be4543007de7..4b7f5d0e504aef3884a04cbed8c16503a4079772 100755 --- a/demo/sentiment/dataprovider.py +++ b/demo/sentiment/dataprovider.py @@ -32,4 +32,6 @@ def process(settings, file_name): word_slot = [ settings.word_dict[w] for w in words if w in settings.word_dict ] + if not word_slot: + continue yield word_slot, label diff --git a/demo/sentiment/predict.py b/demo/sentiment/predict.py index 8ec490f64691924013200a3d0038d39aa834b038..64c78e0d6b9297e7a321a4f070517593b0bfe332 100755 --- a/demo/sentiment/predict.py +++ b/demo/sentiment/predict.py @@ -138,7 +138,11 @@ def main(): batch = [] for line in sys.stdin: - batch.append([predict.get_index(line)]) + words = predict.get_index(line) + if words: + batch.append([words]) + else: + print('All the words in [%s] are not in the dictionary.' 
% line) if len(batch) == batch_size: predict.batch_predict(batch) batch = [] diff --git a/doc/api/trainer_config_helpers/layers.rst b/doc/api/trainer_config_helpers/layers.rst index 8b0e553eacc932bc59062103ac6e6ac4245d03cb..2793d6afd9565eb461c8657b838b146fe1992b20 100644 --- a/doc/api/trainer_config_helpers/layers.rst +++ b/doc/api/trainer_config_helpers/layers.rst @@ -279,6 +279,12 @@ concat_layer :members: concat_layer :noindex: +seq_concat_layer +---------------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: seq_concat_layer + :noindex: + Reshaping Layers ================ @@ -302,6 +308,12 @@ repeat_layer :members: repeat_layer :noindex: +seq_reshape_layer +----------------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: seq_reshape_layer + :noindex: + Math Layers =========== diff --git a/doc/design/api.md b/doc/design/api.md new file mode 100644 index 0000000000000000000000000000000000000000..8185d2af0ea264a2e7b4e28b9ed05279e4a22014 --- /dev/null +++ b/doc/design/api.md @@ -0,0 +1,262 @@ +# PaddlePaddle Design Doc + +## Ingredients + +Our design principle is to start from the essence: how could we +allow users to express and solve their problems with neural networks? +Some essential concepts that our API has to provide include: + +1. A *topology* is an expression of *layers*. + +1. A layer could be any kind of computation, including *cost*. + +1. Some layers have parameters, some don't. Most costs don't have + parameters. + +1. In some topologies, layers share parameters. For + example, + [the network for training a ranking model](https://github.com/PaddlePaddle/Paddle/issues/1311#issuecomment-279121850). + +1. At programming time, users specify topologies and possible sharing + of parameters. PaddlePaddle can figure out and create parameters + required (and possibly shared) by one or more topologies. + + +## Starting from Examples + +As a summary +of +[our discussion](https://github.com/PaddlePaddle/Paddle/issues/1315), +let us present two examples here: + + +### Example 1. Sharing Parameters between Layers + +We use +the +[3-branch ranking](https://github.com/PaddlePaddle/Paddle/issues/1311#issuecomment-279121850) model +in this example. For your convenience, I copy-paste the model's +topology as follows: + +``` +A -> f -\ +Q -> f --> cost +B -> f -/ +``` + +The following program trains the topology including the cost, and then +uses the sub-network of the trained topology for inference: + +```python +def f(x): + e = paddle.layer.embedding(x, parameter_name="embedding") + o = paddle.layer.softmax(e, parameter_name="semantic") + return o + +# Create 3 topologies (subnets); they share parameters because all +# corresponding layers have the same parameter names. +fA = f(paddle.layer.data(input_name="A")) +fB = f(paddle.layer.data(input_name="B")) +fQ = f(paddle.layer.data(input_name="Q")) + +topology = paddle.layer.less_than( + paddle.layer.cross_entropy(fA, fQ), + paddle.layer.cross_entropy(fB, fQ)) + +# Derive parameters required in topology and create them in model. +parameters = paddle.parameters.create(topology) + +# Estimate parameters used in topology from data. +paddle.train(topology, parameters, reader=read_ranking_model_data) + +# Inference using fA (or fB or fQ, as they share their parameters). +[testA, testB, testQ] = read_ranking_model_data() +print "The semantic-vector of testA: ", paddle.infer(fA, parameters, testA) +```
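Because all three branches pass the same `parameter_name` values, the derived parameter set holds exactly one entry per shared name. The short sketch below only illustrates this point; it assumes the `get`/`set` accessors used elsewhere in this PR (`demo/mnist/api_train_v2.py`) and is not itself part of the proposed API:

```python
# "embedding" and "semantic" each exist once in the parameter set,
# no matter how many layers or topologies reference them.
emb = parameters.get("embedding")   # shared by fA, fB and fQ
sem = parameters.get("semantic")

# Overwriting a shared parameter therefore affects every branch that uses it.
parameters.set(parameter_name="embedding", value=emb * 0.5)
```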
### Example 2. Sharing Parameters between "Models" + +We use [GAN](https://github.com/PaddlePaddle/book/tree/develop/gan) in +this example. In the following example program, `d0` and `d1` +correspond to the two networks in the following figure: + + + +```python +def G(x): + # over-simplified example as G has only one layer: + return paddle.layer.fc(x, parameter_name="G") + +def D(x): + # again, over-simplified: + return paddle.layer.fc(x, parameter_name="D") + +# Construct the first topology, which contains both D and G. +# By learning this topology, we update parameters of G. +d0 = paddle.layer.should_be_false(D(G(paddle.layer.data()))) + +# Construct a second topology d1, which contains only D. By +# training this topology, we update parameters of D. Note +# that d1 shares parameters with d0. +d1 = paddle.layer.should_be_true(D(paddle.layer.data())) + +# Create parameters from a list of multiple topologies (models) for +# the chance to share parameters between these topologies. +parameters = paddle.parameters.create([d0, d1]) + +# Iterative training of GAN. +for ...: + train(d0, parameters, reader=read_from_rng, immutable_parameters={"D"}) + train(d1, parameters, reader=read_from_realistic_images) + +# Use d1 for inference: +print "D thinks a batch of images are realistic ", infer(d1, parameters, read_mnist_images) +``` + + +### Summarization + + +The above two programs reveal some important design concerns: + +1. Users describe a topology as an expression of layers. Every layer + has a *parameter name*. If the users don't specify it explicitly, it's automatically generated as a unique name. By + specifying the parameter name, users can specify the sharing of + parameters between layers and even between topologies. + +1. `paddle.parameters.create` figures out parameters required by one + or more topologies from parameter names of layers. It creates these + parameters and returns a `ParameterSet` object, which is in essence + a map from *parameter names* to *parameters*. + +1. At training and inference time, `paddle.train` and `paddle.infer` + require both a topology and the parameter set that holds the parameters of that topology. There are several reasons: + + 1. This prevents users from forgetting to call + `paddle.parameters.create`. + 1. `paddle.train` needs to know which parameter set to update. + 1. Users could load another (pre-trained) parameter set and use it + with a topology in `paddle.infer`. + +1. By specifying the `immutable_parameters` parameter of + `paddle.train`, we can forbid the update of these parameters. + + +## Reader + +Not all programming frameworks allow users to define I/O functions. +An example is Google MapReduce, which can only read from text, +SSTable, and RecordIO files. Hadoop MapReduce allows users to define +readers and writers by deriving from base classes `Reader` and +`Writer`. The former is less flexible but also less error-prone. We +decided to give users the flexibility to define their own readers. + + +There are some open questions here: + +1. **Should a reader return a Python dictionary?** + +1. **How to map multiple outputs from a reader to multiple data layers?** + +1. **How to easily compose some existing readers to read more data and + feed a topology with more data layers?** + + +## Training + +The recommended way to train a model is to call `paddle.train`, +which simply calls `paddle.trainer.Default`, a global variable of +type `paddle.trainer.SGD`.
Equivalently, we can do + +```python +opt = paddle.trainer.SGD(..., paddle.updater.Adam(...)) +opt.train(topology, parameters, reader=read, ...) +``` + +### Updater + +Please be aware that a trainer can accept an updater as its data +member, where an updater is a class derived from +`paddle.trainer.Updater`. This is to make it easier to customize +trainers, as discussed +[here](https://github.com/PaddlePaddle/Paddle/issues/1319). + +### Event Handler + +`paddle.train` and `paddle.trainer.XXX.train` take an optional +parameter `event_handler`, which should be either `None` or a function +that handles some events: + +1. BeginTraining +1. EndTraining +1. BeginIteration +1. EndIteration +1. BeginPass +1. EndPass + +where EndPass is sent if and only if the reader yields +`end_pass=True`. + +An example is as follows: + +```python +def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + print paddle.test(...) + +paddle.train(topology, parameters, reader, event_handler) +``` + +If we are writing a PaddlePaddle program in and for IPython/Jupyter, +we can use matplotlib in the event handler to plot a curve of +cost/error versus iterations, as shown +[here](https://blog.dominodatalab.com/interactive-dashboards-in-jupyter/). + +### Distributed Training + +If users want to do distributed training on a cluster, they should +call `paddle.dist_train` and provide access tokens to the cluster as +a parameter. + +For example, if a user has a TLS certificate that allows access to a +Kubernetes cluster, s/he should be able to call + +```python +paddle.dist_train(model, + trainer=paddle.trainer.SGD(..., + paddle.updater.Adam(...)), + reader=read, + k8s_user="yi", + k8s_token="kube_cluster_tls.pem", + k8s_job="hello", + num_parameter_servers=15) +``` + +The pseudo code of `paddle.dist_train` is as follows: + +```python +def dist_train(topology, parameters, trainer, reader, ...): + if os.getenv("KUBERNETES_SERVICE_HOST") == None: + image_name = k8s_user + '/' + k8s_job + docker_build(image_name) + docker_push() + kube_ctrl_start_job(image_name, k8s_user, k8s_token) + else: + rank = kube_list_containers_in_job_and_return_current_containers_rank() + if rank == 0: + master() + elif rank < 15: + parameter_server() + else: + trainer.train(model, reader=read) +``` + +Please be aware that if a process is running on the Kubernetes +cluster, it will have some environment variables pre-defined. + +If `dist_train` doesn't see these environment variables, it knows +that it's running on the user's personal computer, and it should work as a +*launcher*. Otherwise, it knows that it's running on the cluster and +needs to figure out its role as either the master, a trainer, or a +parameter server. diff --git a/doc/design/reader/README.md b/doc/design/reader/README.md new file mode 100644 index 0000000000000000000000000000000000000000..17d52b9e20b8130688028092421f4b33f44763ac --- /dev/null +++ b/doc/design/reader/README.md @@ -0,0 +1,161 @@ +# Python Data Reader Design Doc + +At training and testing time, PaddlePaddle programs need to read data. To ease users' work of writing data reading code, we define that + +- A *reader* is a function that reads data (from file, network, random number generator, etc.) and yields data items. +- A *reader creator* is a function that returns a reader function. +- A *reader decorator* is a function that accepts one or more readers and returns a reader. + +and we provide frequently used reader creators and reader decorators. A short sketch of these three concepts follows.
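To make the three terms concrete before the interface is specified below, here is a minimal, self-contained sketch; `counting_reader_creator` and `take` are illustrative names only, not part of the proposed `paddle.reader` package:

```python
# A *reader creator*: calling it returns a reader.
def counting_reader_creator(n):
    def reader():  # the reader itself yields single data items
        for i in range(n):
            yield i
    return reader

# A *reader decorator*: takes a reader (plus options) and returns a new reader.
def take(reader, k):
    def new_reader():
        for i, item in enumerate(reader()):
            if i >= k:
                break
            yield item
    return new_reader

first_five = take(counting_reader_creator(100), 5)
print(list(first_five()))  # [0, 1, 2, 3, 4]
```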
+ +## Data Reader Interface + +Indeed, a *data reader* doesn't have to be a function that reads and yields data items. It can be any function with no parameter that creates an iterable (anything that can be used in `for x in iterable`): + +``` +iterable = data_reader() +``` + +Each element produced from the iterable should be a **single** entry of data, **not** a mini batch. That entry of data could be a single item, or a tuple of items. Each item should be of a [supported type](http://www.paddlepaddle.org/doc/ui/data_provider/pydataprovider2.html?highlight=dense_vector#input-types) (e.g., a numpy 1d array of float32, an int, or a list of ints). + +An example implementation of a single-item data reader creator: + +```python +def reader_creator_random_image(width, height): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height) + return reader +``` + +An example implementation of a multiple-item data reader creator: +```python +def reader_creator_random_image_and_label(width, height, label): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height), label + return reader +``` + +## Usage + +The data reader, the mapping from item(s) read to data layer(s), the batch size, and the total number of passes will be passed into `paddle.train`: + +```python +# two data layers are created: +image_layer = paddle.layer.data("image", ...) +label_layer = paddle.layer.data("label", ...) + +# ... + +paddle.train(paddle.dataset.mnist, {"image":0, "label":1}, 128, 10, ...) +``` + +## Data Reader Decorator + +A *data reader decorator* takes one or more data readers and returns a new data reader. It is similar to a [python decorator](https://wiki.python.org/moin/PythonDecorators), but it does not use the `@` syntax. + +Since we have a strict interface for data readers (no parameters; return a single data item), data readers can be used flexibly via data reader decorators. Following are a few examples: + +### Prefetch Data + +Since reading data may take time and training can not proceed without data, it is generally a good idea to prefetch data. + +Use `paddle.reader.buffered` to prefetch data: + +```python +buffered_reader = paddle.reader.buffered(paddle.dataset.mnist, 100) +``` + +`buffered_reader` will try to buffer (prefetch) `100` data entries. + +### Compose Multiple Data Readers + +For example, we want to use a source of real images (reusing the mnist dataset) and a source of random images as input for [Generative Adversarial Networks](https://arxiv.org/abs/1406.2661). + +We can do: + +```python +def reader_creator_random_image(width, height): + def reader(): + while True: + yield numpy.random.uniform(-1, 1, size=width*height) + return reader + +def reader_creator_bool(t): + def reader(): + while True: + yield t + return reader + +true_reader = reader_creator_bool(True) +false_reader = reader_creator_bool(False) + +reader = paddle.reader.compose(paddle.dataset.mnist, reader_creator_random_image(20, 20), true_reader, false_reader) +# Skipped 1 because paddle.dataset.mnist produces two items per data entry. +# And we don't care about the second item at this time. +paddle.train(reader, {"true_image":0, "fake_image": 2, "true_label": 3, "false_label": 4}, ...) +``` + +### Shuffle + +Given shuffle buffer size `n`, `paddle.reader.shuffle` will return a data reader that buffers `n` data entries and shuffles them before a data entry is read. + +Example: +```python +reader = paddle.reader.shuffle(paddle.dataset.mnist, 512) +```
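A buffered-shuffle decorator like this can be written purely in terms of the reader interface above. The following is a minimal sketch, not the actual `paddle.reader.shuffle` implementation; it assumes only that `reader` follows the no-parameter, yield-one-entry convention:

```python
import random

def shuffle(reader, buf_size):
    def shuffled_reader():
        buf = []
        for item in reader():
            buf.append(item)
            if len(buf) >= buf_size:
                random.shuffle(buf)
                for b in buf:
                    yield b
                buf = []
        # flush the remaining (fewer than buf_size) entries at the end of a pass
        random.shuffle(buf)
        for b in buf:
            yield b
    return shuffled_reader
```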
## Q & A + +### Why return only a single entry, but not a mini batch? + +If a mini batch is returned, the data reader needs to take care of batch size. But batch size is a concept for training; it makes more sense for the user to specify batch size as a parameter of `train`. + +Practically, always returning a single entry makes reusing existing data readers much easier (e.g., if an existing reader returned 3 entries instead of a single entry, the training code would be more complex because it would need to handle cases like a batch size of 2). + +### Why use a dictionary but not a list to provide mapping? + +We decided to use a dictionary (`{"image":0, "label":1}`) instead of a list (`["image", "label"]`) because the user can easily reuse an item (e.g., using `{"image_a":0, "image_b":0, "label":1}`) or skip an item (e.g., using `{"image_a":0, "label":2}`). + +### How to create a custom data reader creator + +```python +def image_reader_creator(image_path, label_path, n): + def reader(): + f = open(image_path) + l = open(label_path) + images = numpy.fromfile( + f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32') + images = images / 255.0 * 2.0 - 1.0 + labels = numpy.fromfile(l, 'ubyte', count=n).astype("int") + for i in xrange(n): + yield images[i, :], labels[i] # a single entry of data is created each time + f.close() + l.close() + return reader + +# image_reader_creator creates a reader +reader = image_reader_creator("/path/to/image_file", "/path/to/label_file", 1024) +paddle.train(reader, {"image":0, "label":1}, ...) +``` + +### How is `paddle.train` implemented + +An example implementation of `paddle.train` could be: + +```python +def make_minibatch(reader, minibatch_size): + def ret(): + r = reader() + buf = [r.next() for x in xrange(minibatch_size)] + while len(buf) > 0: + yield buf + buf = [r.next() for x in xrange(minibatch_size)] + return ret + +def train(reader, mapping, batch_size, total_pass): + for pass_idx in range(total_pass): + for mini_batch in make_minibatch(reader, batch_size)(): # this loop will never end in online learning. + do_forward_backward(mini_batch, mapping) +``` diff --git a/paddle/cuda/include/hl_matrix.h b/paddle/cuda/include/hl_matrix.h index 40828dd5cc76f4197e6cfbb1121f2eef2c1ac580..6f21b82afdc6cdde785fdd8f13eef17a0fdd6324 100644 --- a/paddle/cuda/include/hl_matrix.h +++ b/paddle/cuda/include/hl_matrix.h @@ -188,48 +188,6 @@ extern void hl_param_relu_backward_diff(real* grad_o, int width, int height, int partial_sum); -/** - * @brief cos sim forward - * - * @param[out] output output data - * @param[in] input1 input1 data(matrix) - * @param[in] input2 input2 data(matrix or vector) - * @param[in] width matrix width - * @param[in] input1_height input1_height - * @param[in] input2_height input2_height - * @param[in] scale scale factor - */ -extern void hl_cossim(real* output, - real* input1, - real* input2, - int width, - int input1_height, - int input2_height, - real scale); -/** - * @brief cos sim derivate - * - * @param[in] grad output grad - * @param[in] output output data - * @param[in] prevOutX input1 data - * @param[in] prevOutY input2 data - * @param[out] prevGradX input1 grad - * @param[out] prevGradY input2 grad - * @param[in] width matrix width - * @param[in] input1_height input1 height - * @param[in] input2_height input2 height - * @param[in] scale scale factor - */ -extern void hl_cossim_derivative(real* grad, - real* output, - real* prevOutX, - real* prevOutY, - real* prevGradX, - real* prevGradY, - int width, - int input1_height, - int input2_height, - real scale); /** * @brief Matrix addition: A_d[i][j] += scale * B_d[j/channel].
diff --git a/paddle/cuda/include/stub/hl_matrix_stub.h b/paddle/cuda/include/stub/hl_matrix_stub.h index a1712d1e4d2a5dc80526b7d7b5ad7bd4f5d8c1ed..f4e6461cdcf198637b2c96fee88d1de2766aaf18 100644 --- a/paddle/cuda/include/stub/hl_matrix_stub.h +++ b/paddle/cuda/include/stub/hl_matrix_stub.h @@ -74,25 +74,6 @@ inline void hl_param_relu_backward_diff(real* grad_o, int height, int partial_sum) {} -inline void hl_cossim(real* output, - real* input1, - real* input2, - int width, - int input1_height, - int input2_height, - real scale) {} - -inline void hl_cossim_derivative(real* grad, - real* output, - real* prevOutX, - real* prevOutY, - real* prevGradX, - real* prevGradY, - int width, - int input1_height, - int input2_height, - real scale) {} - inline void hl_matrix_add_shared_bias(real* A_d, real* B_d, const int channel, diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/cuda/src/hl_cuda_matrix.cu index cd23bd31057c5c8cd10173bc5fa5fa67f2d0e422..96c07d9c3b7a37daa9198fd7ea66b7d811600348 100644 --- a/paddle/cuda/src/hl_cuda_matrix.cu +++ b/paddle/cuda/src/hl_cuda_matrix.cu @@ -584,177 +584,6 @@ void hl_param_relu_backward_diff(real* grad_o, CHECK_SYNC("hl_param_relu_backward_diff failed"); } -template -__global__ void KeCosSim(real* output, - real* input1, - real* input2, - int width, - int input1_height, - int input2_height, - real scale) { - const int ty = blockIdx.y; - int tid = threadIdx.x; - - __shared__ real xx[blockSize]; - __shared__ real yy[blockSize]; - __shared__ real xy[blockSize]; - - xx[tid] = 0.0; - yy[tid] = 0.0; - xy[tid] = 0.0; - __syncthreads(); - - input1 += ty * width; - if (input2_height > 1) { - input2 += ty * width; - } - for (int index = tid; index < width; index += blockSize) { - real x = input1[index]; - real y = input2[index]; - xx[tid] += x * x; - yy[tid] += y * y; - xy[tid] += x * y; - } - __syncthreads(); - - for (int s = blockSize / 2; s > 0; s >>= 1) { - if (tid < s) { - xx[tid] += xx[tid + s]; - yy[tid] += yy[tid + s]; - xy[tid] += xy[tid + s]; - } - __syncthreads(); - } - if (tid == 0) { - output[ty] = scale * xy[0] / (sqrt(xx[0]) * sqrt(yy[0])); - } -} - -void hl_cossim(real* output, - real* input1, - real* input2, - int width, - int input1_height, - int input2_height, - real scale) { - CHECK_NOTNULL(output); - CHECK_NOTNULL(input1); - CHECK_NOTNULL(input2); - const int blockSize = 256; - dim3 threads(blockSize, 1); - dim3 grid(1, input1_height); - - KeCosSim<<>> - (output, input1, input2, width, input1_height, input2_height, scale); - CHECK_SYNC("hl_cossim failed"); -} - -template -__global__ void KeCosSimDerivative(real* grad, - real* output, - real* prevOutX, - real* prevOutY, - real* prevGradX, - real* prevGradY, - int width, - int input1_height, - int input2_height, - real scale) { - const int ty = blockIdx.y; - int tid = threadIdx.x; - - __shared__ real xx[blockSize]; - __shared__ real yy[blockSize]; - __shared__ real xy[blockSize]; - - xx[tid] = 0.0; - yy[tid] = 0.0; - xy[tid] = 0.0; - __syncthreads(); - - prevOutX += ty * width; - prevGradX += ty * width; - if (input2_height > 1) { - prevOutY += ty * width; - prevGradY += ty * width; - } - for (int index = tid; index < width; index += blockSize) { - real x = prevOutX[index]; - real y = prevOutY[index]; - xx[tid] += x * x; - yy[tid] += y * y; - xy[tid] += x * y; - } - __syncthreads(); - - for (int s = blockSize / 2; s > 0; s >>= 1) { - if (tid < s) { - xx[tid] += xx[tid + s]; - yy[tid] += yy[tid + s]; - xy[tid] += xy[tid + s]; - } - __syncthreads(); - } - if (xy[0] == 0) { - real reciprocal = 1.0 / 
(sqrt(xx[0]) * sqrt(yy[0])); - for (int index = tid; index < width; index += blockSize) { - prevGradX[index] += - scale * grad[ty] * prevOutY[index] * reciprocal; - if (input2_height > 1) { - prevGradY[index] += - scale * grad[ty] * prevOutX[index] * reciprocal; - } else { - paddle::paddleAtomicAdd(prevGradY + index, - scale * grad[ty] * prevOutX[index] * reciprocal); - } - } - } else { - real reciprocalXY = 1.0 / xy[0]; - real reciprocalSquareSumX = 1.0 / xx[0]; - real reciprocalSquareSumY = 1.0 / yy[0]; - for (int index = tid; index < width; index += blockSize) { - prevGradX[index] += output[ty] * grad[ty] * - (prevOutY[index] * reciprocalXY - - prevOutX[index] * reciprocalSquareSumX); - if (input2_height > 1) { - prevGradY[index] += output[ty] * grad[ty] * - (prevOutX[index] * reciprocalXY - - prevOutY[index] * reciprocalSquareSumY); - } else { - paddle::paddleAtomicAdd(prevGradY + index, output[ty] * grad[ty] * - (prevOutX[index] * reciprocalXY - - prevOutY[index] * reciprocalSquareSumY)); - } - } - } -} - - -void hl_cossim_derivative(real* grad, - real* output, - real* prevOutX, - real* prevOutY, - real* prevGradX, - real* prevGradY, - int width, - int input1_height, - int input2_height, - real scale) { - CHECK_NOTNULL(grad); - CHECK_NOTNULL(output); - CHECK_NOTNULL(prevOutX); - CHECK_NOTNULL(prevOutY); - CHECK_NOTNULL(prevGradX); - CHECK_NOTNULL(prevGradY); - const int blockSize = 256; - dim3 threads(blockSize, 1); - dim3 grid(1, input1_height); - KeCosSimDerivative<<>> - (grad, output, prevOutX, prevOutY, prevGradX, prevGradY, width, - input1_height, input2_height, scale); - CHECK_SYNC("hl_cossim_derivate failed"); -} - __global__ void KeMatrixAddSharedBias(real* A, real* B, const int channel, diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h index 349b21e7e64064804c5d0ee26e82698925832c35..0dc7792f646457c22ee4791f18814afaa3809f7b 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/function/BufferArg.h @@ -190,7 +190,7 @@ public: : BufferArg(VALUE_TYPE_INT32, shape, argType) { bufferType_ = TENSOR_SEQUENCE_ID; CHECK_EQ(shape_.ndims(), 1UL); - CHECK_GT(shape_[0], 1UL); + CHECK_GE(shape_[0], 1UL); numSeqs_ = shape_[0] - 1; } @@ -226,7 +226,8 @@ public: SequenceArg(ValueType valueType, const TensorShape& shape, ArgType argType = UNSPECIFIED) - : BufferArg(valueType, shape, argType), startPositions_(TensorShape()) { + : BufferArg(valueType, shape, argType), + startPositions_(TensorShape({shape[0]})) { bufferType_ = TENSOR_SEQUENCE_DATA; } diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index fae3b7b20a70b56dc44ea2df637281afe01a7e5a..1522510e8bb9816cb468fcf406e22560163950cc 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -27,6 +27,7 @@ if(WITH_TESTING) add_simple_unittest(ContextProjectionOpTest) add_simple_unittest(PadOpTest) add_simple_unittest(MulOpTest) + add_simple_unittest(CosSimOpTest) endif() endif() diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp index 6cd4e4abee8fccf3a4745b0bfc6701df4ddfa5c0..b87750b74247bd0eb822340bc5a85d41b86ecec2 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -108,26 +108,23 @@ public: } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK(1 == inputs.size() || 2 == inputs.size()); - CHECK_EQ((size_t)1, outputs.size()); + CHECK(1UL == inputs.size() || 2UL == inputs.size()); + CHECK_EQ(1UL, outputs.size()); CHECK(inputs[0].isSequenceArg() && 
outputs[0].isSequenceArg()) << "SequenceArg required here"; const auto val_seqs = dynamic_cast(inputs[0]); auto out_seq = dynamic_cast(outputs[0]); CHECK(out_seq.data() && val_seqs.data() && val_seqs.getSequenceId().data()); - CHECK_EQ(out_seq.shape().ndims(), (size_t)2); - CHECK_EQ(val_seqs.shape().ndims(), (size_t)2); - CHECK_EQ(val_seqs.getSequenceId().shape().ndims(), (size_t)1); - if (2 == inputs.size()) { - CHECK_EQ(inputs[1].shape().ndims(), (size_t)2); - } + CHECK_EQ(out_seq.shape().ndims(), 2UL); + CHECK_EQ(val_seqs.shape().ndims(), 2UL); /// dim of output = dim of input * context_length CHECK_EQ(out_seq.shape()[1], val_seqs.shape()[1] * context_length_); /// input and output has the same batch_size CHECK_EQ(val_seqs.shape()[0], out_seq.shape()[0]); - /// dim of input == dim of weight - if (2 == inputs.size()) { + if (2UL == inputs.size()) { + CHECK_EQ(inputs[1].shape().ndims(), 2UL); + /// dim of input == dim of weight CHECK_EQ(val_seqs.shape()[1], inputs[1].shape()[1]); } @@ -135,10 +132,11 @@ public: auto out_mat = out_seq.matrix(); const auto in_mat = val_seqs.matrix(); const auto w_mat = - (2 == inputs.size()) + (2UL == inputs.size() && inputs[1].data()) ? inputs[1].matrix() : typename Tensor::Matrix(nullptr, 0, 0); const auto seq_vec = val_seqs.getSequenceId().vector(); + ContextProjectionForward(out_mat, in_mat, w_mat, @@ -235,36 +233,40 @@ public: } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ((size_t)1, inputs.size()); - CHECK_EQ((size_t)2, outputs.size()); + CHECK_EQ(1UL, inputs.size()); + CHECK(1UL == outputs.size() || 2UL == outputs.size()); CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) << "SequenceArg required here"; const auto in_seq = dynamic_cast(inputs[0]); auto out_seq = dynamic_cast(outputs[0]); CHECK(in_seq.data() && in_seq.getSequenceId().data()); - CHECK_EQ(in_seq.shape().ndims(), (size_t)2); - CHECK_EQ(in_seq.getSequenceId().shape().ndims(), (size_t)1); - CHECK_EQ(out_seq.shape().ndims(), (size_t)2); - CHECK_EQ(out_seq.getSequenceId().shape().ndims(), (size_t)1); - CHECK_EQ(outputs[1].shape().ndims(), (size_t)2); + CHECK_EQ(in_seq.shape().ndims(), 2UL); + CHECK_EQ(out_seq.shape().ndims(), 2UL); + CHECK_EQ(out_seq.getSequenceId().shape().ndims(), 1UL); - /// dim of input grad == dim of weight - CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]); /// input and output grad has the same batch_size CHECK_EQ(out_seq.shape()[0], in_seq.shape()[0]); /// dim of output grad = dim of input grad * context_length CHECK_EQ(in_seq.shape()[1], out_seq.shape()[1] * context_length_); CHECK_EQ(out_seq.getArgType(), ADD_TO); - CHECK_EQ(outputs[1].getArgType(), ADD_TO); + + if (2UL == outputs.size()) { + CHECK_EQ(outputs[1].shape().ndims(), 2UL); + /// dim of input grad == dim of weight + CHECK_EQ(out_seq.shape()[1], outputs[1].shape()[1]); + CHECK_EQ(outputs[1].getArgType(), ADD_TO); + } const auto seq_vec = in_seq.getSequenceId().vector(); const auto out_grad_mat = in_seq.matrix(); auto in_grad_mat = !out_seq.data() ? typename Tensor::Matrix(nullptr, 0, 0) : out_seq.matrix(); - auto w_grad_mat = !outputs[1].data() - ? typename Tensor::Matrix(nullptr, 0, 0) - : outputs[1].matrix(); + auto w_grad_mat = + (2UL == outputs.size() && outputs[1].data()) + ? 
outputs[1].matrix() + : typename Tensor::Matrix(nullptr, 0, 0); + ContextProjectionBackward(out_grad_mat, in_grad_mat, w_grad_mat, @@ -304,17 +306,17 @@ public: } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1, static_cast(inputs.size())); - CHECK_EQ(1, static_cast(outputs.size())); + CHECK_EQ(1UL, inputs.size()); + CHECK_EQ(1UL, outputs.size()); CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg()) << "SequenceArg required here"; const auto in_seq = dynamic_cast(inputs[0]); const auto out_seq = dynamic_cast(outputs[0]); CHECK(in_seq.data() && out_seq.data() && in_seq.getSequenceId().data()); - CHECK_EQ(static_cast(out_seq.shape().ndims()), 2); - CHECK_EQ(static_cast(in_seq.shape().ndims()), 2); - CHECK_EQ(static_cast(in_seq.getSequenceId().shape().ndims()), 1); + CHECK_EQ(out_seq.shape().ndims(), 2UL); + CHECK_EQ(in_seq.shape().ndims(), 2UL); + CHECK_EQ(in_seq.getSequenceId().shape().ndims(), 1UL); /// output layer grad dim == input layer grad dim * context_length_ CHECK_EQ(in_seq.shape().ndims(), out_seq.shape().ndims() * context_length_); /// input and output has the same batch_size @@ -355,14 +357,14 @@ public: } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(1, static_cast(inputs.size())); - CHECK_EQ(1, static_cast(outputs.size())); + CHECK_EQ(1UL, inputs.size()); + CHECK_EQ(1UL, outputs.size()); CHECK(inputs[0].isSequenceArg()) << "SequenceArg required here"; const auto in_seq = dynamic_cast(inputs[0]); CHECK(in_seq.data() && in_seq.getSequenceId().data() && outputs[0].data()); - CHECK_EQ(static_cast(outputs[0].shape().ndims()), 2); - CHECK_EQ(static_cast(in_seq.shape().ndims()), 2); - CHECK_EQ(static_cast(in_seq.getSequenceId().shape().ndims()), 1); + CHECK_EQ(outputs[0].shape().ndims(), 2UL); + CHECK_EQ(in_seq.shape().ndims(), 2UL); + CHECK_EQ(in_seq.getSequenceId().shape().ndims(), 1UL); CHECK_EQ(in_seq.shape()[0], outputs[0].shape()[0]); /// output layer grad dim == weight dim * context_length_ CHECK_EQ(in_seq.shape()[1], outputs[0].shape()[1] * context_length_); diff --git a/paddle/function/ContextProjectionOp.h b/paddle/function/ContextProjectionOp.h index 2bdd47e4e9b02483c2c5af82bf00c4e55d68f93e..6f7d936379a5378e6fd85dd86618d1b6094bd14f 100644 --- a/paddle/function/ContextProjectionOp.h +++ b/paddle/function/ContextProjectionOp.h @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once - #include "Function.h" namespace paddle { diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp index c9db2ff8008e0bb0fa04370fb7b3ecd7641d2062..0f5d6a848d406d14984a0b6edad8192dab42e88b 100644 --- a/paddle/function/ContextProjectionOpTest.cpp +++ b/paddle/function/ContextProjectionOpTest.cpp @@ -28,55 +28,26 @@ void testMatrixProjectionForward(int context_start, std::max(0, (int)(context_start + context_length - 1)); if (pad == 0) is_padding = false; - FunctionCompare compare("ContextProjectionForward", - FuncConfig() - .set("context_length", context_length) - .set("context_start", context_start) - .set("begin_pad", std::max(0, -context_start))); - - CpuMatrix cpu_in(batch_size, input_dim); - cpu_in.randomizeUniform(); - GpuMatrix gpu_in(batch_size, input_dim); - gpu_in.copyFrom(cpu_in); - auto cpu_weight = - is_padding ? std::make_shared(pad, input_dim) : nullptr; - auto gpu_weight = - is_padding ? 
std::make_shared(pad, input_dim) : nullptr; - if (is_padding) { - cpu_weight->randomizeUniform(); - gpu_weight->copyFrom(*cpu_weight); + FunctionCompare test("ContextProjectionForward", + FuncConfig() + .set("context_length", context_length) + .set("context_start", context_start) + .set("begin_pad", std::max(0, -context_start))); + + // prepare input arguments + test.addSequence(SequenceIdArg(TensorShape{batch_size})); + test.addInputs( + SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim})); + if (is_padding) { // weight + test.addInputs(SequenceArg(VALUE_TYPE_FLOAT, TensorShape{pad, input_dim})); } - IVectorPtr cpu_seq; - generateSequenceStartPositions(batch_size, cpu_seq); - IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true); - gpu_seq->copyFrom(*cpu_seq); - - CpuMatrix cpu_out(batch_size, input_dim * context_length); - GpuMatrix gpu_out(batch_size, input_dim * context_length); - cpu_out.randomizeUniform(); - gpu_out.copyFrom(cpu_out); - - BufferArgs cpu_inputs; - BufferArgs cpu_outputs; - cpu_inputs.addArg(cpu_in, *cpu_seq); - if (cpu_weight) { - cpu_inputs.addArg(*cpu_weight, *cpu_seq); - } - cpu_outputs.addArg(cpu_out, *cpu_seq, ADD_TO); - - compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs); + test.addOutputs( + SequenceArg(VALUE_TYPE_FLOAT, + TensorShape{batch_size, input_dim * context_length}), + ADD_TO); - BufferArgs gpu_inputs; - BufferArgs gpu_outputs; - gpu_inputs.addArg(gpu_in, *gpu_seq); - if (gpu_weight) { - gpu_inputs.addArg(*gpu_weight, *gpu_seq); - } - gpu_outputs.addArg(gpu_out, *gpu_seq, ADD_TO); - - compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs); - - autotest::TensorCheckEqual(cpu_out, gpu_out); + // run Function + test.run(); } void testMatrixProjectionBackward(int context_start, @@ -88,63 +59,31 @@ void testMatrixProjectionBackward(int context_start, std::max(0, (int)(context_start + context_length - 1)); if (pad == 0) is_padding = false; - FunctionCompare compare("ContextProjectionBackward", - FuncConfig() - .set("context_length", context_length) - .set("context_start", context_start) - .set("begin_pad", std::max(0, -context_start)) - .set("is_padding", is_padding) - .set("total_pad", pad)); - - CpuMatrix cpu_in_grad(batch_size, input_dim); - cpu_in_grad.randomizeUniform(); - GpuMatrix gpu_in_grad(batch_size, input_dim); - gpu_in_grad.copyFrom(cpu_in_grad); - - CpuMatrix cpu_out_grad(batch_size, input_dim * context_length); - cpu_out_grad.randomizeUniform(); - GpuMatrix gpu_out_grad(batch_size, input_dim * context_length); - gpu_out_grad.copyFrom(cpu_out_grad); - - IVectorPtr cpu_seq; - generateSequenceStartPositions(batch_size, cpu_seq); - IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true); - gpu_seq->copyFrom(*cpu_seq); - - auto cpu_w_grad = - is_padding ? std::make_shared(pad, input_dim) : nullptr; - auto gpu_w_grad = - is_padding ? 
std::make_shared(pad, input_dim) : nullptr; - if (is_padding) { - cpu_w_grad->randomizeUniform(); - gpu_w_grad->copyFrom(*cpu_w_grad); + FunctionCompare test("ContextProjectionBackward", + FuncConfig() + .set("context_length", context_length) + .set("context_start", context_start) + .set("begin_pad", std::max(0, -context_start)) + .set("is_padding", is_padding) + .set("total_pad", pad)); + + // prepare input arguments + test.addSequence(SequenceIdArg(TensorShape{batch_size})); + test.addInputs(SequenceArg( + VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim * context_length})); + test.addOutputs( + SequenceArg(VALUE_TYPE_FLOAT, TensorShape{batch_size, input_dim}), + ADD_TO); + if (is_padding) { // weight + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{pad, input_dim}), + ADD_TO); } - BufferArgs cpu_inputs; - BufferArgs cpu_outputs; - cpu_inputs.addArg(cpu_out_grad, *cpu_seq); - cpu_outputs.addArg(cpu_in_grad, *cpu_seq, ADD_TO); - cpu_outputs.addArg( - cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO); - - compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs); - - BufferArgs gpu_inputs; - BufferArgs gpu_outputs; - gpu_inputs.addArg(gpu_out_grad, *gpu_seq); - gpu_outputs.addArg(gpu_in_grad, *gpu_seq, ADD_TO); - gpu_outputs.addArg( - gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO); - - compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs); - - autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad); - if (is_padding) { - autotest::TensorCheckErr(*cpu_w_grad, *gpu_w_grad); - } + // run Function + test.run(); } -TEST(ContextProjection, projection) { +TEST(ContextProjection, Projection) { for (auto context_start : {-5, -3, -1, 0, 3}) { for (auto context_length : {1, 2, 5, 7}) { for (auto trainable_padding : {false, true}) { diff --git a/paddle/function/CosSimOp.cpp b/paddle/function/CosSimOp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7ece7b2dfedaf460741c97b5a700eb632d85cabc --- /dev/null +++ b/paddle/function/CosSimOp.cpp @@ -0,0 +1,240 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "CosSimOp.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/Vector.h" + +namespace paddle { +/** + * Cosine Similarity for CpuMatrix + * + * \param out_mat, output value, size: nSamples * 1. + * \param in1_mat, input value 1, size: nSamples * dim. + * \param in2_mat, input value 2, size: n2 * dim (n2 == 1 or n2 == nSamples). 
+ * \param scale, default 1.0 + * + */ +template <> +void CosSimForward(CpuMatrix& out_mat, + const CpuMatrix& in1_mat, + const CpuMatrix& in2_mat, + real scale) { + CHECK(out_mat.getData() && in1_mat.getData() && in2_mat.getData()); + size_t num_samples = out_mat.getHeight(); + size_t dim = in1_mat.getWidth(); + /// column vector [nSamples, 1] + real* out = out_mat.getData(); + const real* x = in1_mat.getData(); + const real* y = in2_mat.getData(); + + /// in2 might only have one row or full rows + CHECK(in2_mat.getHeight() == 1LU || in2_mat.getHeight() == num_samples); + size_t inc = (in2_mat.getHeight() == 1LU) ? 0 : dim; + for (size_t i = 0; i < num_samples; ++i, x += dim, y += inc) { + real square_sum_x = 0; + real square_sum_y = 0; + real xy = 0; + for (size_t j = 0; j < dim; ++j) { + square_sum_x += x[j] * x[j]; + square_sum_y += y[j] * y[j]; + xy += x[j] * y[j]; + } + CHECK(square_sum_x > 0 && square_sum_y > 0); + out[i] = scale * xy / (std::sqrt(square_sum_x) * std::sqrt(square_sum_y)); + } +} + +/** + * Cosine Similarity + * for each row i, + * out[i] = scale * cos(input1[i], input2[i]) + * = scale * /sqrt(|input1[i]|^2 * |input2[i]|^2) + * when input2 only has one row, then for each row i, + * out[i] = cos(input1[i], input2[0]) + * + * \param inputs[0] input matrix 1, size: nSamples * dim. + * \param inputs[1] input matrix 2, size: n2 * dim (n2 == 1 or n2 == nSamples). + * \param outputs[0] output matrix, size : nSamples * 1. + */ + +template +class CosSimForwardFunc : public FunctionBase { + void init(const FuncConfig& config) override { + scale_ = config.get("scale"); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(inputs.size(), 2UL); + CHECK_EQ(outputs.size(), 1UL); + + CHECK_EQ(inputs[0].shape().ndims(), 2UL); + CHECK_EQ(inputs[1].shape().ndims(), 2UL); + CHECK_EQ(outputs[0].shape().ndims(), 2UL); + + CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]); + CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]); + CHECK_EQ(outputs[0].shape()[1], 1UL); + + CHECK(outputs[0].data() && inputs[0].data() && inputs[1].data()); + + CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); + auto out_mat = outputs[0].matrix(); + const auto in1_mat = inputs[0].matrix(); + const auto in2_mat = inputs[1].matrix(); + + CosSimForward(out_mat, in1_mat, in2_mat, scale_); + } + +private: + real scale_; +}; + +/** + * Cosine Similarity Derivative for CpuMatrix + * + * \param in1_grad forward input grad 1, size: nSamples * dim. + * \param in2_grad forward input grad 2, + * size: n2 * dim (n2 == 1 or n2 == nSamples). + * + * \param out_grad backward loss output grad, size : nSamples * 1. + * \param out_val forward output value, size: nSamples * 1. + * \param in1_val forward input value 1, size: nSamples * dim. + * \param in2_val forward input value 2, + * size: n2 * dim (n2 == 1 or n2 == nSamples). 
+ * \param scale, default 1.0 + */ +template <> +void CosSimBackward(const CpuMatrix& out_grad, + const CpuMatrix& out_val, + const CpuMatrix& in1_val, + const CpuMatrix& in2_val, + CpuMatrix& in1_grad, + CpuMatrix& in2_grad, + real scale) { + CHECK(out_grad.getData() && out_val.getData() && in1_val.getData() && + in2_val.getData() && in1_grad.getData() && in2_grad.getData()); + CHECK_EQ(out_val.useGpu_, false) << "Matrix type are GPU, CPU required"; + + const real* grad = out_grad.getData(); + const real* out = out_val.getData(); + const real* prev_out_x = in1_val.getData(); + const real* prev_out_y = in2_val.getData(); + real* prev_grad_x = in1_grad.getData(); + real* prev_grad_y = in2_grad.getData(); + + size_t num_samples = out_grad.getHeight(); + size_t dim = in1_val.getWidth(); + CHECK_EQ(in2_val.getHeight(), in2_grad.getHeight()); + CHECK(in2_val.getHeight() == 1LU || in2_val.getHeight() == num_samples); + size_t inc = (in2_val.getHeight() == 1LU) ? 0 : dim; + for (size_t i = 0; i < num_samples; ++i, + prev_out_x += dim, + prev_out_y += inc, + prev_grad_x += dim, + prev_grad_y += inc) { + real square_sum_x = 0; + real square_sum_y = 0; + real xy = 0; + for (size_t j = 0; j < dim; ++j) { + square_sum_x += prev_out_x[j] * prev_out_x[j]; + square_sum_y += prev_out_y[j] * prev_out_y[j]; + xy += prev_out_x[j] * prev_out_y[j]; + } + CHECK(square_sum_x > 0 && square_sum_y > 0); + if (xy == 0) { + real reciprocal = + 1.0f / (std::sqrt(square_sum_x) * std::sqrt(square_sum_y)); + for (size_t j = 0; j < dim; ++j) { + prev_grad_x[j] += scale * grad[i] * prev_out_y[j] * reciprocal; + prev_grad_y[j] += scale * grad[i] * prev_out_x[j] * reciprocal; + } + } else { + real reciprocal_xy = 1.0f / xy; + real reciprocal_square_sum_x = 1.0f / square_sum_x; + real reciprocal_square_sum_y = 1.0f / square_sum_y; + for (size_t j = 0; j < dim; ++j) { + prev_grad_x[j] += + out[i] * grad[i] * (prev_out_y[j] * reciprocal_xy - + prev_out_x[j] * reciprocal_square_sum_x); + prev_grad_y[j] += + out[i] * grad[i] * (prev_out_x[j] * reciprocal_xy - + prev_out_y[j] * reciprocal_square_sum_y); + } + } + } +} + +/** + * Cosine Similarity backward Derivative + * + * \param outputs[0] forward input grad 1, size: nSamples * dim. + * \param outputs[1] forward input grad 2, + * size: n2 * dim (n2 == 1 or n2 == nSamples). + * + * \param inputs[0] backward loss output grad, size : nSamples * 1. + * \param inputs[1] forward output value, size: nSamples * 1. + * \param inputs[2] forward input value 1, size: nSamples * dim. + * \param inputs[3] forward input value 2, + * size: n2 * dim (n2 == 1 or n2 == nSamples). 
+ */ +template +class CosSimBackwardFunc : public FunctionBase { + void init(const FuncConfig& config) override { + scale_ = config.get("scale"); + } + + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { + CHECK_EQ(inputs.size(), 4UL); + CHECK_EQ(outputs.size(), 2UL); + /// dim of out_grad and out_val == 1, column vector + CHECK_EQ(inputs[0].shape()[1], 1UL); + CHECK_EQ(inputs[1].shape()[1], 1UL); + /// nSamples of out_grad == out_val == in_val1 == in_grad1 + CHECK_EQ(inputs[1].shape()[0], inputs[0].shape()[0]); + CHECK_EQ(inputs[0].shape()[0], inputs[0].shape()[0]); + CHECK_EQ(outputs[0].shape()[0], inputs[0].shape()[0]); + /// dim of in1_val1 == in_val2 == in_grad1 == in_grad2 + CHECK_EQ(inputs[3].shape()[1], inputs[2].shape()[1]); + CHECK_EQ(outputs[0].shape()[1], inputs[2].shape()[1]); + CHECK_EQ(outputs[1].shape()[1], inputs[2].shape()[1]); + + CHECK(inputs[0].data() && inputs[1].data() && inputs[2].data() && + inputs[3].data() && outputs[0].data() && outputs[1].data()); + + CHECK_EQ(outputs[0].getArgType(), ADD_TO); + CHECK_EQ(outputs[1].getArgType(), ADD_TO); + + const auto out_grad = inputs[0].matrix(); + const auto out_val = inputs[1].matrix(); + const auto in1_val = inputs[2].matrix(); + const auto in2_val = inputs[3].matrix(); + auto in1_grad = outputs[0].matrix(); + auto in2_grad = outputs[1].matrix(); + + CosSimBackward( + out_grad, out_val, in1_val, in2_val, in1_grad, in2_grad, scale_); + } + +private: + real scale_; +}; + +REGISTER_TYPED_FUNC(CosSimForward, CPU, CosSimForwardFunc); +REGISTER_TYPED_FUNC(CosSimBackward, CPU, CosSimBackwardFunc); +#ifndef PADDLE_ONLY_CPU +REGISTER_TYPED_FUNC(CosSimForward, GPU, CosSimForwardFunc); +REGISTER_TYPED_FUNC(CosSimBackward, GPU, CosSimBackwardFunc); +#endif +} // namespace paddle diff --git a/paddle/function/CosSimOp.h b/paddle/function/CosSimOp.h new file mode 100644 index 0000000000000000000000000000000000000000..be73064e6375bf1e6c6a7ca6de52e9b9b755880b --- /dev/null +++ b/paddle/function/CosSimOp.h @@ -0,0 +1,61 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Function.h" + +namespace paddle { + +/** + * \brief Cosine Similarity Forward. + * for each row i, + * out[i] = scale * cos(in1[i], in2[i]) + * = scale * \sum_j (in1[i][j] * in2[i][j]) / + * sqrt(sum_j (in1[i][j]^2) * sum_j (in2[i][j])^2) + * + * \param[out] output output value. + * \param[in] intput1 input value. + * \param[in] intput2 input value. + * \param[in] scale default 1.0. + * + */ +template +void CosSimForward(typename Tensor::Matrix& output, + const typename Tensor::Matrix& input1, + const typename Tensor::Matrix& input2, + real scale); + +/** + * \brief Cosine Similarity BackWard for Derivative. + * + * \param[in] output grad backward loss output grad. + * \param[in] output val forward-output value. + * \param[in] input val1 forward input value 1. + * \param[in] input val2 forward input value 2. + * \param[in/out] input grad forward input grad 1. 
+ * \param[in/out] input grad forward input grad 2. + * \param[in] scale default 1.0. + * + */ +template +void CosSimBackward(const typename Tensor::Matrix& out_grad, + const typename Tensor::Matrix& out_value, + const typename Tensor::Matrix& in1_value, + const typename Tensor::Matrix& in2_value, + typename Tensor::Matrix& in1_grad, + typename Tensor::Matrix& in2_grad, + real scale); + +} // namespace paddle diff --git a/paddle/function/CosSimOpGpu.cu b/paddle/function/CosSimOpGpu.cu new file mode 100644 index 0000000000000000000000000000000000000000..1dd733674fa0542c76070955ec63e008b083c7d2 --- /dev/null +++ b/paddle/function/CosSimOpGpu.cu @@ -0,0 +1,241 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "hl_base.h" +#include "hl_device_functions.cuh" +#include "CosSimOp.h" + +namespace paddle { + +template +__global__ void KeCosSim(real* output, + const real* input1, + const real* input2, + int width, + int input1_height, + int input2_height, + real scale) { + const int ty = blockIdx.y; + int tid = threadIdx.x; + + __shared__ real xx[block_size]; + __shared__ real yy[block_size]; + __shared__ real xy[block_size]; + + xx[tid] = 0.0; + yy[tid] = 0.0; + xy[tid] = 0.0; + __syncthreads(); + + input1 += ty * width; + if (input2_height > 1) { + input2 += ty * width; + } + for (int index = tid; index < width; index += block_size) { + real x = input1[index]; + real y = input2[index]; + xx[tid] += x * x; + yy[tid] += y * y; + xy[tid] += x * y; + } + __syncthreads(); + + for (int s = block_size / 2; s > 0; s >>= 1) { + if (tid < s) { + xx[tid] += xx[tid + s]; + yy[tid] += yy[tid + s]; + xy[tid] += xy[tid + s]; + } + __syncthreads(); + } + if (tid == 0) { + output[ty] = scale * xy[0] / (sqrt(xx[0]) * sqrt(yy[0])); + } +} + +void hlCossim(real* output, + const real* input1, + const real* input2, + size_t width, + size_t input1_height, + size_t input2_height, + real scale) { + CHECK_NOTNULL(output); + CHECK_NOTNULL(input1); + CHECK_NOTNULL(input2); + const int block_size = 256; + dim3 threads(block_size, 1); + dim3 grid(1, input1_height); + + KeCosSim<<>> + (output, input1, input2, width, input1_height, input2_height, scale); + CHECK_SYNC("hlCossim failed"); +} + +template <> +void CosSimForward(GpuMatrix& out_mat, + const GpuMatrix& in1_mat, + const GpuMatrix& in2_mat, + real scale) { + CHECK(out_mat.getData() && in1_mat.getData() && in2_mat.getData()); + CHECK(in1_mat.useGpu_ == true && in2_mat.useGpu_ == true) + << "Matrix type are not GPU"; + + size_t num_samples = out_mat.getHeight(); + size_t dim = in1_mat.getWidth(); + real* out = out_mat.getData(); + const real* x = in1_mat.getData(); + const real* y = in2_mat.getData(); + hlCossim(out, x, y, dim, in1_mat.getHeight(), in2_mat.getHeight(), scale); +} + +template +__global__ void KeCosSimDerivative(const real* grad, + const real* output, + const real* prev_out_x, + const real* prev_out_y, + real* prev_grad_x, + real* prev_grad_y, + size_t width, + size_t input1_height, + size_t 
+template <int block_size>
+__global__ void KeCosSimDerivative(const real* grad,
+                                   const real* output,
+                                   const real* prev_out_x,
+                                   const real* prev_out_y,
+                                   real* prev_grad_x,
+                                   real* prev_grad_y,
+                                   size_t width,
+                                   size_t input1_height,
+                                   size_t input2_height,
+                                   real scale) {
+  const int ty = blockIdx.y;
+  int tid = threadIdx.x;
+
+  __shared__ real xx[block_size];
+  __shared__ real yy[block_size];
+  __shared__ real xy[block_size];
+
+  xx[tid] = 0.0;
+  yy[tid] = 0.0;
+  xy[tid] = 0.0;
+  __syncthreads();
+
+  prev_out_x += ty * width;
+  prev_grad_x += ty * width;
+  if (input2_height > 1) {
+    prev_out_y += ty * width;
+    prev_grad_y += ty * width;
+  }
+  for (int index = tid; index < width; index += block_size) {
+    real x = prev_out_x[index];
+    real y = prev_out_y[index];
+    xx[tid] += x * x;
+    yy[tid] += y * y;
+    xy[tid] += x * y;
+  }
+  __syncthreads();
+
+  for (int s = block_size / 2; s > 0; s >>= 1) {
+    if (tid < s) {
+      xx[tid] += xx[tid + s];
+      yy[tid] += yy[tid + s];
+      xy[tid] += xy[tid + s];
+    }
+    __syncthreads();
+  }
+  if (xy[0] == 0) {
+    real reciprocal = 1.0 / (sqrt(xx[0]) * sqrt(yy[0]));
+    for (int index = tid; index < width; index += block_size) {
+      prev_grad_x[index] +=
+          scale * grad[ty] * prev_out_y[index] * reciprocal;
+      if (input2_height > 1) {
+        prev_grad_y[index] +=
+            scale * grad[ty] * prev_out_x[index] * reciprocal;
+      } else {
+        paddle::paddleAtomicAdd(
+            prev_grad_y + index,
+            scale * grad[ty] * prev_out_x[index] * reciprocal);
+      }
+    }
+  } else {
+    real reciprocalXY = 1.0 / xy[0];
+    real reciprocalSquareSumX = 1.0 / xx[0];
+    real reciprocalSquareSumY = 1.0 / yy[0];
+    for (int index = tid; index < width; index += block_size) {
+      prev_grad_x[index] += output[ty] * grad[ty] *
+                            (prev_out_y[index] * reciprocalXY -
+                             prev_out_x[index] * reciprocalSquareSumX);
+      if (input2_height > 1) {
+        prev_grad_y[index] += output[ty] * grad[ty] *
+                              (prev_out_x[index] * reciprocalXY -
+                               prev_out_y[index] * reciprocalSquareSumY);
+      } else {
+        paddle::paddleAtomicAdd(
+            prev_grad_y + index,
+            output[ty] * grad[ty] * (prev_out_x[index] * reciprocalXY -
+                                     prev_out_y[index] * reciprocalSquareSumY));
+      }
+    }
+  }
+}
+
+void hlCossimDerivative(const real* grad,
+                        const real* output,
+                        const real* prev_out_x,
+                        const real* prev_out_y,
+                        real* prev_grad_x,
+                        real* prev_grad_y,
+                        size_t width,
+                        size_t input1_height,
+                        size_t input2_height,
+                        real scale) {
+  CHECK_NOTNULL(grad);
+  CHECK_NOTNULL(output);
+  CHECK_NOTNULL(prev_out_x);
+  CHECK_NOTNULL(prev_out_y);
+  CHECK_NOTNULL(prev_grad_x);
+  CHECK_NOTNULL(prev_grad_y);
+  const int block_size = 256;
+  dim3 threads(block_size, 1);
+  dim3 grid(1, input1_height);
+  KeCosSimDerivative<block_size><<<grid, threads, 0, STREAM_DEFAULT>>>(
+      grad, output, prev_out_x, prev_out_y, prev_grad_x, prev_grad_y, width,
+      input1_height, input2_height, scale);
+  CHECK_SYNC("hlCossimDerivative failed");
+}
+
+template <>
+void CosSimBackward<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
+                                     const GpuMatrix& out_val,
+                                     const GpuMatrix& in1_val,
+                                     const GpuMatrix& in2_val,
+                                     GpuMatrix& in1_grad,
+                                     GpuMatrix& in2_grad,
+                                     real scale) {
+  CHECK(out_grad.getData() && out_val.getData() && in1_val.getData() &&
+        in2_val.getData() && in1_grad.getData() && in2_grad.getData());
+  CHECK(out_grad.useGpu_ && out_val.useGpu_ && in1_val.useGpu_ &&
+        in2_val.useGpu_ && in1_grad.useGpu_ && in2_grad.useGpu_)
+      << "Matrix types are not equally GPU";
+
+  size_t dim = in1_val.getWidth();
+  const real* grad = out_grad.getData();
+  const real* out = out_val.getData();
+  const real* prev_out_x = in1_val.getData();
+  const real* prev_out_y = in2_val.getData();
+  real* prev_grad_x = in1_grad.getData();
+  real* prev_grad_y = in2_grad.getData();
+  hlCossimDerivative(grad,
+                     out,
+                     prev_out_x,
+                     prev_out_y,
+                     prev_grad_x,
+                     prev_grad_y,
+                     dim,
+                     in1_val.getHeight(),
+                     in2_val.getHeight(),
+                     scale);
+}
+
+}  // namespace
paddle diff --git a/paddle/function/CosSimOpTest.cpp b/paddle/function/CosSimOpTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..48c815f027161b48c17ce654ab819156fd856199 --- /dev/null +++ b/paddle/function/CosSimOpTest.cpp @@ -0,0 +1,64 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "FunctionTest.h" +#include "paddle/math/Matrix.h" + +using namespace paddle; // NOLINT + +void testCosSimForward(size_t height_x, + size_t height_y, + size_t width, + real scale) { + FunctionCompare test("CosSimForward", FuncConfig().set("scale", scale)); + // prepare input arguments + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width})); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width})); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1}), + ASSIGN_TO); + // run Function + test.run(); +} + +void testCosSimBackward(size_t height_x, + size_t height_y, + size_t width, + real scale) { + FunctionCompare test("CosSimBackward", FuncConfig().set("scale", scale)); + // prepare input arguments + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1})); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1})); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width})); + test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width})); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width}), + ADD_TO); + test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width}), + ADD_TO); + // run Function + test.run(); +} + +TEST(Matrix, cosSim) { + for (auto height_x : {10, 100, 1000}) { + for (auto height_y : {1, height_x}) { + for (auto width : {10, 100, 1000}) { + for (auto scale : {1.0, 2.0}) { + testCosSimForward(height_x, height_y, width, scale); + testCosSimBackward(height_x, height_y, width, scale); + } + } + } + } +} diff --git a/paddle/function/FunctionTest.h b/paddle/function/FunctionTest.h index 00f59f97d4c8c1076abe00866b786615a9801a5d..0cfafdb27f55a3e6617d31a968d2a05fc77f5b46 100644 --- a/paddle/function/FunctionTest.h +++ b/paddle/function/FunctionTest.h @@ -69,6 +69,54 @@ public: gpuMemory_.back()->getBuf(), input.valueType(), input.shape())); } + // assume one copy of sequence is shared by different SequenceArgs + void addSequence(const SequenceIdArg& input) { + CHECK_EQ(input.shape().ndims(), 1UL); + size_t batchSize = input.shape()[0]; + size_t numSeqs = batchSize / 10 + 1; + size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32); + cpuMemory_.emplace_back(std::make_shared(sizeId)); + gpuMemory_.emplace_back(std::make_shared(sizeId)); + cpuSeq_ = std::make_shared(cpuMemory_.back()->getBuf(), + TensorShape{numSeqs + 1}); + gpuSeq_ = std::make_shared(gpuMemory_.back()->getBuf(), + TensorShape{numSeqs + 1}); + /// init sequence Id + initArg(*cpuSeq_, batchSize); + + // todo(tianbing), delete it + CHECK_EQ(cpuSeq_->shape().getElements(), cpuSeq_->numSeqs() + 
1); + + CpuIVector cpuSeq(cpuSeq_->shape().getElements(), (int*)cpuSeq_->data()); + GpuIVector gpuSeq(gpuSeq_->shape().getElements(), (int*)gpuSeq_->data()); + gpuSeq.copyFrom(cpuSeq); + } + + void addInputs(const SequenceArg& input) { + CHECK_EQ(input.shape().ndims(), 2UL); + size_t batchSize = input.shape()[0]; + if (!cpuSeq_ || !gpuSeq_) { // sequence not exist + addSequence(SequenceIdArg(TensorShape{batchSize})); + } + + size_t size = + input.shape().getElements() * sizeOfValuType(input.valueType()); + cpuMemory_.emplace_back(std::make_shared(size)); + gpuMemory_.emplace_back(std::make_shared(size)); + + /// SequenceArg + cpuInputs_.emplace_back( + std::make_shared(cpuMemory_.back()->getBuf(), + input.valueType(), + input.shape(), + *cpuSeq_)); + gpuInputs_.emplace_back( + std::make_shared(gpuMemory_.back()->getBuf(), + input.valueType(), + input.shape(), + *gpuSeq_)); + } + // output need only contains shape, do not contains data. void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) { size_t size = @@ -116,24 +164,31 @@ public: std::make_shared(*gpuSparse_, argType)); } - void addInputs(const SequenceArg& input) { - size_t batchSize = input.shape()[0]; - size_t numSeqs = batchSize / 10 + 1; - - size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32); - cpuMemory_.emplace_back(std::make_shared(sizeId)); - gpuMemory_.emplace_back(std::make_shared(sizeId)); - - TensorShape seqsId({numSeqs + 1}); - // void* cpuBuffer = cpuMemory_.back()->getBuf(); - // void* gpuBuffer = gpuMemory_.back()->getBuf(); + void addOutputs(const SequenceArg& output, ArgType argType = ASSIGN_TO) { + CHECK_EQ(output.shape().ndims(), 2UL); + size_t batchSize = output.shape()[0]; + if (!cpuSeq_ || !gpuSeq_) { // sequence not exist + addSequence(SequenceIdArg(TensorShape{batchSize})); + } size_t size = - input.shape().getElements() * sizeOfValuType(input.valueType()); + output.shape().getElements() * sizeOfValuType(output.valueType()); cpuMemory_.emplace_back(std::make_shared(size)); gpuMemory_.emplace_back(std::make_shared(size)); - // TODO: need be implemented. + /// SequenceArg + cpuOutputs_.emplace_back( + std::make_shared(cpuMemory_.back()->getBuf(), + output.valueType(), + output.shape(), + *cpuSeq_, + argType)); + gpuOutputs_.emplace_back( + std::make_shared(gpuMemory_.back()->getBuf(), + output.valueType(), + output.shape(), + *gpuSeq_, + argType)); } void addInputs(const SparseMatrixArg& input) { @@ -193,14 +248,44 @@ public: std::shared_ptr getGpuFunction() const { return gpuFunc_; } protected: + // only init cpu argument, gpu argument copy from cpu argument. 
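+  // initArg(BufferArg&) and initArg(SequenceArg&) fill the underlying CPU
+  // buffer with uniform random values in (0.001, 1); initArg(SequenceIdArg&)
+  // builds a random, monotonically increasing sequence-start offset array
+  // that ends exactly at batchSize. initInputs()/initOutputs() then copy
+  // these CPU buffers to their GPU counterparts before comparing the two
+  // functions.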
+ void initArg(BufferArg& arg) { + CpuVector vector(arg.shape().getElements(), (real*)arg.data()); + vector.uniform(0.001, 1); + } + + void initArg(SequenceArg& arg) { + /// init only matrix + CpuVector vector(arg.shape().getElements(), (real*)arg.data()); + vector.uniform(0.001, 1); + } + + void initArg(SequenceIdArg& arg, size_t batchSize) { + size_t numSeqs = arg.numSeqs(); + int* buf = reinterpret_cast(arg.data()); + int pos = 0; + size_t maxLen = 2 * batchSize / numSeqs; + for (int i = 0; i < (int)numSeqs; ++i) { + int len = 1 + uniformRandom(std::min( + maxLen, batchSize - pos - numSeqs + i)); + buf[i] = pos; + pos += len; + VLOG(1) << " len=" << len; + } + buf[numSeqs] = batchSize; + } + void initInputs() { for (size_t i = 0; i < cpuInputs_.size(); i++) { if (cpuInputs_[i]->isSparseArg()) { continue; /// sparse matrix already init } - initArg(*cpuInputs_[i]); - + if (cpuInputs_[i]->isSequenceArg()) { + initArg(dynamic_cast(*cpuInputs_[i])); + } else { + initArg(*cpuInputs_[i]); + } // TODO: Need a BufferCopy used to copy from one BufferArg to another. CpuVector cpuVector(cpuInputs_[i]->shape().getElements(), (real*)cpuInputs_[i]->data()); @@ -217,7 +302,11 @@ protected: continue; /// sparse matrix already init } - initArg(*cpuOutputs_[i]); + if (cpuOutputs_[i]->isSequenceArg()) { + initArg(dynamic_cast(*cpuOutputs_[i])); + } else { + initArg(*cpuOutputs_[i]); + } // TODO: Need a BufferCopy used to copy from one BufferArg to another. CpuVector cpuVector(cpuOutputs_[i]->shape().getElements(), @@ -241,28 +330,6 @@ protected: } } - // only init cpu argument, gpu argument copy from cpu argument. - void initArg(BufferArg& arg) { - CpuVector vector(arg.shape().getElements(), (real*)arg.data()); - vector.uniform(0.001, 1); - } - - void initArg(SequenceIdArg& arg, size_t batchSize) { - size_t numSeqs = arg.numSeqs(); - int* buf = reinterpret_cast(arg.data()); - int pos = 0; - size_t maxLen = 2 * batchSize / numSeqs; - for (int i = 0; i < (int)numSeqs; ++i) { - int len = uniformRandom( - std::min(maxLen, batchSize - pos - numSeqs + i)) + - 1; - buf[i] = pos; - pos += len; - VLOG(1) << " len=" << len; - } - buf[numSeqs] = batchSize; - } - protected: std::shared_ptr cpuFunc_; std::shared_ptr gpuFunc_; @@ -274,6 +341,8 @@ protected: std::vector gpuOutputs_; std::shared_ptr cpuSparse_; std::shared_ptr gpuSparse_; + std::shared_ptr cpuSeq_; + std::shared_ptr gpuSeq_; }; } // namespace paddle diff --git a/paddle/function/MulOpTest.cpp b/paddle/function/MulOpTest.cpp index 158c3c90983b12c352765479006669c5c9e5a8aa..8748eb0d79fa0fcb0935eac5bb37b44274128aa0 100644 --- a/paddle/function/MulOpTest.cpp +++ b/paddle/function/MulOpTest.cpp @@ -60,7 +60,7 @@ TEST(MulOp, DDDMatrixMul) { if (transa && transb) { continue; } - VLOG(3) << setiosflags(std::ios::left) << std::setfill(' ') + VLOG(3) << std::setiosflags(std::ios::left) << std::setfill(' ') << " transa=" << transa << " transb=" << transb << " dimM=" << std::setw(5) << dimM << " dimN=" << std::setw(5) << dimN @@ -104,7 +104,7 @@ TEST(MuLOp, DSparseDMul) { for (const auto dimK : {3, 10}) { for (const auto nnz : {3, 10}) { for (const auto FORMAT : {SPARSE_CSR}) { - VLOG(3) << setiosflags(std::ios::left) << std::setfill(' ') + VLOG(3) << std::setiosflags(std::ios::left) << std::setfill(' ') << " dimM=" << std::setw(5) << dimM << " dimN=" << std::setw(5) << dimN << " dimK=" << std::setw(5) << dimK @@ -150,7 +150,7 @@ TEST(MulOp, DDSparseMul) { for (const auto dimK : {3, 10}) { for (const auto nnz : {3, 10}) { for (const auto FORMAT : {SPARSE_CSR, 
SPARSE_CSC}) { - VLOG(3) << setiosflags(std::ios::left) << std::setfill(' ') + VLOG(3) << std::setiosflags(std::ios::left) << std::setfill(' ') << " dimM=" << std::setw(5) << dimM << " dimN=" << std::setw(5) << dimN << " dimK=" << std::setw(5) << dimK @@ -197,7 +197,7 @@ TEST(MulOp, SparseDDMul) { for (const auto dimK : {3, 10}) { for (const auto nnz : {3, 10}) { for (const auto FORMAT : {SPARSE_CSC, SPARSE_CSR}) { - VLOG(3) << setiosflags(std::ios::left) << std::setfill(' ') + VLOG(3) << std::setiosflags(std::ios::left) << std::setfill(' ') << " dimM=" << std::setw(5) << dimM << " dimN=" << std::setw(5) << dimN << " dimK=" << std::setw(5) << dimK diff --git a/paddle/gserver/dataproviders/PyDataProvider2.cpp b/paddle/gserver/dataproviders/PyDataProvider2.cpp index c26e242534f2afcff396762adb085bf99303e2b5..b8079dc0796d0e300e65ac6b6b8d3bc826b1e504 100644 --- a/paddle/gserver/dataproviders/PyDataProvider2.cpp +++ b/paddle/gserver/dataproviders/PyDataProvider2.cpp @@ -647,7 +647,7 @@ public: DataBatch& gpuBatch = *batch; std::vector& gpuArguments = gpuBatch.getStreams(); gpuArguments.resize(cpuArguments.size()); - gpuBatch.setSize(size); + gpuBatch.setSize(bsize); for (size_t i = 0; i < headers_.size(); ++i) { gpuArguments[i].resizeAndCopyFrom( cpuArguments[i], useGpu_, HPPL_STREAM_1); diff --git a/paddle/gserver/evaluators/Evaluator.cpp b/paddle/gserver/evaluators/Evaluator.cpp index ae7508e2bb117a60492e0c28230f2fbb4b14915e..2bf6ead0dc382cd74cf64508835b24b8483dc553 100644 --- a/paddle/gserver/evaluators/Evaluator.cpp +++ b/paddle/gserver/evaluators/Evaluator.cpp @@ -866,21 +866,20 @@ void PnpairEvaluator::calc(std::vector& predictArray) { ClassRegistrar Evaluator::registrar_; Evaluator* Evaluator::create(const EvaluatorConfig& config) { - Evaluator* evaluator = nullptr; - if (config.type() == "classification_error") { - evaluator = new ClassificationErrorEvaluator(); - } else if (config.type() == "sum") { - evaluator = new SumEvaluator(); - } else if (config.type() == "last-column-sum") { - evaluator = new ColumnSumEvaluator(-1); - } else if (config.type() == "last-column-auc") { - evaluator = new AucEvaluator(-1); - } else { - evaluator = registrar_.createByType(config.type()); - } + Evaluator* evaluator = registrar_.createByType(config.type()); evaluator->init(config); return evaluator; } + +REGISTER_EVALUATOR(classification_error, ClassificationErrorEvaluator); +REGISTER_EVALUATOR(sum, SumEvaluator); +static InitFunction __reg_type_auc_sum__([]() { + Evaluator::registrar_.registerClass( + "last-column-sum", [] { return new ColumnSumEvaluator(-1); }); + Evaluator::registrar_.registerClass("last-column-auc", + [] { return new AucEvaluator(-1); }); +}); + /** * @brief print value of each layer. 
* @@ -888,32 +887,10 @@ Evaluator* Evaluator::create(const EvaluatorConfig& config) { */ class ValuePrinter : public Evaluator { public: - ValuePrinter() {} - virtual void eval(const NeuralNetwork& nn) { for (const std::string& name : config_.input_layers()) { - const Argument& argu = nn.getLayer(name)->getOutput(); - if (argu.value) { - std::ostringstream os; - argu.value->print(os); - LOG(INFO) << "layer=" << name << " value matrix:\n" << os.str(); - } - if (argu.ids) { - std::ostringstream os; - argu.ids->print(os, argu.ids->getSize()); - LOG(INFO) << "layer=" << name << " ids vector:\n" << os.str(); - } - if (auto startPos = argu.sequenceStartPositions) { - std::ostringstream os; - startPos->getVector(false)->print(os, startPos->getSize()); - LOG(INFO) << "layer=" << name << " sequence pos vector:\n" << os.str(); - } - if (auto subStartPos = argu.subSequenceStartPositions) { - std::ostringstream os; - subStartPos->getVector(false)->print(os, subStartPos->getSize()); - LOG(INFO) << "layer=" << name << " sub-sequence pos vector:\n" - << os.str(); - } + nn.getLayer(name)->getOutput().printValueString(LOG(INFO), + "layer=" + name + " "); } } @@ -929,8 +906,6 @@ REGISTER_EVALUATOR(value_printer, ValuePrinter); */ class GradientPrinter : public Evaluator { public: - GradientPrinter() {} - virtual void eval(const NeuralNetwork& nn) { for (const std::string& name : config_.input_layers()) { const Argument& argu = nn.getLayer(name)->getOutput(); @@ -939,11 +914,6 @@ public: argu.grad->print(os); LOG(INFO) << "layer=" << name << " grad matrix:\n" << os.str(); } - if (auto startPos = argu.sequenceStartPositions) { - std::ostringstream os; - startPos->getVector(false)->print(os, startPos->getSize()); - LOG(INFO) << "layer=" << name << " sequence pos vector:\n" << os.str(); - } } } diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index a9a9f4f903e305bfe0ee3dd089a85ba524022faa..2ab964b8fc2e080282aa03db4ee6836540e666d7 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -155,7 +155,8 @@ protected: public: explicit BootBiasLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { if (!Layer::init(layerMap, parameterMap)) return false; if (biasParameter_) { @@ -174,7 +175,7 @@ public: } } - virtual void forward(PassType passType) { + void forward(PassType passType) override { if (biases_) { MatrixPtr outV = getOutputValue(); outV->addBias(*(biases_->getW()), 1); @@ -182,7 +183,7 @@ public: } } - virtual void backward(const UpdateCallback& callback) { + void backward(const UpdateCallback& callback) override { if (biases_) { backwardActivation(); biases_->getWGrad()->collectBias(*getOutputGrad(), 1); diff --git a/paddle/gserver/layers/AddtoLayer.h b/paddle/gserver/layers/AddtoLayer.h index 53d3f99cdd3439a1ba85f54526ca65005986c634..4e98c174b462763d3c2714770f66951981afa9f8 100644 --- a/paddle/gserver/layers/AddtoLayer.h +++ b/paddle/gserver/layers/AddtoLayer.h @@ -44,19 +44,20 @@ public: /** * Intialization of AddtoLayer. */ - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; /** * Forward propagation. 
* @note There is no weight matrix for each input, * because it just a simple add operation. */ - void forward(PassType passType); + void forward(PassType passType) override; /** * Backward propagation. */ - void backward(const UpdateCallback& callback = nullptr); + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/AgentLayer.h b/paddle/gserver/layers/AgentLayer.h index 41683ad6712d5df710737cf71c600790fcc8786f..b6dac7ae6fec2d61c60c9548d466233efe9febd5 100644 --- a/paddle/gserver/layers/AgentLayer.h +++ b/paddle/gserver/layers/AgentLayer.h @@ -35,7 +35,8 @@ public: ~AgentLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; // if *numSamples* set, // real layer output will only use first *numSamples* rows @@ -44,8 +45,8 @@ public: numSamples_ = numSamples; } - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr) {} + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override {} }; /** @@ -56,8 +57,8 @@ public: explicit SequenceAgentLayer(const LayerConfig& config) : AgentLayer(config) {} ~SequenceAgentLayer() {} - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr) {} + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override {} }; /** @@ -78,7 +79,8 @@ public: virtual ~GatherAgentLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; // call before addRealLayer void copyIdAndSequenceInfo(const Argument& input, @@ -88,8 +90,8 @@ public: // add one real layer, can call many times void addRealLayer(LayerPtr layer) { realLayers_.push_back(layer); } - void forward(PassType passType); - void backward(const UpdateCallback& callback); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; }; /** @@ -133,7 +135,8 @@ public: virtual ~ScatterAgentLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; /** * @brief set real layer in generation @@ -182,8 +185,8 @@ public: numSequences_ = numSequences; } - void forward(PassType passType); - void backward(const UpdateCallback& callback); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; }; /** diff --git a/paddle/gserver/layers/AverageLayer.h b/paddle/gserver/layers/AverageLayer.h index b3c4ecec8bc6f56b4563ee9f1ada91e4d8f2cbb5..621e1d7bb12ec5b8c7a6173bd601835d9406e814 100644 --- a/paddle/gserver/layers/AverageLayer.h +++ b/paddle/gserver/layers/AverageLayer.h @@ -38,12 +38,11 @@ public: explicit AverageLayer(const LayerConfig& config) : SequencePoolLayer(config) {} - ~AverageLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: MatrixPtr outMtx_; diff --git a/paddle/gserver/layers/BatchNormBaseLayer.h b/paddle/gserver/layers/BatchNormBaseLayer.h 
index 75bda95de1472b08538b48072ddf9ea607b83299..230bafc31d96bbd49481a7ed135be6888688627e 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.h +++ b/paddle/gserver/layers/BatchNormBaseLayer.h @@ -52,7 +52,8 @@ public: */ static Layer* create(const LayerConfig& config); - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; /** * @brief Calculate feature map size. Some input uses frameHeight and diff --git a/paddle/gserver/layers/BatchNormalizationLayer.h b/paddle/gserver/layers/BatchNormalizationLayer.h index 195acbbfc58db8368f6db1c1595dd6b04801ee26..f6115801fc6b341c0718f8851617de43bdeeec09 100644 --- a/paddle/gserver/layers/BatchNormalizationLayer.h +++ b/paddle/gserver/layers/BatchNormalizationLayer.h @@ -33,9 +33,10 @@ public: ~BatchNormalizationLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: /// Epsilon value used in the batch normalization formula. @@ -58,7 +59,7 @@ protected: /// to batch, channels* imagePixels. void shrinkMat(const MatrixPtr& in, MatrixPtr& out); - void onPassEnd() { firstTest_ = true; } + void onPassEnd() override { firstTest_ = true; } MatrixPtr tmpMat_, tmpGrad_; MatrixPtr expandedIn_, expandedOut_; diff --git a/paddle/gserver/layers/BilinearInterpLayer.h b/paddle/gserver/layers/BilinearInterpLayer.h index 4ff4b0ea793dc901d099bf73d55aa15463e62094..27c269f2781c99e4f166ef1052cbf03a773ad57e 100644 --- a/paddle/gserver/layers/BilinearInterpLayer.h +++ b/paddle/gserver/layers/BilinearInterpLayer.h @@ -38,9 +38,10 @@ public: virtual ~BilinearInterpLayer() {} size_t getSize(); - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/BlockExpandLayer.h b/paddle/gserver/layers/BlockExpandLayer.h index cc96fdd03fcac6925a16f0fb91045f065f74e803..8f347400e60ec84fc1b5fdbc1c911a8768b306d0 100644 --- a/paddle/gserver/layers/BlockExpandLayer.h +++ b/paddle/gserver/layers/BlockExpandLayer.h @@ -58,10 +58,11 @@ public: ~BlockExpandLayer() {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/CRFDecodingLayer.h b/paddle/gserver/layers/CRFDecodingLayer.h index 1fd444ad10e71df2bb6d8bdb839e6f02b33d647f..3cbcac6cf62decd43844cc442fc5e4f973d0acfc 100644 --- a/paddle/gserver/layers/CRFDecodingLayer.h +++ b/paddle/gserver/layers/CRFDecodingLayer.h @@ -32,9 +32,10 @@ namespace paddle { class CRFDecodingLayer : public CRFLayer { public: explicit CRFDecodingLayer(const LayerConfig& config) : 
CRFLayer(config) {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; protected: std::unique_ptr crf_; diff --git a/paddle/gserver/layers/CRFLayer.h b/paddle/gserver/layers/CRFLayer.h index d21b32b68c1a40c814af3aa2c285612a5f938d79..de36a85083b6b293fd2d8522ec279a38cc4f8be3 100644 --- a/paddle/gserver/layers/CRFLayer.h +++ b/paddle/gserver/layers/CRFLayer.h @@ -29,9 +29,10 @@ namespace paddle { class CRFLayer : public Layer { public: explicit CRFLayer(const LayerConfig& config) : Layer(config) {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; protected: size_t numClasses_; diff --git a/paddle/gserver/layers/CTCLayer.h b/paddle/gserver/layers/CTCLayer.h index 70d429bad656ade3c05256472d799ae72e128be5..f7a515f312d075c54b4aab2557175c70fdbd9875 100644 --- a/paddle/gserver/layers/CTCLayer.h +++ b/paddle/gserver/layers/CTCLayer.h @@ -22,10 +22,11 @@ namespace paddle { class CTCLayer : public Layer { public: explicit CTCLayer(const LayerConfig& config) : Layer(config) {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - virtual void forward(PassType passType); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; void forwardImp(const Argument& softmaxSeqs, const Argument& labelSeqs); - virtual void backward(const UpdateCallback& callback); + void backward(const UpdateCallback& callback) override; void backwardImp(const UpdateCallback& callback, const Argument& softmaxSeqs, const Argument& labelSeqs); diff --git a/paddle/gserver/layers/ConcatenateLayer.cpp b/paddle/gserver/layers/ConcatenateLayer.cpp index d19adace7d58af16736fc2b6e536f5fd69a19863..c5fc4cf4f81a55a4c57e92dce64c06acd404badd 100644 --- a/paddle/gserver/layers/ConcatenateLayer.cpp +++ b/paddle/gserver/layers/ConcatenateLayer.cpp @@ -28,10 +28,11 @@ public: ~ConcatenateLayer() {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(concat, ConcatenateLayer); @@ -101,10 +102,11 @@ public: ~ConcatenateLayer2() {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: std::vector> projections_; diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index 
aedf4100e32fa1294c361b6163c14eab7869b803..e9d15d94f806a5d2e6f11cbbfc29e291dfe8538f 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -80,7 +80,8 @@ protected: public: explicit ConvBaseLayer(const LayerConfig& config) : Layer(config) {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; /** * imgSizeH_ and imgSizeW_ will be set according to the previous input layers diff --git a/paddle/gserver/layers/ConvShiftLayer.cpp b/paddle/gserver/layers/ConvShiftLayer.cpp index 9bfb1ab7a47b11a6793159aefcb4f9fa12b81a6b..002be415691f0b3df93835915dcbc9d455231422 100644 --- a/paddle/gserver/layers/ConvShiftLayer.cpp +++ b/paddle/gserver/layers/ConvShiftLayer.cpp @@ -47,10 +47,11 @@ public: ~ConvShiftLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(conv_shift, ConvShiftLayer); diff --git a/paddle/gserver/layers/ConvexCombinationLayer.cpp b/paddle/gserver/layers/ConvexCombinationLayer.cpp index ed57f2af3c6455fb89fd05b37bb205e8da0bf7e1..32eb3bf604acaa8f2060882b545efeeb40f8218d 100644 --- a/paddle/gserver/layers/ConvexCombinationLayer.cpp +++ b/paddle/gserver/layers/ConvexCombinationLayer.cpp @@ -49,10 +49,11 @@ public: ~ConvexCombinationLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(convex_comb, ConvexCombinationLayer); diff --git a/paddle/gserver/layers/CosSimLayer.cpp b/paddle/gserver/layers/CosSimLayer.cpp index 254120443dc3d41bf2422be2e88cb376d70c93d4..a6c0300acf6752a3536e7939577b561fd97d1eb8 100644 --- a/paddle/gserver/layers/CosSimLayer.cpp +++ b/paddle/gserver/layers/CosSimLayer.cpp @@ -26,15 +26,23 @@ bool CosSimLayer::init(const LayerMap& layerMap, Layer::init(layerMap, parameterMap); CHECK_EQ(inputLayers_.size(), 2LU); + + createFunction(forward_, + "CosSimForward", + FuncConfig().set("scale", (real)config_.cos_scale())); + createFunction(backward_, + "CosSimBackward", + FuncConfig().set("scale", (real)config_.cos_scale())); + return true; } void CosSimLayer::forward(PassType passType) { Layer::forward(passType); - /* malloc memory for the output_ if necessary */ int batchSize = getInputValue(0)->getHeight(); int size = getSize(); + CHECK_EQ(forward_.size(), 1) << "Only one forward function needed"; { REGISTER_TIMER_INFO("CosFwResetTimer", getName().c_str()); @@ -42,26 +50,43 @@ void CosSimLayer::forward(PassType passType) { } MatrixPtr outV = getOutputValue(); - /* activation */ { REGISTER_TIMER_INFO("CosFwAtvTimer", getName().c_str()); MatrixPtr prevOut1 = getInputValue(0); MatrixPtr prevOut2 = getInputValue(1); - outV->cosSim(*prevOut1, *prevOut2, config_.cos_scale()); + + CHECK(outV && prevOut1 && prevOut2); + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*prevOut1); + inputs.addArg(*prevOut2); + outputs.addArg(*outV, ASSIGN_TO); + 
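+    // Run the CosSimForward function that createFunction() bound in init();
+    // the binding already selected the CPU- or GPU-registered implementation
+    // (REGISTER_TYPED_FUNC) according to this layer's device.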
forward_[0]->calc(inputs, outputs); } } void CosSimLayer::backward(const UpdateCallback& callback) { /* activation */ { REGISTER_TIMER_INFO("CosBpAtvTimer", getName().c_str()); - MatrixPtr outG = this->getOutputGrad(); - - outG->cosSimDerivative(*this->getOutputValue(), - *getInputValue(0), - *getInputValue(1), - *getInputGrad(0), - *getInputGrad(1), - config_.cos_scale()); + CHECK_EQ(backward_.size(), 1) << "Only one backward function needed"; + + const auto outG = this->getOutputGrad(); + const auto outV = this->getOutputValue(); + const auto inV1 = this->getInputValue(0); + const auto inV2 = this->getInputValue(1); + auto inG1 = this->getInputGrad(0); + auto inG2 = this->getInputGrad(1); + CHECK(outG && outV && inV1 && inV2 && inG1 && inG2); + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*outG); + inputs.addArg(*outV); + inputs.addArg(*inV1); + inputs.addArg(*inV2); + outputs.addArg(*inG1, ADD_TO); + outputs.addArg(*inG2, ADD_TO); + + backward_[0]->calc(inputs, outputs); } } diff --git a/paddle/gserver/layers/CosSimLayer.h b/paddle/gserver/layers/CosSimLayer.h index 5dcc5d8a5b4dc76cb6cea023a874049731a26516..8afaee62c2dcacba006846df0111fcbe8f7575e4 100644 --- a/paddle/gserver/layers/CosSimLayer.h +++ b/paddle/gserver/layers/CosSimLayer.h @@ -28,7 +28,7 @@ namespace paddle { * * - Input1: A vector (batchSize * dataDim) * * - Input2: A vector (batchSize * dataDim) or (1 * dataDim) * - * - Output: A vector (dataDim * 1) + * - Output: A vector (batchSize * 1) * * The config file api is cos_sim. */ @@ -38,10 +38,11 @@ public: ~CosSimLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/CosSimVecMatLayer.cpp b/paddle/gserver/layers/CosSimVecMatLayer.cpp index ad490b0b8c4656c1eabf519233f2386b4b6e9417..aabafd473aa1e06a767d48d4c49b7b8662e992e7 100644 --- a/paddle/gserver/layers/CosSimVecMatLayer.cpp +++ b/paddle/gserver/layers/CosSimVecMatLayer.cpp @@ -18,7 +18,6 @@ limitations under the License. 
*/ #include "paddle/utils/Stat.h" namespace paddle { - /** * @brief A layer for computing cosine similarity between a vector * and each row of a matrix @@ -46,10 +45,11 @@ public: ~CosSimVecMatLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(cos_vm, CosSimVecMatLayer); @@ -97,11 +97,22 @@ bool CosSimVecMatLayer::init(const LayerMap& layerMap, dataDim, /* trans= */ false, useGpu_); + + CHECK(tmpRow0 && tmpRow1 && tmpRow2 && tmpRow3 && tmpMtx0 && tmpMtx1); + + createFunction(forward_, + "CosSimForward", + FuncConfig().set("scale", (real)config_.cos_scale())); + createFunction(backward_, + "CosSimBackward", + FuncConfig().set("scale", (real)config_.cos_scale())); + return true; } void CosSimVecMatLayer::forward(PassType passType) { Layer::forward(passType); + CHECK_EQ(forward_.size(), 1) << "Only one forward function needed"; MatrixPtr inV0 = getInputValue(0); MatrixPtr inV1 = getInputValue(1); @@ -117,17 +128,25 @@ void CosSimVecMatLayer::forward(PassType passType) { } MatrixPtr outV = getOutputValue(); - + CHECK(outV && inV0 && inV1); REGISTER_TIMER_INFO("FwCosVMTimer", getName().c_str()); for (size_t i = 0; i < batchSize; i++) { tmpRow0->setData(inV0->rowBuf(i)); tmpMtx0->setData(inV1->rowBuf(i)); tmpRow2->setData(outV->rowBuf(i)); - tmpRow2->cosSim(*(tmpMtx0), *(tmpRow0), config_.cos_scale()); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*tmpMtx0); + inputs.addArg(*tmpRow0); + outputs.addArg(*tmpRow2, ASSIGN_TO); + forward_[0]->calc(inputs, outputs); } } void CosSimVecMatLayer::backward(const UpdateCallback& callback) { + CHECK_EQ(backward_.size(), 1) << "Only one forward function needed"; + MatrixPtr inV0 = getInputValue(0); MatrixPtr inV1 = getInputValue(1); MatrixPtr inG0 = getInputGrad(0); @@ -136,27 +155,27 @@ void CosSimVecMatLayer::backward(const UpdateCallback& callback) { MatrixPtr outG = getOutputGrad(); size_t batchSize = inV0->getHeight(); - + CHECK(inV0 && inV1 && inG0 && inG1 && outV && outG); REGISTER_TIMER_INFO("BwCosVMTimer", getName().c_str()); - if (inG0 && inG1) { - for (size_t i = 0; i < batchSize; i++) { - tmpRow0->setData(inV0->rowBuf(i)); - tmpRow1->setData(inG0->rowBuf(i)); - tmpMtx0->setData(inV1->rowBuf(i)); - tmpMtx1->setData(inG1->rowBuf(i)); - tmpRow2->setData(outV->rowBuf(i)); - tmpRow3->setData(outG->rowBuf(i)); - - tmpRow3->cosSimDerivative(*(tmpRow2), - *(tmpMtx0), - *(tmpRow0), - *(tmpMtx1), - *(tmpRow1), - config_.cos_scale()); - } - } else { - CHECK(!inG0 || !inG1) << "Not supported"; + for (size_t i = 0; i < batchSize; i++) { + tmpRow0->setData(inV0->rowBuf(i)); + tmpRow1->setData(inG0->rowBuf(i)); + tmpMtx0->setData(inV1->rowBuf(i)); + tmpMtx1->setData(inG1->rowBuf(i)); + tmpRow2->setData(outV->rowBuf(i)); + tmpRow3->setData(outG->rowBuf(i)); + + BufferArgs inputs; + BufferArgs outputs; + inputs.addArg(*tmpRow3); + inputs.addArg(*tmpRow2); + inputs.addArg(*tmpMtx0); + inputs.addArg(*tmpRow0); + outputs.addArg(*tmpMtx1, ADD_TO); + outputs.addArg(*tmpRow1, ADD_TO); + + backward_[0]->calc(inputs, outputs); } } diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/gserver/layers/CostLayer.cpp index 7e9519f6b3af50bf47b660b285c3593087f80271..998b8d7d3034cb18fbab242c66656092bfc50fcb 100644 --- 
a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/gserver/layers/CostLayer.cpp @@ -367,8 +367,6 @@ void LambdaCost::backward(const UpdateCallback& callback) { getInputGrad(0)->add(*marginGrad_); } -void LambdaCost::onPassEnd() {} - void LambdaCost::calcGrad(const real* outputScore, const real* score, real* gradData, @@ -611,14 +609,15 @@ class SumCostLayer : public Layer { public: explicit SumCostLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { bool ret = Layer::init(layerMap, parameterMap); if (!ret) return ret; CHECK_EQ(inputLayers_.size(), 1UL); return true; } - virtual void forward(PassType passType) { + void forward(PassType passType) override { Layer::forward(passType); const MatrixPtr& input = getInputValue(0); @@ -629,7 +628,7 @@ public: output_.value->sumRows(*input, /* scaleSum= */ 1, /* scaleDest= */ 0); } - virtual void backward(const UpdateCallback& callback = nullptr) { + void backward(const UpdateCallback& callback = nullptr) override { getInputGrad(0)->add((real)1); } }; diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/gserver/layers/CostLayer.h index 7f73bdb3f7d63ef1c8d76deb64f40d19d20f87c7..b3045e0b31308abf2caa90cbd21f105e685ef341 100644 --- a/paddle/gserver/layers/CostLayer.h +++ b/paddle/gserver/layers/CostLayer.h @@ -32,15 +32,16 @@ class CostLayer : public Layer { public: explicit CostLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; LayerPtr getOutputLayer() { return inputLayers_[0]; } LayerPtr getLabelLayer() { return inputLayers_[1]; } - virtual void forward(PassType passType); + void forward(PassType passType) override; - virtual void backward(const UpdateCallback& callback = nullptr); + void backward(const UpdateCallback& callback = nullptr) override; virtual void forwardImp(Matrix& outputValue, Argument& label, @@ -68,11 +69,14 @@ public: explicit MultiClassCrossEntropy(const LayerConfig& config) : CostLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forwardImp(Matrix& output, Argument& label, Matrix& cost); + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad); + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; }; /** @@ -95,11 +99,14 @@ public: explicit MultiClassCrossEntropyWithSelfNorm(const LayerConfig& config) : CostLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forwardImp(Matrix& output, Argument& label, Matrix& cost); + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad); + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; protected: MatrixPtr sftMaxSum_; @@ -117,11 +124,14 @@ public: explicit SoftBinaryClassCrossEntropy(const LayerConfig& config) : CostLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& 
parameterMap) override; - void forwardImp(Matrix& output, Argument& label, Matrix& cost); + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad); + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; protected: MatrixPtr targetPerDim_; @@ -139,11 +149,14 @@ public: explicit SumOfSquaresCostLayer(const LayerConfig& config) : CostLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forwardImp(Matrix& output, Argument& label, Matrix& cost); + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad); + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; }; /** @@ -162,17 +175,18 @@ class RankingCost : public Layer { public: explicit RankingCost(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; LayerPtr getOutputLayer(size_t i) { return inputLayers_[i]; } LayerPtr getLabelLayer() { return inputLayers_[2]; } - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr); + void backward(const UpdateCallback& callback = nullptr) override; - void onPassEnd(); + void onPassEnd() override; void forwardImp(Matrix& output, Argument& label, Matrix& cost) { (void)output; @@ -214,17 +228,16 @@ class LambdaCost : public Layer { public: explicit LambdaCost(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; LayerPtr getOutputLayer() { return inputLayers_[0]; } LayerPtr getScoreLayer() { return inputLayers_[1]; } - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback& callback = nullptr); - - void onPassEnd(); + void backward(const UpdateCallback& callback = nullptr) override; real calcNDCG(const real* outputScore, const real* score, int size); void calcGrad(const real* outputScore, @@ -256,11 +269,14 @@ public: explicit MultiBinaryLabelCrossEntropy(const LayerConfig& config) : CostLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forwardImp(Matrix& output, Argument& label, Matrix& cost); + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; - void backwardImp(Matrix& outputValue, Argument& label, Matrix& outputGrad); + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; }; /** @@ -282,13 +298,16 @@ class HuberTwoClass : public CostLayer { public: explicit HuberTwoClass(const LayerConfig& config) : CostLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forwardImp(Matrix& output, Argument& label, Matrix& cost); + void forwardImp(Matrix& output, Argument& label, Matrix& cost) override; void forwardImpIn(Matrix& output, Argument& label, Matrix& cost); - void backwardImp(Matrix& 
outputValue, Argument& label, Matrix& outputGrad); + void backwardImp(Matrix& outputValue, + Argument& label, + Matrix& outputGrad) override; void backwardImpIn(Matrix& outputValue, Argument& label, Matrix& outputGrad); }; diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.h b/paddle/gserver/layers/CudnnBatchNormLayer.h index b1e7d2082f1443313bfc858a17adfd737ecff98f..413efd4d3ecd734b343efbcf8328ac0592daddda 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.h +++ b/paddle/gserver/layers/CudnnBatchNormLayer.h @@ -35,14 +35,15 @@ public: ~CudnnBatchNormLayer(); - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; /** * reshape tensor of ioDesc_. */ void reshape(int batchSize); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: /** diff --git a/paddle/gserver/layers/CudnnConvLayer.h b/paddle/gserver/layers/CudnnConvLayer.h index b869c695bd753076c6501a1253fcad22139ccadf..919b1efc4e453219a6c2ab1a11c61ccb99404084 100644 --- a/paddle/gserver/layers/CudnnConvLayer.h +++ b/paddle/gserver/layers/CudnnConvLayer.h @@ -45,9 +45,10 @@ public: ~CudnnConvLayer(); - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - void forward(PassType passType); - void backward(const UpdateCallback& callback); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; void addBiases(); void bpropBiases(); }; diff --git a/paddle/gserver/layers/CudnnPoolLayer.h b/paddle/gserver/layers/CudnnPoolLayer.h index 072b2f9513f4ef8aed03ecfa7a9014667bb2ce9e..f0aa22fe3af90c9233330c15fc56c3696a624446 100644 --- a/paddle/gserver/layers/CudnnPoolLayer.h +++ b/paddle/gserver/layers/CudnnPoolLayer.h @@ -45,7 +45,8 @@ public: hl_pooling_mode_t* mode = nullptr); explicit CudnnPoolLayer(const LayerConfig& config); ~CudnnPoolLayer(); - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; /** * Reshape input and output tensor descriptor. @@ -53,8 +54,8 @@ public: * So reshaping is needed. */ void reshape(int batchSize); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/DataLayer.h b/paddle/gserver/layers/DataLayer.h index d3bc97bb6cd0b8faf8ae108a0147d77854596e25..a9cf1f943c260a934564a19aecda28c24ccff43c 100644 --- a/paddle/gserver/layers/DataLayer.h +++ b/paddle/gserver/layers/DataLayer.h @@ -33,13 +33,13 @@ public: /** * Prefetch sparse matrix/ids only. */ - void prefetch() { output_ = data_; } + void prefetch() override { output_ = data_; } /** * Forward propagation. Copy data_ (value, in, grad, ids, cpuSequenceDims, * sequenceStartPositions, subSequenceStartPositions, strs) to output_. */ - virtual void forward(PassType passType) { + void forward(PassType passType) override { Layer::forward(passType); copyDataToOutput(output_); if (FLAGS_show_layer_stat) { @@ -50,9 +50,9 @@ public: /** * Data layer's backward propagation do nothing. 
*/ - virtual void backward(const UpdateCallback& callback) { (void)callback; } + void backward(const UpdateCallback& callback) override { (void)callback; } - virtual void copyOutputToOtherDevice() { + void copyOutputToOtherDevice() override { for (size_t i = 0; i != outputOtherDevice_.size(); i++) { copyDataToOutput(outputOtherDevice_[i]); } diff --git a/paddle/gserver/layers/DataNormLayer.h b/paddle/gserver/layers/DataNormLayer.h index b3043cffd210feaf9ddaed096de762aa7e2a6139..f0fd044e5b83430a4028a227c7d5a31b6fa86f20 100644 --- a/paddle/gserver/layers/DataNormLayer.h +++ b/paddle/gserver/layers/DataNormLayer.h @@ -44,10 +44,11 @@ public: ~DataNormLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: int mode_; diff --git a/paddle/gserver/layers/EosIdCheckLayer.cpp b/paddle/gserver/layers/EosIdCheckLayer.cpp index fa53e2e4cfc8a220eeb2a637d7fe759f1744f9d5..686f1fa0543cb3629ac223316e595e642a9e7d76 100644 --- a/paddle/gserver/layers/EosIdCheckLayer.cpp +++ b/paddle/gserver/layers/EosIdCheckLayer.cpp @@ -27,14 +27,14 @@ class EosIdCheckLayer : public Layer { public: explicit EosIdCheckLayer(const LayerConfig& config) : Layer(config) {} - virtual bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { bool ret = Layer::init(layerMap, parameterMap); CHECK_EQ(1UL, inputLayers_.size()); return ret; } - virtual void forward(PassType passType) { + void forward(PassType passType) override { Layer::forward(passType); const Argument& input = getInput(0); @@ -42,7 +42,7 @@ public: output_.ids->isEqualTo(*input.ids, config_.eos_id()); } - virtual void backward(const UpdateCallback& callback) {} + void backward(const UpdateCallback& callback) override {} }; REGISTER_LAYER(eos_id, EosIdCheckLayer); diff --git a/paddle/gserver/layers/ExpandConvBaseLayer.h b/paddle/gserver/layers/ExpandConvBaseLayer.h index 8445642217cf3e83441ddd9beec80f99faf946bc..aabcdfc392d3e242df84c820c336d8b32c7cb04f 100644 --- a/paddle/gserver/layers/ExpandConvBaseLayer.h +++ b/paddle/gserver/layers/ExpandConvBaseLayer.h @@ -48,7 +48,8 @@ public: ~ExpandConvBaseLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; size_t getOutputSize(); /** diff --git a/paddle/gserver/layers/ExpandConvLayer.h b/paddle/gserver/layers/ExpandConvLayer.h index de81a017e1bac38a5717e8c83a028f5408c0e084..60681690e5dd55b2e9aa4e1f25758db6033665a6 100644 --- a/paddle/gserver/layers/ExpandConvLayer.h +++ b/paddle/gserver/layers/ExpandConvLayer.h @@ -35,10 +35,11 @@ public: ~ExpandConvLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/ExpandConvTransLayer.h b/paddle/gserver/layers/ExpandConvTransLayer.h index 
4a527d67995e255c65fea1f310551f8de5630030..00b8f241889fdd3f423d75dedd9068aa3674f190 100644 --- a/paddle/gserver/layers/ExpandConvTransLayer.h +++ b/paddle/gserver/layers/ExpandConvTransLayer.h @@ -34,10 +34,11 @@ public: ~ExpandConvTransLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/ExpandLayer.h b/paddle/gserver/layers/ExpandLayer.h index 5c636144235cdb3800aa921464985616f8ee9203..c21b3350e2bc4b136eaf50f96799f479a13df6bd 100644 --- a/paddle/gserver/layers/ExpandLayer.h +++ b/paddle/gserver/layers/ExpandLayer.h @@ -53,10 +53,11 @@ public: ~ExpandLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/FeatureMapExpandLayer.cpp b/paddle/gserver/layers/FeatureMapExpandLayer.cpp index 4b685812aac24782e4fe47fa85c7e91bd3494087..b3850f543af74abbddaac5bb0a32851f2d3297d0 100644 --- a/paddle/gserver/layers/FeatureMapExpandLayer.cpp +++ b/paddle/gserver/layers/FeatureMapExpandLayer.cpp @@ -46,10 +46,11 @@ public: ~FeatureMapExpandLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(featmap_expand, FeatureMapExpandLayer); diff --git a/paddle/gserver/layers/FullyConnectedLayer.h b/paddle/gserver/layers/FullyConnectedLayer.h index ccd584585c97cb679332cbd10d6f3a1306ca5a54..64e7a050125aa92b414e58c7678bf87efd01103f 100644 --- a/paddle/gserver/layers/FullyConnectedLayer.h +++ b/paddle/gserver/layers/FullyConnectedLayer.h @@ -36,13 +36,14 @@ public: explicit FullyConnectedLayer(const LayerConfig& config) : Layer(config) {} ~FullyConnectedLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; Weight& getWeight(int idx) { return *weights_[idx]; } - void prefetch(); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void prefetch() override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/GatedRecurrentLayer.h b/paddle/gserver/layers/GatedRecurrentLayer.h index 25770ce57fbaa4d16c9454d824800f2f0c7f957d..58dd760eb870e9570f8a406f098f69c5fdf6477a 100644 --- a/paddle/gserver/layers/GatedRecurrentLayer.h +++ b/paddle/gserver/layers/GatedRecurrentLayer.h @@ -50,17 +50,18 @@ class GatedRecurrentLayer : public Layer, public GruCompute { public: explicit GatedRecurrentLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& 
parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback& callback); + void backward(const UpdateCallback& callback) override; - void resetState(); + void resetState() override; - void setState(LayerStatePtr state); + void setState(LayerStatePtr state) override; - LayerStatePtr getState(); + LayerStatePtr getState() override; protected: void forwardSequence(int batchSize, diff --git a/paddle/gserver/layers/GetOutputLayer.cpp b/paddle/gserver/layers/GetOutputLayer.cpp index b77fdbb30e11b72b0c7de765df173204aa0b6851..4e29efd4612b18e655ba7674a3fd7890ce3f0e79 100644 --- a/paddle/gserver/layers/GetOutputLayer.cpp +++ b/paddle/gserver/layers/GetOutputLayer.cpp @@ -22,17 +22,18 @@ public: ~GetOutputLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { if (!Layer::init(layerMap, parameterMap)) return false; CHECK_EQ(1U, inputLayers_.size()); CHECK_NE(inputArgument_[0], ""); return true; } - void forward(PassType passType) { + void forward(PassType passType) override { output_ = getPrev(0)->getOutput(inputArgument_[0]); } - void backward(const UpdateCallback& callback = nullptr) {} + void backward(const UpdateCallback& callback = nullptr) override {} }; REGISTER_LAYER(get_output, GetOutputLayer); diff --git a/paddle/gserver/layers/GruStepLayer.cpp b/paddle/gserver/layers/GruStepLayer.cpp index 4a1006aa941f396c233a0cecfc38228f1f9fafe1..5b5cb25f9269a30f79d602b342411d0e6bfa429b 100644 --- a/paddle/gserver/layers/GruStepLayer.cpp +++ b/paddle/gserver/layers/GruStepLayer.cpp @@ -55,10 +55,11 @@ public: ~GruStepLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(gru_step, GruStepLayer); diff --git a/paddle/gserver/layers/HierarchicalSigmoidLayer.h b/paddle/gserver/layers/HierarchicalSigmoidLayer.h index 70da3ac126e147387b20c5a97d0116a5a679e044..3f6875fb9f007c0938bfcd7cad99c73b4ba1511b 100644 --- a/paddle/gserver/layers/HierarchicalSigmoidLayer.h +++ b/paddle/gserver/layers/HierarchicalSigmoidLayer.h @@ -61,9 +61,10 @@ class HierarchicalSigmoidLayer : public Layer { public: explicit HierarchicalSigmoidLayer(const LayerConfig& config) : Layer(config) {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; protected: /** diff --git a/paddle/gserver/layers/InterpolationLayer.cpp b/paddle/gserver/layers/InterpolationLayer.cpp index 44fe1fb1fea4203a4a1cac67c581b13adda65966..eac7428571980baf6b2ddb8b2cc85b9c98afa5d6 100644 --- a/paddle/gserver/layers/InterpolationLayer.cpp +++ b/paddle/gserver/layers/InterpolationLayer.cpp @@ -43,10 +43,11 @@ public: ~InterpolationLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const 
ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(interpolation, InterpolationLayer); diff --git a/paddle/gserver/layers/LstmLayer.h b/paddle/gserver/layers/LstmLayer.h index f49df2c412f05f74da455d41cdf7c9bd4b9ec2e2..c45a52d2e9aaf41a8e02495cc2deae60ab13650a 100644 --- a/paddle/gserver/layers/LstmLayer.h +++ b/paddle/gserver/layers/LstmLayer.h @@ -74,17 +74,18 @@ class LstmLayer : public Layer, public LstmCompute { public: explicit LstmLayer(const LayerConfig &config) : Layer(config) {} - bool init(const LayerMap &layerMap, const ParameterMap ¶meterMap); + bool init(const LayerMap &layerMap, + const ParameterMap ¶meterMap) override; - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback &callback); + void backward(const UpdateCallback &callback) override; - void resetState(); + void resetState() override; - void setState(LayerStatePtr state); + void setState(LayerStatePtr state) override; - LayerStatePtr getState(); + LayerStatePtr getState() override; protected: /** diff --git a/paddle/gserver/layers/LstmStepLayer.cpp b/paddle/gserver/layers/LstmStepLayer.cpp index 5fc6474b8653f4c7dac284e11d88f803405169a3..568277a90c62c73a811dcbf66782a4bdc4021b81 100644 --- a/paddle/gserver/layers/LstmStepLayer.cpp +++ b/paddle/gserver/layers/LstmStepLayer.cpp @@ -35,10 +35,11 @@ public: ~LstmStepLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(lstm_step, LstmStepLayer); diff --git a/paddle/gserver/layers/MDLstmLayer.cpp b/paddle/gserver/layers/MDLstmLayer.cpp index 88d934d782b549a984f1d7798e54bcc4436ea0cf..be0f2a07d4aae253b7b18dbe406c4b94bf96bc8e 100644 --- a/paddle/gserver/layers/MDLstmLayer.cpp +++ b/paddle/gserver/layers/MDLstmLayer.cpp @@ -181,11 +181,12 @@ class MDLstmLayer : public LstmLayer { public: explicit MDLstmLayer(const LayerConfig& config) : LstmLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback& callback); + void backward(const UpdateCallback& callback) override; protected: void forwardOneSequence(int start, CoordIterator& coordIter); diff --git a/paddle/gserver/layers/MaxIdLayer.cpp b/paddle/gserver/layers/MaxIdLayer.cpp index 80555f3f7b324100c059c3356a4a2e462bc6face..9e72b167cd963ae4928bf85503214dd7cee31148 100644 --- a/paddle/gserver/layers/MaxIdLayer.cpp +++ b/paddle/gserver/layers/MaxIdLayer.cpp @@ -30,8 +30,8 @@ private: public: explicit MaxIdLayer(const LayerConfig& config) : Layer(config) {} - virtual bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { bool ret = Layer::init(layerMap, parameterMap); CHECK_EQ(1UL, inputLayers_.size()); @@ -40,7 +40,7 @@ public: return ret; } - virtual void forward(PassType passType) { + void 
forward(PassType passType) override { Layer::forward(passType); const Argument& input = getInput(0); size_t batchSize = input.getBatchSize(); @@ -54,7 +54,7 @@ public: input.value->rowMax(*output_.ids, *output_.in); } - virtual void backward(const UpdateCallback& callback) {} + void backward(const UpdateCallback& callback) override {} }; REGISTER_LAYER(maxid, MaxIdLayer); diff --git a/paddle/gserver/layers/MaxLayer.h b/paddle/gserver/layers/MaxLayer.h index 472ee0ccca196250f4b81fc1e921aaee5f352b7e..baa58ca2d7a6970f0d2f3ef6f8609404c82efa30 100644 --- a/paddle/gserver/layers/MaxLayer.h +++ b/paddle/gserver/layers/MaxLayer.h @@ -42,14 +42,13 @@ protected: public: explicit MaxLayer(const LayerConfig& config) : SequencePoolLayer(config) {} - ~MaxLayer() {} - - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { return SequencePoolLayer::init(layerMap, parameterMap); } - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/MaxOutLayer.h b/paddle/gserver/layers/MaxOutLayer.h index 59c2245e0d6490d4f8e1b77b1c88267747aaa63a..73fd8536be56b2c620fbfdea1937f3acd593bf05 100644 --- a/paddle/gserver/layers/MaxOutLayer.h +++ b/paddle/gserver/layers/MaxOutLayer.h @@ -45,10 +45,11 @@ public: explicit MaxOutLayer(const LayerConfig& config) : Layer(config) {} virtual ~MaxOutLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/MixedLayer.h b/paddle/gserver/layers/MixedLayer.h index 9655a152c7bc96fb3941fcbd9db4ff71a59e4ebe..755c9deb8b1be34b6f44a7b30b107f99102a3853 100644 --- a/paddle/gserver/layers/MixedLayer.h +++ b/paddle/gserver/layers/MixedLayer.h @@ -35,21 +35,22 @@ public: ~MixedLayer() {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - virtual void prefetch(); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); - virtual void resetState(); + void prefetch() override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + void resetState() override; /** * setState() should be called after getState(). * Argument state consists of all projections states. */ - virtual void setState(LayerStatePtr state); + void setState(LayerStatePtr state) override; /** * Return state which consists of all projections states. 
*/ - virtual LayerStatePtr getState(); + LayerStatePtr getState() override; protected: std::vector<std::unique_ptr<Projection>> projections_; diff --git a/paddle/gserver/layers/MultiplexLayer.cpp b/paddle/gserver/layers/MultiplexLayer.cpp index d09720c5255747df11d4d7367f67a245e63e6846..297972b3cd9e4dfba94e2597053ab7c7c560c9dd 100644 --- a/paddle/gserver/layers/MultiplexLayer.cpp +++ b/paddle/gserver/layers/MultiplexLayer.cpp @@ -69,10 +69,11 @@ public: ~MultiplexLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; private: /** diff --git a/paddle/gserver/layers/NCELayer.cpp b/paddle/gserver/layers/NCELayer.cpp index 3542e739df8d03470bf2c455b4f3492a7f9e973a..0bc2ef11829337d9b765ef00066289494eb984b3 100644 --- a/paddle/gserver/layers/NCELayer.cpp +++ b/paddle/gserver/layers/NCELayer.cpp @@ -61,7 +61,8 @@ public: rand_(0, config.num_classes() - 1), prepared_(false) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { /* Initialize the basic parent class */ Layer::init(layerMap, parameterMap); @@ -146,7 +147,7 @@ public: prepared_ = true; } - void prefetch() { + void prefetch() override { prepareSamples(); IVector::resizeOrCreate(labelIds_, samples_.size(), useGpu_); int* ids = labelIds_->getData(); @@ -163,7 +164,7 @@ public: } } - void forward(PassType passType) { + void forward(PassType passType) override { Layer::forward(passType); CHECK(!useGpu_) << "GPU is not supported"; @@ -199,7 +200,7 @@ public: forwardCost(); } - void backward(const UpdateCallback& callback) { + void backward(const UpdateCallback& callback) override { Matrix::resizeOrCreate(sampleOut_.grad, 1, samples_.size(), diff --git a/paddle/gserver/layers/NormLayer.h b/paddle/gserver/layers/NormLayer.h index 011bab8fdedab00b336290a245b82de07496b554..e77faaa322570933b3ea2de877b7859857306432 100644 --- a/paddle/gserver/layers/NormLayer.h +++ b/paddle/gserver/layers/NormLayer.h @@ -30,7 +30,8 @@ class NormLayer : public Layer { public: explicit NormLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override { Layer::init(layerMap, parameterMap); return true; } @@ -56,9 +57,10 @@ protected: public: explicit ResponseNormLayer(const LayerConfig& config) : NormLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - void forward(PassType passType) { LOG(FATAL) << "Not implemented"; } - void backward(const UpdateCallback& callback = nullptr) { + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override { LOG(FATAL) << "Not implemented"; } + void backward(const UpdateCallback& callback = nullptr) override { LOG(FATAL) << "Not implemented"; } }; diff --git a/paddle/gserver/layers/NormProjectionLayer.h b/paddle/gserver/layers/NormProjectionLayer.h index 2c0d8a3a718c484508b2bf6d4e7861d54a1682bb..2997ae8848c438fa13037ccf03c1faca9ad73224 100644 --- a/paddle/gserver/layers/NormProjectionLayer.h +++ b/paddle/gserver/layers/NormProjectionLayer.h @@ -36,9 +36,10 @@ public: size_t getSize(); - bool 
init(const LayerMap& layerMap, const ParameterMap& parameterMap); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: TensorShape shape_; diff --git a/paddle/gserver/layers/OuterProdLayer.cpp b/paddle/gserver/layers/OuterProdLayer.cpp index b606e4436567eb2a8df9fd501a2af8c8aa1d2fdf..283fdb003a2bb9474eac7a379ceb2c02027cfc5f 100644 --- a/paddle/gserver/layers/OuterProdLayer.cpp +++ b/paddle/gserver/layers/OuterProdLayer.cpp @@ -38,10 +38,11 @@ public: ~OuterProdLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(out_prod, OuterProdLayer); diff --git a/paddle/gserver/layers/PadLayer.h b/paddle/gserver/layers/PadLayer.h index 3e3a21a9970370c2bc9c2ac656af776719dfca24..b2bbf28082e630aeb429ee997a1d43ce7ba05d1c 100644 --- a/paddle/gserver/layers/PadLayer.h +++ b/paddle/gserver/layers/PadLayer.h @@ -29,9 +29,10 @@ public: ~PadLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; protected: void setOutDims(const size_t batchSize); diff --git a/paddle/gserver/layers/ParameterReluLayer.h b/paddle/gserver/layers/ParameterReluLayer.h index a82497fc01ca1f63719a905c7545911a7e05289b..9a11b81ebf1f5c06355fc107b00aa69b65148ed5 100644 --- a/paddle/gserver/layers/ParameterReluLayer.h +++ b/paddle/gserver/layers/ParameterReluLayer.h @@ -56,9 +56,10 @@ public: ~ParameterReluLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/PoolLayer.h b/paddle/gserver/layers/PoolLayer.h index 318b89d7c2bce896d183eba8c48c230d962918a5..d43292ad2d4bbe1229ca59ca21bee92c9ec006a3 100644 --- a/paddle/gserver/layers/PoolLayer.h +++ b/paddle/gserver/layers/PoolLayer.h @@ -46,7 +46,8 @@ public: */ static Layer* create(const LayerConfig& config); - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/PoolProjectionLayer.h b/paddle/gserver/layers/PoolProjectionLayer.h index 3dc6af2f0e9fb1a12eca7bc0c531a2e7b151fb8a..e31116de8ccb1f6b847c9fff47961bedfad1a79c 100644 --- a/paddle/gserver/layers/PoolProjectionLayer.h +++ b/paddle/gserver/layers/PoolProjectionLayer.h @@ -40,7 +40,7 @@ public: size_t getSize(); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = 
nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/PowerLayer.cpp b/paddle/gserver/layers/PowerLayer.cpp index 64fecab5b08354ceea8b290b78eede72d24a98a2..31c34b43e2995a2bf7f4d16629a8172a7e76c8e1 100644 --- a/paddle/gserver/layers/PowerLayer.cpp +++ b/paddle/gserver/layers/PowerLayer.cpp @@ -40,10 +40,11 @@ public: ~PowerLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(power, PowerLayer); diff --git a/paddle/gserver/layers/PrintLayer.cpp b/paddle/gserver/layers/PrintLayer.cpp index ac7f658864fee6812ea89d1dbd84ad4db94e3035..de198af111be4200dd1b240f6de9464e3f43b06d 100644 --- a/paddle/gserver/layers/PrintLayer.cpp +++ b/paddle/gserver/layers/PrintLayer.cpp @@ -19,38 +19,17 @@ namespace paddle { class PrintLayer : public Layer { public: explicit PrintLayer(const LayerConfig& config) : Layer(config) {} - void forward(PassType passType); - void backward(const UpdateCallback& callback) {} -}; -void PrintLayer::forward(PassType passType) { - Layer::forward(passType); - for (size_t i = 0; i != inputLayers_.size(); ++i) { - const auto& argu = getInput(i); - const std::string& name = inputLayers_[i]->getName(); - if (argu.value) { - std::ostringstream os; - argu.value->print(os); - LOG(INFO) << "layer=" << name << " value matrix:\n" << os.str(); - } - if (argu.ids) { - std::ostringstream os; - argu.ids->print(os, argu.ids->getSize()); - LOG(INFO) << "layer=" << name << " ids vector:\n" << os.str(); - } - if (auto startPos = argu.sequenceStartPositions) { - std::ostringstream os; - startPos->getVector(false)->print(os, startPos->getSize()); - LOG(INFO) << "layer=" << name << " sequence pos vector:\n" << os.str(); - } - if (auto subStartPos = argu.subSequenceStartPositions) { - std::ostringstream os; - subStartPos->getVector(false)->print(os, subStartPos->getSize()); - LOG(INFO) << "layer=" << name << " sub-sequence pos vector:\n" - << os.str(); + void forward(PassType passType) override { + Layer::forward(passType); + for (size_t i = 0; i != inputLayers_.size(); ++i) { + getInput(i).printValueString(LOG(INFO), + "layer=" + inputLayers_[i]->getName() + " "); } } -} + + void backward(const UpdateCallback& callback) override {} +}; REGISTER_LAYER(print, PrintLayer); diff --git a/paddle/gserver/layers/PriorBox.cpp b/paddle/gserver/layers/PriorBox.cpp index 36ace7597cd66cc2d83353ec999a75c79dd1e33e..bcf5e912a50fef2cec8ebdf1e0dad9efa43fba2f 100644 --- a/paddle/gserver/layers/PriorBox.cpp +++ b/paddle/gserver/layers/PriorBox.cpp @@ -30,10 +30,11 @@ namespace paddle { class PriorBoxLayer : public Layer { public: explicit PriorBoxLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback) {} + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override {} protected: int numPriors_; diff --git a/paddle/gserver/layers/RecurrentLayer.cpp b/paddle/gserver/layers/RecurrentLayer.cpp 
index b843fa1265cf3c0ad0814fb90f69e245ee5ab4ad..e4c2b483d2fa4032735858dab17647592791a9c7 100644 --- a/paddle/gserver/layers/RecurrentLayer.cpp +++ b/paddle/gserver/layers/RecurrentLayer.cpp @@ -45,17 +45,18 @@ class RecurrentLayer : public Layer { public: explicit RecurrentLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback& callback); + void backward(const UpdateCallback& callback) override; - void resetState(); + void resetState() override; - void setState(LayerStatePtr state); + void setState(LayerStatePtr state) override; - LayerStatePtr getState(); + LayerStatePtr getState() override; protected: /** diff --git a/paddle/gserver/layers/RecurrentLayerGroup.cpp b/paddle/gserver/layers/RecurrentLayerGroup.cpp index af8dd61d84e2e53ca26dc054d0516e62ab7aa216..78a74ff19a38cd205f3a46900bf716e2e1b1e4d5 100644 --- a/paddle/gserver/layers/RecurrentLayerGroup.cpp +++ b/paddle/gserver/layers/RecurrentLayerGroup.cpp @@ -33,15 +33,15 @@ public: void initSubNetwork(NeuralNetwork* rootNetwork, const ModelConfig& config, const std::vector<ParameterType>& parameterTypes, - bool useGpu); + bool useGpu) override; - void forward(PassType passType) { + void forward(PassType passType) override { REGISTER_TIMER_INFO("RecurrentGroupFwTime", getName().c_str()); const std::vector<Argument> inArgs; std::vector<Argument*> outArgs; network_->forward(inArgs, &outArgs, passType); } - void backward(const UpdateCallback& callback) { + void backward(const UpdateCallback& callback) override { REGISTER_TIMER_INFO("RecurrentGroupBwTime", getName().c_str()); network_->backward(nullptr); @@ -53,7 +53,8 @@ public: /** * @see Layer.accessSubNetwork */ - void accessSubNetwork(const std::function<void(NeuralNetwork&)>& callback) { + void accessSubNetwork( + const std::function<void(NeuralNetwork&)>& callback) override { callback(*network_); } diff --git a/paddle/gserver/layers/ResizeLayer.cpp b/paddle/gserver/layers/ResizeLayer.cpp index 7fcb3adea01b9d16394ee90b751b10902dc3a190..eb3b63c106901f89dd75cc2a495477b240d40e3c 100644 --- a/paddle/gserver/layers/ResizeLayer.cpp +++ b/paddle/gserver/layers/ResizeLayer.cpp @@ -20,18 +20,19 @@ namespace paddle { /** * @brief A layer for resizing a minibatch matrix h*w to h'*w' * @note - * origin matrix height * witdth) + * origin matrix height * width) * resize matrix: (height * width / size) * size */ class ResizeLayer : public Layer { public: explicit ResizeLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); + void forward(PassType passType) override; - void backward(const UpdateCallback& callback); + void backward(const UpdateCallback& callback) override; }; REGISTER_LAYER(resize, ResizeLayer); diff --git a/paddle/gserver/layers/SamplingIdLayer.cpp b/paddle/gserver/layers/SamplingIdLayer.cpp index 59ff5d41b529099277375cd5e1b498f3331c3b0a..2538d99bb71fa1ce6546730b817a49347fe3c5d8 100644 --- a/paddle/gserver/layers/SamplingIdLayer.cpp +++ b/paddle/gserver/layers/SamplingIdLayer.cpp @@ -35,8 +35,8 @@ public: explicit SamplingIdLayer(const LayerConfig& config) : Layer(config), rand1_(0, 1) {} - virtual bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { + bool init(const LayerMap& layerMap, + 
const ParameterMap& parameterMap) override { bool ret = Layer::init(layerMap, parameterMap); CHECK_EQ(1UL, inputLayers_.size()); if (useGpu_) { @@ -48,7 +48,7 @@ public: return ret; } - void forward(PassType passType) { + void forward(PassType passType) override { Layer::forward(passType); if (useGpu_) { for (size_t i = 0; i < inputLayers_.size(); i++) { @@ -83,7 +83,7 @@ public: output_.ids->copyFrom(ids.data(), batchSize); } - virtual void backward(const UpdateCallback& callback) {} + void backward(const UpdateCallback& callback) override {} }; REGISTER_LAYER(sampling_id, SamplingIdLayer); diff --git a/paddle/gserver/layers/ScalingLayer.cpp b/paddle/gserver/layers/ScalingLayer.cpp index 7f0084be6b57f5ce8245609e64c744c1a049a925..a38ee0857a767981eb24e79e96bf6115e9c63720 100644 --- a/paddle/gserver/layers/ScalingLayer.cpp +++ b/paddle/gserver/layers/ScalingLayer.cpp @@ -37,10 +37,11 @@ public: ~ScalingLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(scaling, ScalingLayer); diff --git a/paddle/gserver/layers/SelectiveFullyConnectedLayer.h b/paddle/gserver/layers/SelectiveFullyConnectedLayer.h index bdf9a4652cc71710d1d33e8b085c5aec28f6f806..99126fdba542bd142341039af27c3af72b391ca7 100644 --- a/paddle/gserver/layers/SelectiveFullyConnectedLayer.h +++ b/paddle/gserver/layers/SelectiveFullyConnectedLayer.h @@ -65,9 +65,10 @@ public: : Layer(config), selCols_(nullptr) {} ~SelectiveFullyConnectedLayer() {} - void prefetch(); + void prefetch() override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; Weight& getWeight(int idx) { return *weights_[idx]; } @@ -90,8 +91,8 @@ public: void fillSelectiveData( const std::shared_ptr<std::vector<std::pair<int*, size_t>>>& candidates); - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; private: /** diff --git a/paddle/gserver/layers/SequenceConcatLayer.cpp b/paddle/gserver/layers/SequenceConcatLayer.cpp index 069bc26e602ff7d925b4115d12388b6716676b29..4b24d8f0c852e1bdc887d4ee1465b9ad05d210bb 100644 --- a/paddle/gserver/layers/SequenceConcatLayer.cpp +++ b/paddle/gserver/layers/SequenceConcatLayer.cpp @@ -21,9 +21,11 @@ namespace paddle { /** * A layer for concatenating the first sequence with the second sequence - * following the first - * Input: two sequences each containing some instances + * Input: two sequences each containing the same number of instances + * seq1 = [a1, a2, ..., an] + * seq2 = [b1, b2, ..., bn] * Output: a concatenated sequence of the two input sequences + * out = [a1, b1, a2, b2, ..., an, bn] */ class SequenceConcatLayer : public Layer { @@ -35,10 +37,11 @@ public: ~SequenceConcatLayer() {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(seqconcat, 
SequenceConcatLayer); @@ -167,13 +170,17 @@ void SequenceConcatLayer::backward(const UpdateCallback& callback) { size_t rightNumIns = 0; for (size_t seqId = 0; seqId < numSequences1; ++seqId) { leftNumIns = starts1[seqId + 1] - starts1[seqId]; - inputGrad1->subMatrix(starts1[seqId], leftNumIns) - ->add(*(outputGrad->subMatrix(offset, leftNumIns))); + if (inputGrad1) { + inputGrad1->subMatrix(starts1[seqId], leftNumIns) + ->add(*(outputGrad->subMatrix(offset, leftNumIns))); + } offset += leftNumIns; rightNumIns = starts2[seqId + 1] - starts2[seqId]; - inputGrad2->subMatrix(starts2[seqId], rightNumIns) - ->add(*(outputGrad->subMatrix(offset, rightNumIns))); + if (inputGrad2) { + inputGrad2->subMatrix(starts2[seqId], rightNumIns) + ->add(*(outputGrad->subMatrix(offset, rightNumIns))); + } offset += rightNumIns; } } diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp index 4bfce766c769f4be2e5cc7bf691d539b1d307a47..7a13cd7ad0fecf202613d8da365ea832b41ab04e 100644 --- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp +++ b/paddle/gserver/layers/SequenceLastInstanceLayer.cpp @@ -42,12 +42,11 @@ public: explicit SequenceLastInstanceLayer(const LayerConfig& config) : SequencePoolLayer(config) {} - ~SequenceLastInstanceLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(seqlastins, SequenceLastInstanceLayer); diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/gserver/layers/SequencePoolLayer.h index aa9c132586e55d0f6bccec1689db60145ca2d43f..85b51ccd1dc7e7eb7aa9344b0f7ec6f70a35a0b4 100644 --- a/paddle/gserver/layers/SequencePoolLayer.h +++ b/paddle/gserver/layers/SequencePoolLayer.h @@ -46,12 +46,11 @@ protected: public: explicit SequencePoolLayer(const LayerConfig& config) : Layer(config) {} - virtual ~SequencePoolLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/SequenceReshapeLayer.cpp b/paddle/gserver/layers/SequenceReshapeLayer.cpp index 23924b0490851ad3c3c74d77e7abd8b0af8fc234..433592953b220eda4db4634124a57a2074cef4c0 100644 --- a/paddle/gserver/layers/SequenceReshapeLayer.cpp +++ b/paddle/gserver/layers/SequenceReshapeLayer.cpp @@ -20,9 +20,12 @@ limitations under the License. */ namespace paddle { /** - * A layer for reshaping the sequence - * Input: a sequence - * Output: a sequence + * A layer for reshaping the sequence. Assume the input sequence has + * T instances, the dimension of each instance is M, and the input + * reshape_dim is N, then the output sequence has T*M/N instances, + * the dimension of each instance is N. + * + * Note that T*M/N must be an integer. 
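+ * For example, reshaping an input sequence of T=4 instances, each of
+ * dimension M=6, with reshape_dim N=8 gives 4*6/8 = 3 output instances,
+ * each of dimension 8.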
*/ class SequenceReshapeLayer : public Layer { @@ -34,12 +37,11 @@ protected: public: explicit SequenceReshapeLayer(const LayerConfig& config) : Layer(config) {} - ~SequenceReshapeLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(seqreshape, SequenceReshapeLayer); diff --git a/paddle/gserver/layers/SlopeInterceptLayer.cpp b/paddle/gserver/layers/SlopeInterceptLayer.cpp index b678f414b6d76fa26818cb379fb0f0fb8fc7ec09..faf98744a7fdcf9c2c1712d783f153739ccc8eca 100644 --- a/paddle/gserver/layers/SlopeInterceptLayer.cpp +++ b/paddle/gserver/layers/SlopeInterceptLayer.cpp @@ -39,12 +39,11 @@ class SlopeInterceptLayer : public Layer { public: explicit SlopeInterceptLayer(const LayerConfig& config) : Layer(config) {} - ~SlopeInterceptLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(slope_intercept, SlopeInterceptLayer); diff --git a/paddle/gserver/layers/SpatialPyramidPoolLayer.h b/paddle/gserver/layers/SpatialPyramidPoolLayer.h index 32e88cf141a667d9dffbe7dcba46e9fde721f9e7..7d3cb80443801a947e3d529beb002561c4ac1964 100644 --- a/paddle/gserver/layers/SpatialPyramidPoolLayer.h +++ b/paddle/gserver/layers/SpatialPyramidPoolLayer.h @@ -43,9 +43,8 @@ protected: public: explicit SpatialPyramidPoolLayer(const LayerConfig& config) : Layer(config) {} - ~SpatialPyramidPoolLayer() {} - - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; ProjectionConfig getConfig(size_t sizeX_, size_t sizeY_, @@ -54,7 +53,7 @@ public: std::string& poolType_); size_t getSize(); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/SubSequenceLayer.cpp b/paddle/gserver/layers/SubSequenceLayer.cpp index c52fbee26232ad6eb09f84315a57c73e6aa02eb0..19b7ad1869af98e6313fe85a40203fd1e84f31d6 100644 --- a/paddle/gserver/layers/SubSequenceLayer.cpp +++ b/paddle/gserver/layers/SubSequenceLayer.cpp @@ -35,12 +35,11 @@ protected: public: explicit SubSequenceLayer(const LayerConfig& config) : Layer(config) {} - ~SubSequenceLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(subseq, SubSequenceLayer); diff --git a/paddle/gserver/layers/SumToOneNormLayer.cpp b/paddle/gserver/layers/SumToOneNormLayer.cpp index aa99b49380d3682ccf3d89220c0c68f22e458271..00f8519550bcff9bb706b1a28dc0dfcdc06cc54a 100644 
--- a/paddle/gserver/layers/SumToOneNormLayer.cpp +++ b/paddle/gserver/layers/SumToOneNormLayer.cpp @@ -41,12 +41,11 @@ protected: public: explicit SumToOneNormLayer(const LayerConfig& config) : Layer(config) {} - ~SumToOneNormLayer() {} + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; REGISTER_LAYER(sum_to_one_norm, SumToOneNormLayer); diff --git a/paddle/gserver/layers/TensorLayer.h b/paddle/gserver/layers/TensorLayer.h index ac38ffb620570320497446a6825ca2273b73facc..43992f692d3ce40fa095c8e0190bae01dc2ac3c1 100644 --- a/paddle/gserver/layers/TensorLayer.h +++ b/paddle/gserver/layers/TensorLayer.h @@ -44,13 +44,12 @@ protected: public: explicit TensorLayer(const LayerConfig& config) : Layer(config) {} - ~TensorLayer() {} - - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; Weight& getWeight(int idx) { return *weights_[idx]; } - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/TransLayer.h b/paddle/gserver/layers/TransLayer.h index 1d5a370b3414bf96076532858ff822080f40a2f5..be10bb74f6b218f0b12dc9f20db9a6ee8af7a478 100644 --- a/paddle/gserver/layers/TransLayer.h +++ b/paddle/gserver/layers/TransLayer.h @@ -32,9 +32,10 @@ class TransLayer : public Layer { public: explicit TransLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void forward(PassType passType); - void backward(const UpdateCallback& callback = nullptr); + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; }; } // namespace paddle diff --git a/paddle/gserver/layers/ValidationLayer.h b/paddle/gserver/layers/ValidationLayer.h index 4c1de7b3b7d6975c2693eb065f7d3e19cc51a95c..c8b2634a1366ed03846f2331726d04232b5d32ee 100644 --- a/paddle/gserver/layers/ValidationLayer.h +++ b/paddle/gserver/layers/ValidationLayer.h @@ -26,7 +26,8 @@ class ValidationLayer : public Layer { public: explicit ValidationLayer(const LayerConfig& config) : Layer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; LayerPtr getOutputLayer() { return inputLayers_[0]; } @@ -37,13 +38,13 @@ public: return inputLayers_[2]; } - virtual void forward(PassType passType); + void forward(PassType passType) override; - virtual void backward(const UpdateCallback& callback = nullptr); + void backward(const UpdateCallback& callback = nullptr) override; virtual void validationImp(MatrixPtr outputValue, IVectorPtr label) = 0; - virtual void onPassEnd() = 0; + void onPassEnd() override = 0; }; /* @@ -57,11 +58,12 @@ public: cpuLabel_(nullptr), cpuWeight_(nullptr) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - 
void validationImp(MatrixPtr outputValue, IVectorPtr label); + void validationImp(MatrixPtr outputValue, IVectorPtr label) override; - void onPassEnd(); + void onPassEnd() override; struct PredictionResult { PredictionResult(real __out, int __label) : out(__out), label(__label) {} @@ -86,11 +88,12 @@ public: explicit PnpairValidation(const LayerConfig& config) : ValidationLayer(config) {} - bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; - void validationImp(MatrixPtr outputValue, IVectorPtr label); + void validationImp(MatrixPtr outputValue, IVectorPtr label) override; - void onPassEnd(); + void onPassEnd() override; private: bool passBegin_; diff --git a/paddle/gserver/layers/WarpCTCLayer.h b/paddle/gserver/layers/WarpCTCLayer.h index 3d9ae9249af66dd085f5b6bb7a3c09d8b2276a24..7e8d7379d267886805db2eb7983a4dabbf949914 100644 --- a/paddle/gserver/layers/WarpCTCLayer.h +++ b/paddle/gserver/layers/WarpCTCLayer.h @@ -30,9 +30,10 @@ public: explicit WarpCTCLayer(const LayerConfig& config) : Layer(config) {} ~WarpCTCLayer() {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); - virtual void forward(PassType passType); - virtual void backward(const UpdateCallback& callback); + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + void forward(PassType passType) override; + void backward(const UpdateCallback& callback) override; protected: /** diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index a8b53e2105b053399e62fba5321fd22c1fe4a50d..1964b2f8bfaebc49fe3073e03c949a8a9c3e385a 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -941,59 +941,6 @@ void GpuMatrix::softreluDerivative(Matrix& output) { void GpuMatrix::scaledTanh(Matrix& output, real p1, real p2) { BaseMatrix::scaledTanh(output, p1, p2); } -void GpuMatrix::cosSim(Matrix& output1, Matrix& output2, real scale) { - CHECK(output1.useGpu_ == true && output2.useGpu_ == true) - << "Matrix type are not equal"; - size_t numSamples = getHeight(); - size_t dim = output1.getWidth(); - CHECK_EQ(getWidth(), 1UL); - CHECK_EQ(output1.getHeight(), numSamples); - CHECK_EQ(output1.getWidth(), output2.getWidth()); - real* out = getData(); - real* x = output1.getData(); - real* y = output2.getData(); - hl_cossim(out, x, y, dim, output1.getHeight(), output2.getHeight(), scale); -} -void GpuMatrix::cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale) { - CHECK(output.useGpu_ == true && prevOut1.useGpu_ == true && - prevOut2.useGpu_ == true && prevGrad1.useGpu_ == true && - prevGrad2.useGpu_ == true) - << "Matrix type are not equal"; - CHECK_EQ(getWidth(), 1UL); - CHECK_EQ(output.getWidth(), 1UL); - - size_t numSamples = getHeight(); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(prevOut1.getHeight(), numSamples); - CHECK_EQ(prevGrad1.getHeight(), numSamples); - - size_t dim = prevOut1.getWidth(); - CHECK_EQ(prevOut2.getWidth(), dim); - CHECK_EQ(prevGrad1.getWidth(), dim); - CHECK_EQ(prevGrad2.getWidth(), dim); - - real* grad = getData(); - real* out = output.getData(); - real* prevOutX = prevOut1.getData(); - real* prevOutY = prevOut2.getData(); - real* prevGradX = prevGrad1.getData(); - real* prevGradY = prevGrad2.getData(); - hl_cossim_derivative(grad, - out, - prevOutX, - prevOutY, - prevGradX, - prevGradY, - dim, - prevOut1.getHeight(), - prevOut2.getHeight(), - scale); -} void 
GpuMatrix::randomizeUniform() { CHECK(isContiguous()); @@ -3470,105 +3417,6 @@ void CpuMatrix::softmaxDerivative(Matrix& output, Matrix& sftmaxSum) { } } -void CpuMatrix::cosSim(Matrix& output1, Matrix& output2, real scale) { - size_t numSamples = getHeight(); - size_t dim = output1.getWidth(); - CHECK_EQ(getWidth(), 1UL); - CHECK_EQ(output1.getHeight(), numSamples); - CHECK_EQ(output1.getWidth(), output2.getWidth()); - - real* out = getData(); - const real* x = output1.getData(); - const real* y = output2.getData(); - size_t yInc = dim; - if (output2.getHeight() == 1LU) { - yInc = 0; - } else { - CHECK_EQ(output2.getHeight(), numSamples); - } - for (size_t i = 0; i < numSamples; ++i, x += dim, y += yInc) { - real squareSumX = 0; - real squareSumY = 0; - real xy = 0; - for (size_t j = 0; j < dim; ++j) { - squareSumX += _square(x[j]); - squareSumY += _square(y[j]); - xy += x[j] * y[j]; - } - CHECK(squareSumX > 0 && squareSumY > 0); - out[i] = scale * xy / (std::sqrt(squareSumX) * std::sqrt(squareSumY)); - } -} - -void CpuMatrix::cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale) { - CHECK(output.useGpu_ == false) << "Matrix type are not equal"; - - CHECK_EQ(getWidth(), 1UL); - CHECK_EQ(output.getWidth(), 1UL); - - size_t numSamples = getHeight(); - CHECK_EQ(output.getHeight(), numSamples); - CHECK_EQ(prevOut1.getHeight(), numSamples); - CHECK_EQ(prevGrad1.getHeight(), numSamples); - - size_t dim = prevOut1.getWidth(); - CHECK_EQ(prevOut2.getWidth(), dim); - CHECK_EQ(prevGrad1.getWidth(), dim); - CHECK_EQ(prevGrad2.getWidth(), dim); - - const real* grad = getData(); - const real* out = output.getData(); - const real* prevOutX = prevOut1.getData(); - const real* prevOutY = prevOut2.getData(); - real* prevGradX = prevGrad1.getData(); - real* prevGradY = prevGrad2.getData(); - size_t yInc = dim; - if (prevOut2.getHeight() == 1LU) { - yInc = 0; - CHECK_EQ(prevGrad2.getHeight(), 1LU); - } else { - CHECK_EQ(prevOut2.getHeight(), numSamples); - CHECK_EQ(prevGrad2.getHeight(), numSamples); - } - for (size_t i = 0; i < numSamples; ++i, - prevOutX += dim, - prevOutY += yInc, - prevGradX += dim, - prevGradY += yInc) { - real squareSumX = 0; - real squareSumY = 0; - real xy = 0; - for (size_t j = 0; j < dim; ++j) { - squareSumX += _square(prevOutX[j]); - squareSumY += _square(prevOutY[j]); - xy += prevOutX[j] * prevOutY[j]; - } - CHECK(squareSumX > 0 && squareSumY > 0); - if (xy == 0) { - real reciprocal = 1.0f / (std::sqrt(squareSumX) * std::sqrt(squareSumY)); - for (size_t j = 0; j < dim; ++j) { - prevGradX[j] += scale * grad[i] * prevOutY[j] * reciprocal; - prevGradY[j] += scale * grad[i] * prevOutX[j] * reciprocal; - } - } else { - real reciprocalXY = 1.0f / xy; - real reciprocalSquareSumX = 1.0f / squareSumX; - real reciprocalSquareSumY = 1.0f / squareSumY; - for (size_t j = 0; j < dim; ++j) { - prevGradX[j] += out[i] * grad[i] * (prevOutY[j] * reciprocalXY - - prevOutX[j] * reciprocalSquareSumX); - prevGradY[j] += out[i] * grad[i] * (prevOutX[j] * reciprocalXY - - prevOutY[j] * reciprocalSquareSumY); - } - } - } -} - void CpuMatrix::sumOfSquares(Matrix& output, Matrix& label) { CHECK(output.useGpu_ == false && label.useGpu_ == false) << "Matrix type are not equal"; diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index c92c0a272d5a72868bd61035d77aa4ed0fad7a7c..ea4bbb86b057b526c5ea294b2cd835aef65de58d 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -799,26 +799,6 @@ public: LOG(FATAL) << "Not 
implemented"; } - /** - * cosine similarity, for each row i, - * this[i] = cos(output1[i], output2[i]) - * - * output2 can only have one row, then for each row i, - * this[i] = cos(output1[i], output2[0]) - */ - virtual void cosSim(Matrix& output1, Matrix& output2, real scale = 1.0f) { - LOG(FATAL) << "Not implemented"; - } - - virtual void cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale = 1.0f) { - LOG(FATAL) << "Not implemented"; - } - /// print out the values of elements to os virtual void print(std::ostream& os) const { LOG(FATAL) << "Not implemented"; @@ -1324,14 +1304,6 @@ public: void softreluDerivative(Matrix& output); void scaledTanh(Matrix& output, real p1, real p2); - void cosSim(Matrix& output1, Matrix& output2, real scale); - void cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale); - virtual void print(std::ostream& os) const; virtual void print(std::ostream& os, size_t height, size_t width) const; @@ -1752,14 +1724,6 @@ public: void softreluDerivative(Matrix& output); void scaledTanh(Matrix& output, real p1, real p2); - void cosSim(Matrix& output1, Matrix& output2, real scale); - void cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale); - void print(std::ostream& os) const; void print(std::ostream& os, size_t height, size_t width) const; void printOneRow(std::ostream& os, size_t idx) const; diff --git a/paddle/math/tests/test_Matrix.cpp b/paddle/math/tests/test_Matrix.cpp index a4084bdf7c6953651bfd9714fd8a5c930f774fe6..1c21da5b76e95603258a5006d0c57b00126e65b9 100644 --- a/paddle/math/tests/test_Matrix.cpp +++ b/paddle/math/tests/test_Matrix.cpp @@ -181,28 +181,6 @@ TEST(Matrix, copyByRowIndex) { } } -void testCosSim(int heightX, int heightY, int width, real scale) { - AutoCompare test(heightX, 1); - CpuMatrix arg1(heightX, width); - CpuMatrix arg2(heightY, width); - arg1.randomizeUniform(); - arg2.randomizeUniform(); - arg2.add(-0.5); - test.cmpWithArg(&Matrix::cosSim, arg1, arg2, scale); -} - -TEST(Matrix, cosSim) { - for (auto heightX : {10, 100, 1000}) { - for (auto heightY : {1, heightX}) { - for (auto width : {10, 100, 1000}) { - for (auto scale : {1.0, 2.0}) { - testCosSim(heightX, heightY, width, scale); - } - } - } - } -} - void testParamReluForward(int height, int width, int w_height, int w_width) { AutoCompare test(height, width); CpuMatrix arg1(height, width); diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index e024f2cf1b913f56301ac7b3380f0c382818f413..6caaea443c1df756bfeb775154e8a90400cc3211 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -720,61 +720,6 @@ TEST(Matrix, sequenceAvgForward) { } } -void testCosSimDerivate(int heightX, int heightY, int width, real scale) { - MatrixPtr prevOutX = CpuMatrix::create(heightX, width, false, false); - MatrixPtr prevOutY = CpuMatrix::create(heightY, width, false, false); - MatrixPtr grad = CpuMatrix::create(heightX, 1, false, false); - MatrixPtr output = CpuMatrix::create(heightX, 1, false, false); - MatrixPtr prevGradX = CpuMatrix::create(heightX, width, false, false); - MatrixPtr prevGradY = CpuMatrix::create(heightY, width, false, false); - - prevOutX->randomizeUniform(); - prevOutY->randomizeUniform(); - grad->randomizeUniform(); - output->randomizeUniform(); - 
prevGradX->randomizeUniform(); - prevGradY->randomizeUniform(); - - MatrixPtr prevOutXGpu = GpuMatrix::create(heightX, width, false, true); - MatrixPtr prevOutYGpu = GpuMatrix::create(heightY, width, false, true); - MatrixPtr gradGpu = GpuMatrix::create(heightX, 1, false, true); - MatrixPtr outputGpu = GpuMatrix::create(heightX, 1, false, true); - MatrixPtr prevGradXGpu = GpuMatrix::create(heightX, width, false, true); - MatrixPtr prevGradYGpu = GpuMatrix::create(heightY, width, false, true); - - prevOutXGpu->copyFrom(*prevOutX); - prevOutYGpu->copyFrom(*prevOutY); - gradGpu->copyFrom(*grad); - outputGpu->copyFrom(*output); - prevGradXGpu->copyFrom(*prevGradX); - prevGradYGpu->copyFrom(*prevGradY); - - grad->cosSimDerivative( - *output, *prevOutX, *prevOutY, *prevGradX, *prevGradY, scale); - - gradGpu->cosSimDerivative(*outputGpu, - *prevOutXGpu, - *prevOutYGpu, - *prevGradXGpu, - *prevGradYGpu, - scale); - - TensorCheckErr(*prevGradX, *prevGradXGpu); - TensorCheckErr(*prevGradY, *prevGradYGpu); -} - -TEST(Matrix, cosSimDerivate) { - for (auto heightX : {1, 10, 100}) { - for (auto heightY : {1, heightX}) { - for (auto width : {1, 10, 100}) { - for (auto scale : {1.0, 2.0}) { - testCosSimDerivate(heightX, heightY, width, scale); - } - } - } - } -} - void testParamReluBackwardDiff(int height, int width, int w_height, diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index 65d01a15718ae2bebd4869eff0e5407524bc0e7c..7a343cca33f5b420be6192231ac73ca1c2da5fb9 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -602,6 +602,44 @@ void Argument::degradeSequence(const Argument& input, bool useGpu) { tgtBuf[numSequences] = numSubSequences; } +void Argument::getValueString( + std::unordered_map* out) const { + if (value) { + std::ostringstream os; + value->print(os); + out->insert({"value", os.str()}); + } + if (ids) { + std::ostringstream os; + ids->print(os, ids->getSize()); + out->insert({"ids", os.str()}); + } + if (sequenceStartPositions) { + std::ostringstream os; + sequenceStartPositions->getVector(false)->print( + os, sequenceStartPositions->getSize()); + out->insert({"sequence pos", os.str()}); + } + if (subSequenceStartPositions) { + std::ostringstream os; + subSequenceStartPositions->getVector(false)->print( + os, subSequenceStartPositions->getSize()); + out->insert({"sub-sequence pos", os.str()}); + } +} + +void Argument::printValueString(std::ostream& stream, + const std::string& prefix) const { + std::unordered_map out; + getValueString(&out); + for (auto field : {"value", "id", "sequence pos", "sub-sequence pos"}) { + auto it = out.find(field); + if (it != out.end()) { + stream << prefix << field << ":\n" << it->second; + } + } +} + void Argument::subArgFrom(const Argument& input, size_t offset, size_t height, diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index afd2de0202bf0f14ec3d4c5b856455a3488e41f6..178c068b93ac5fc1e06200984f14da86069cf7e4 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -297,6 +297,23 @@ struct Argument { sequence has sub-sequence degrades to a sequence. */ void degradeSequence(const Argument& input, bool useGpu); + + /** + * @brief getValueString will return the argument's output in string. There + * are several kinds of output. The keys of output dictionary are 'value', + * 'id', 'sequence pos', 'sub-sequence pos'. + * @param out [out]: the return values. 
+ */ + void getValueString(std::unordered_map* out) const; + + /** + * @brief printValueString will print the argument's output in order of + * 'value', 'id', 'sequence pos', 'sub-sequence pos'. + * @param stream: Output stream + * @param prefix: line prefix for printing. + */ + void printValueString(std::ostream& stream, + const std::string& prefix = "") const; }; } // namespace paddle diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp index 220aac1ff11e0ff263df8459f539237944b94c81..dbab4ec43ca2fa691445131d2cb14f51721a2e4c 100644 --- a/paddle/utils/Util.cpp +++ b/paddle/utils/Util.cpp @@ -289,6 +289,7 @@ void mkDir(const char* filename) { void mkDirRecursively(const char* dir) { struct stat sb; + if (*dir == 0) return; // empty string if (!stat(dir, &sb)) return; mkDirRecursively(path::dirname(dir).c_str()); diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ee7a5bff84ca96ef1010fa7430356722f807fb0f..357637e20346f8e1179d3a28ff580722cdfcccff 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -24,6 +24,7 @@ add_custom_target(paddle_python ALL DEPENDS ${OUTPUT_DIR}/.timestamp) add_subdirectory(paddle/trainer_config_helpers/tests) +add_subdirectory(paddle/reader/tests) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/dist/ DESTINATION opt/paddle/share/wheels diff --git a/python/paddle/reader/__init__.py b/python/paddle/reader/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..493b410e8299ebe167be43ead1401a6ab245a631 --- /dev/null +++ b/python/paddle/reader/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# It would be too lengthy to require our users to prefix decorators with `decorator`. +# For example, we want the following line +# +# r = paddle.reader.decorator.bufferd(paddle.reader.creator.text("hello.txt")) +# +# to be a shorter version: +# +# r = paddle.reader.buffered(paddle.reader.creator.text("hello.txt")) +from decorator import * diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py new file mode 100644 index 0000000000000000000000000000000000000000..f0ddb0ff812b15ede21e6965c7c8857f12716fa0 --- /dev/null +++ b/python/paddle/reader/decorator.py @@ -0,0 +1,60 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
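+# Illustrative usage (my_reader_creator stands in for any no-argument
+# function that returns an iterable reader):
+#
+#     buffered_creator = buffered(my_reader_creator, 100)
+#     for item in buffered_creator():
+#         process(item)  # items are prefetched into a bounded queue by a daemon thread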
+ +__all__ = ['buffered'] + +from Queue import Queue +from threading import Thread + + +def buffered(reader, size): + """Creates a buffered data reader. + + The buffered data reader will read and save data entries into a buffer. + Reading from the buffered data reader will proceed as long as the buffer + is not empty. + + Args: + reader: the data reader to read from. + size: max buffer size. + + Returns: + The buffered data reader. + """ + + class EndSignal(): + pass + + end = EndSignal() + + def read_worker(r, q): + for d in r: + q.put(d) + q.put(end) + + def create_reader(): + r = reader() + q = Queue(maxsize=size) + t = Thread( + target=read_worker, args=( + r, + q, )) + t.daemon = True + t.start() + e = q.get() + while e != end: + yield e + e = q.get() + + return create_reader diff --git a/python/paddle/reader/tests/CMakeLists.txt b/python/paddle/reader/tests/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..502c897d8946a838847c1c23b1236358c58c088e --- /dev/null +++ b/python/paddle/reader/tests/CMakeLists.txt @@ -0,0 +1,4 @@ +add_test(NAME reader_decorator_test + COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/reader/tests/decorator_test.py + WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) diff --git a/python/paddle/reader/tests/decorator_test.py b/python/paddle/reader/tests/decorator_test.py new file mode 100644 index 0000000000000000000000000000000000000000..879d1d9c1d0e0650d347b5c44e36771a0c15390e --- /dev/null +++ b/python/paddle/reader/tests/decorator_test.py @@ -0,0 +1,50 @@ +# Copyright PaddlePaddle contributors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +import paddle.reader +import time + + +def reader_10(dur): + for i in range(10): + time.sleep(dur) + yield i + + +class TestBuffered(unittest.TestCase): + def test_read(self): + for size in range(20): + b = paddle.reader.buffered(lambda: reader_10(0), size) + c = 0 + for i in b(): + self.assertEqual(i, c) + c += 1 + self.assertEqual(c, 10) + + def test_buffering(self): + # read have 30ms delay. + b = paddle.reader.buffered(lambda: reader_10(0.03), 10) + last_time = time.time() + for idx, i in enumerate(b()): + elapsed_time = time.time() - last_time + if i == 0: + time.sleep(0.3) + else: + # read time should be short, meaning already buffered. 
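+                # while the first read slept for 0.3s, the daemon thread had
+                # time to prefetch the remaining items (9 reads * 30ms = 0.27s),
+                # so they are served straight from the queue.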
+ self.assertLess(elapsed_time, 0.01) + last_time = time.time() + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index b02af991dc577e070dac813cfd18d35ab6dfc3e0..d403a6029a3e9d4c41b80a2206397dcdfe780026 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -893,11 +893,11 @@ class MaxOut(Cfg): self.add_keys(locals()) -def DataBase(async_load_data=False, - constant_slots=None, - data_ratio=1, - is_main_data=True, - usage_ratio=None): +def create_data_config_proto(async_load_data=False, + constant_slots=None, + data_ratio=1, + is_main_data=True, + usage_ratio=None): # default: all sub dataproviders are treat as "main data". # see proto/DataConfig.proto for is_main_data data_config = DataConfig() @@ -923,7 +923,7 @@ def SimpleData(files=None, context_len=None, buffer_capacity=None, **xargs): - data_config = DataBase(**xargs) + data_config = create_data_config_proto(**xargs) data_config.type = 'simple' data_config.files = files data_config.feat_dim = feat_dim @@ -945,7 +945,7 @@ def PyData(files=None, constant_slots=None, load_thread_num=None, **xargs): - data_config = DataBase(**xargs) + data_config = create_data_config_proto(**xargs) data_config.type = 'py' if load_data_module in g_py_module_name_list: @@ -996,7 +996,7 @@ def ProtoData(files=None, constant_slots=None, load_thread_num=None, **xargs): - data_config = DataBase(**xargs) + data_config = create_data_config_proto(**xargs) if type is None: data_config.type = 'proto' else: @@ -1035,7 +1035,7 @@ def Data(type, buffer_capacity=None, **xargs): - data_config = DataBase(**xargs) + data_config = create_data_config_proto(**xargs) data_config.type = type data_config.files = files data_config.feat_dim = feat_dim diff --git a/python/paddle/trainer_config_helpers/data_sources.py b/python/paddle/trainer_config_helpers/data_sources.py index 622b4fc25ccff397cd3115db316870f328466fba..ab9a2562dcccb394c0b24741ceeb10061e40cb9a 100644 --- a/python/paddle/trainer_config_helpers/data_sources.py +++ b/python/paddle/trainer_config_helpers/data_sources.py @@ -58,8 +58,8 @@ def define_py_data_source(file_list, :param obj: python object name. May be a function name if using PyDataProviderWrapper. :type obj: basestring - :param args: The best practice is using dict to pass arguments into - DataProvider, and use :code:`@init_hook_wrapper` to + :param args: The best practice is using dict to pass arguments into + DataProvider, and use :code:`@init_hook_wrapper` to receive arguments. :type args: string or picklable object :param async: Load Data asynchronously or not. @@ -98,7 +98,7 @@ def define_py_data_sources(train_list, The annotation is almost the same as define_py_data_sources2, except that it can specific train_async and data_cls. - :param data_cls: + :param data_cls: :param train_list: Train list name. :type train_list: basestring :param test_list: Test list name. @@ -111,8 +111,8 @@ def define_py_data_sources(train_list, a tuple or list to this argument. :type obj: basestring or tuple or list :param args: The best practice is using dict() to pass arguments into - DataProvider, and use :code:`@init_hook_wrapper` to receive - arguments. If train and test is different, then pass a tuple + DataProvider, and use :code:`@init_hook_wrapper` to receive + arguments. If train and test is different, then pass a tuple or list to this argument. :type args: string or picklable object or list or tuple. 
:param train_async: Is training data load asynchronously or not. @@ -163,12 +163,12 @@ def define_py_data_sources2(train_list, test_list, module, obj, args=None): .. code-block:: python - define_py_data_sources2(train_list="train.list", - test_list="test.list", + define_py_data_sources2(train_list="train.list", + test_list="test.list", module="data_provider" # if train/test use different configurations, # obj=["process_train", "process_test"] - obj="process", + obj="process", args={"dictionary": dict_name}) The related data provider can refer to :ref:`api_pydataprovider2_sequential_model` . @@ -185,8 +185,8 @@ def define_py_data_sources2(train_list, test_list, module, obj, args=None): a tuple or list to this argument. :type obj: basestring or tuple or list :param args: The best practice is using dict() to pass arguments into - DataProvider, and use :code:`@init_hook_wrapper` to receive - arguments. If train and test is different, then pass a tuple + DataProvider, and use :code:`@init_hook_wrapper` to receive + arguments. If train and test is different, then pass a tuple or list to this argument. :type args: string or picklable object or list or tuple. :return: None @@ -195,13 +195,13 @@ def define_py_data_sources2(train_list, test_list, module, obj, args=None): def py_data2(files, load_data_module, load_data_object, load_data_args, **kwargs): - data = DataBase() + data = create_data_config_proto() data.type = 'py2' data.files = files data.load_data_module = load_data_module data.load_data_object = load_data_object data.load_data_args = load_data_args - data.async_load_data = True + data.async_load_data = False return data define_py_data_sources( diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 66fa58ac91e33bfeac37d1bfbdad8dab4789c4bd..0d3a31b9d39a28070bdadc8329caf8e383dbace8 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -37,6 +37,7 @@ __all__ = [ "dotmul_projection", "dotmul_operator", "repeat_layer", + "seq_reshape_layer", "table_projection", "mixed_layer", "data_layer", @@ -59,6 +60,7 @@ __all__ = [ 'img_cmrnorm_layer', 'addto_layer', 'concat_layer', + 'seq_concat_layer', 'lstm_step_layer', 'recurrent_group', 'memory', @@ -124,6 +126,7 @@ class LayerType(object): GRUMEMORY = "gated_recurrent" SEQUENCE_LAST_INSTANCE = "seqlastins" SEQUENCE_FIRST_INSTANCE = "seqfirstins" + SEQUENCE_RESHAPE = "seqreshape" POOLING_MAX = "max" POOLING_AVG = 'average' FC_LAYER = "fc" @@ -144,6 +147,7 @@ class LayerType(object): CONCAT_LAYER = 'concat' CONCAT_PROJ_LAYER = 'concat2' + SEQUENCE_CONCAT_LAYER = 'seqconcat' LSTM_STEP_LAYER = 'lstm_step' GRU_STEP_LAYER = 'gru_step' @@ -1448,6 +1452,61 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None): parents=[input]) +@wrap_name_default("seqreshape") +@wrap_act_default(act=IdentityActivation()) +@wrap_bias_attr_default(has_bias=False) +@layer_support() +def seq_reshape_layer(input, + reshape_size, + act=None, + name=None, + layer_attr=None, + bias_attr=None): + """ + A layer for reshaping the sequence. Assume the input sequence has T instances, + the dimension of each instance is M, and the input reshape_size is N, then the + output sequence has T*M/N instances, the dimension of each instance is N. + + Note that T*M/N must be an integer. + + The example usage is: + + .. code-block:: python + + reshape = seq_reshape_layer(input=layer, reshape_size=4) + + :param input: Input layer. 
+ :type input: LayerOutput + :param reshape_size: the size of reshaped sequence. + :type reshape_size: int + :param name: Layer name. + :type name: basestring + :param act: Activation type. + :type act: BaseActivation + :param layer_attr: extra layer attributes. + :type layer_attr: ExtraLayerAttribute. + :param bias_attr: The Bias Attribute. If no bias, then pass False or + something not type of ParameterAttribute. None will get a + default Bias. + :type bias_attr: ParameterAttribute or None or bool + :return: LayerOutput object. + :rtype: LayerOutput + """ + + Layer( + inputs=[input.name], + name=name, + size=reshape_size, + type=LayerType.SEQUENCE_RESHAPE, + bias=ParamAttr.to_bias(bias_attr), + **ExtraAttr.to_kwargs(layer_attr)) + return LayerOutput( + name=name, + size=reshape_size, + layer_type=LayerType.SEQUENCE_RESHAPE, + parents=[input]) + + @wrap_name_default() @layer_support() def interpolation_layer(input, weight, name=None, layer_attr=None): @@ -2570,6 +2629,63 @@ def concat_layer(input, act=None, name=None, layer_attr=None, bias_attr=None): size=sz) +@wrap_name_default("seqconcat") +@wrap_act_default(act=IdentityActivation()) +@wrap_bias_attr_default(has_bias=False) +@layer_support() +def seq_concat_layer(a, b, act=None, name=None, layer_attr=None, + bias_attr=None): + """ + Concat sequence a with sequence b. + + Inputs: + - a = [a1, a2, ..., an] + - b = [b1, b2, ..., bn] + - Note that the length of a and b should be the same. + + Output: [a1, b1, a2, b2, ..., an, bn] + + The example usage is: + + .. code-block:: python + + concat = seq_concat_layer(a=layer1, b=layer2) + + :param name: Layer name. + :type name: basestring + :param a: input sequence layer + :type a: LayerOutput + :param b: input sequence layer + :type b: LayerOutput + :param act: Activation type. + :type act: BaseActivation + :param layer_attr: Extra Layer Attribute. + :type layer_attr: ExtraLayerAttribute + :param bias_attr: The Bias Attribute. If no bias, then pass False or + something not type of ParameterAttribute. None will get a + default Bias. + :type bias_attr: ParameterAttribute or None or bool + :return: LayerOutput object. + :rtype: LayerOutput + """ + assert isinstance(a, LayerOutput) and isinstance(b, LayerOutput) + assert a.size == b.size + Layer( + name=name, + type=LayerType.SEQUENCE_CONCAT_LAYER, + inputs=[a.name, b.name], + active_type=act.name, + bias=ParamAttr.to_bias(bias_attr), + **ExtraLayerAttribute.to_kwargs(layer_attr)) + + return LayerOutput( + name, + layer_type=LayerType.SEQUENCE_CONCAT_LAYER, + parents=[a, b], + activation=act, + size=a.size) + + def memory(name, size, is_seq=False, @@ -3677,26 +3793,27 @@ def pad_layer(input, For example, - .. code-block:: - - input(2,2,2,3) = [ - [ [[1,2,3], [3,4,5]], - [[2,3,5], [1,6,7]] ], - [ [[4,3,1], [1,8,7]], - [[3,8,9], [2,3,5]] ] - ] - - pad_c=[1,1], pad_h=[0,0], pad_w=[0,0] - output(2,4,2,3) = [ - [ [[0,0,0], [0,0,0]], - [[1,2,3], [3,4,5]], - [[2,3,5], [1,6,7]], - [[0,0,0], [0,0,0]] ], - [ [[0,0,0], [0,0,0]], - [[4,3,1], [1,8,7]], - [[3,8,9], [2,3,5]], - [[0,0,0], [0,0,0]] ] - ] + .. 
code-block:: python + + input(2,2,2,3) = [ + [ [[1,2,3], [3,4,5]], + [[2,3,5], [1,6,7]] ], + [ [[4,3,1], [1,8,7]], + [[3,8,9], [2,3,5]] ] + ] + + pad_c=[1,1], pad_h=[0,0], pad_w=[0,0] + + output(2,4,2,3) = [ + [ [[0,0,0], [0,0,0]], + [[1,2,3], [3,4,5]], + [[2,3,5], [1,6,7]], + [[0,0,0], [0,0,0]] ], + [ [[0,0,0], [0,0,0]], + [[4,3,1], [1,8,7]], + [[3,8,9], [2,3,5]], + [[0,0,0], [0,0,0]] ] + ] The simply usage is: @@ -4191,13 +4308,7 @@ def block_expand_layer(input, @wrap_name_default() @layer_support() -def maxout_layer(input, - groups, - num_channels=None, - size_x=None, - size_y=None, - name=None, - layer_attr=None): +def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): """ A layer to do max out on conv layer output. - Input: output of a conv layer. @@ -4227,12 +4338,6 @@ def maxout_layer(input, :type num_channels: int|None :param groups: The group number of input layer. :type groups: int - :param size_x: conv output width. If None will be set - automatically from previous output. - :type size_x: int|None - :param size_y: conv output height. If None will be set - automatically from previous output. - :type size_y: int|None :param name: The name of this layer, which can not specify. :type name: None|basestring. :param layer_attr: Extra Layer attribute. diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index ea46b557a26ce638742facda3eb6aa2feb4b2563..c9178e3c6a46a2d663ec368569e529e780b76a6f 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -4,6 +4,7 @@ test_sequence_pooling test_lstmemory_layer test_grumemory_layer last_first_seq test_expand_layer test_ntm_layers test_hsigmoid img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight -test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops) +test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops +test_seq_concat_reshape) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr new file mode 100644 index 0000000000000000000000000000000000000000..91284b4fb32fcfdbf6b9e7384ffe080574b78821 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr @@ -0,0 +1,51 @@ +type: "nn" +layers { + name: "data1" + type: "data" + size: 30 + active_type: "" +} +layers { + name: "data2" + type: "data" + size: 30 + active_type: "" +} +layers { + name: "__seqconcat_0__" + type: "seqconcat" + size: 30 + active_type: "" + inputs { + input_layer_name: "data1" + } + inputs { + input_layer_name: "data2" + } +} +layers { + name: "__seqreshape_0__" + type: "seqreshape" + size: 5 + active_type: "linear" + inputs { + input_layer_name: "data1" + } +} +input_layer_names: "data1" +input_layer_names: "data2" +output_layer_names: "__seqconcat_0__" +output_layer_names: "__seqreshape_0__" +sub_models { + name: "root" + layer_names: "data1" + layer_names: "data2" + layer_names: "__seqconcat_0__" + layer_names: "__seqreshape_0__" + input_layer_names: "data1" + input_layer_names: "data2" + output_layer_names: "__seqconcat_0__" + output_layer_names: "__seqreshape_0__" + 
is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr index 1cfb92255aa92fa3fbc16a816851a5c2f81c2b56..569b0b945a762e8b596e197adc06df64e33311af 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_split_datasource.protostr @@ -19,7 +19,7 @@ model_config { data_config { type: "py2" files: "train.list" - async_load_data: true + async_load_data: false for_test: false load_data_module: "a" load_data_object: "c" @@ -58,7 +58,7 @@ opt_config { test_data_config { type: "py2" files: "test.list" - async_load_data: true + async_load_data: false for_test: true load_data_module: "b" load_data_object: "d" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py new file mode 100644 index 0000000000000000000000000000000000000000..5c161ba805fb301e8feb8702ad61a8341df40e3f --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py @@ -0,0 +1,12 @@ +from paddle.trainer_config_helpers import * + +settings(batch_size=1000, learning_rate=1e-5) + +din1 = data_layer(name='data1', size=30) +din2 = data_layer(name='data2', size=30) + +opts = [] +opts.append(seq_concat_layer(a=din1, b=din2)) +opts.append(seq_reshape_layer(input=din1, reshape_size=5)) + +outputs(opts) diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index b2ea87b086101d71e89c33ce7c1f4eb21afade5a..30d0b2a398bd0e39895daf9b1421ec736ab8da83 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -11,7 +11,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import optimizer +import layer +import activation +import parameters +import trainer +import event +import data_type +import py_paddle.swig_paddle as api + +__all__ = [ + 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', + 'event', 'data_type' +] + + +def init(**kwargs): + args = [] + for key in kwargs.keys(): + args.append('--%s=%s' % (key, str(kwargs[key]))) -__all__ = ['optimizer'] + api.initPaddle(*args) diff --git a/python/paddle/v2/activation.py b/python/paddle/v2/activation.py new file mode 100644 index 0000000000000000000000000000000000000000..1f3aab9ef3c5f69e22d7e83250d0ff46c1ff718a --- /dev/null +++ b/python/paddle/v2/activation.py @@ -0,0 +1,37 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
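+
+# This module only re-exports the v1 activation classes under shorter names.
+# An illustrative use (assuming ``prev`` is an existing layer):
+#
+#     fc = paddle.v2.layer.fc(input=prev, size=200,
+#                             act=paddle.v2.activation.Relu())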
+ +from paddle.trainer_config_helpers.activations import * + +__all__ = [ + "Base", "Tanh", "Sigmoid", "Softmax", "Identity", "Linear", + 'SequenceSoftmax', "Exp", "Relu", "BRelu", "SoftRelu", "STanh", "Abs", + "Square", "Log" +] + +Base = BaseActivation +Tanh = TanhActivation +Sigmoid = SigmoidActivation +Softmax = SoftmaxActivation +SequenceSoftmax = SequenceSoftmaxActivation +Identity = IdentityActivation +Linear = Identity +Relu = ReluActivation +BRelu = BReluActivation +SoftRelu = SoftReluActivation +STanh = STanhActivation +Abs = AbsActivation +Square = SquareActivation +Exp = ExpActivation +Log = LogActivation diff --git a/python/paddle/v2/data_type.py b/python/paddle/v2/data_type.py new file mode 100644 index 0000000000000000000000000000000000000000..5b01ba4cd4866cf7b355fc0a6a667409cf9c4419 --- /dev/null +++ b/python/paddle/v2/data_type.py @@ -0,0 +1,22 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle.trainer.PyDataProvider2 import \ + InputType, dense_vector, sparse_binary_vector,\ + sparse_vector, integer_value + +__all__ = [ + 'InputType', 'dense_vector', 'sparse_binary_vector', 'sparse_vector', + 'integer_value' +] diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py new file mode 100644 index 0000000000000000000000000000000000000000..a16cfa91f062a60a141ea8fa962b3ecf6f5f0a22 --- /dev/null +++ b/python/paddle/v2/event.py @@ -0,0 +1,26 @@ +""" +All training events. + +There are: + +* BeginTraining +* EndTraining +* BeginIteration +* EndIteration +* BeginPass +* EndPass + +TODO(yuyang18): Complete it! +""" +__all__ = ['EndIteration'] + + +class EndIteration(object): + """ + Event On One Batch Training Complete. + """ + + def __init__(self, pass_id, batch_id, cost): + self.pass_id = pass_id + self.batch_id = batch_id + self.cost = cost diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py new file mode 100644 index 0000000000000000000000000000000000000000..507725ee4ff71200656869a2be1d0f7dd67b6387 --- /dev/null +++ b/python/paddle/v2/layer.py @@ -0,0 +1,215 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Before this new package paddle.v2.layer, users would need to use functions +in paddle.trainer_config_helpers.layers to configure networks. 
+ +The Old Way: +========= +This old way requires that the creation of a network be defined in a Python +function, say network_config, and that this Python function being passed to +paddle.trainer_config_helpers.parse_network_config for the creation of +protobuf message description of this network. + +```python +def network_config(): + img = paddle.trainer_config_helpers.data_layer(name="pixel", size=784) + inference = paddle.trainer_config_helpers.fc_layer( + input=img, + size=10, + act=paddle.trainer_config_helpers.SoftmaxActivation()) + cost = paddle.trainer_config_helpers.classification_cost( + input=inference, + label=paddle.trainer_config_helpers.data_layer(name="label", size=10)) + +proto_desc = parse_network_config(network_config) +``` + +When parse_network_config executes network_config, those layer definition +functions like data_layer and fc_layer would change some Python global variables, +so that after the execution, parse_network_config could collect information from +these global variables and generates the protobuf message. + + + +The New Way: +========= +In this PR, we define a function in paddle.v2.layer which creates a Python +class for each layer creation function in paddle.trainer_config_helpers.layers. +Users can use create a network as follows: + +```python +img = paddle.v2.layer.data(name="pixel", size=784) +inference = paddle.v2.layer.fc(input=img, size=10, act=paddle.v2.layer.Softmax()) +cost = paddle.v2.layer.classification( + input=inference, + label=paddle.v2.layer.data(name="label", size=10)) + +parameters = paddle.v2.parameters.create(cost) +``` + +This new way doesn't require those invocations to layer definition functions +to be in a Python function but could be anywhere. + +Also, the creation of a protobuf message is hidden in the invocation of +paddle.v2.parameters.create, no longer exposed to users. +""" + +import collections + +import paddle.trainer_config_helpers as conf_helps +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as __parse__ +from paddle.trainer_config_helpers.default_decorators import wrap_name_default + +import data_type + +__all__ = [ + 'parse_network', 'data', 'fc', 'max_id', 'classification_cost', + 'cross_entropy_cost' +] + + +def parse_network(*outputs): + """ + parse all output layers and then generate a model config proto. 
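+
+    For example (a sketch; ``cost1`` and ``cost2`` stand for output layers
+    built with the layer functions defined below):
+
+    .. code-block:: python
+
+        proto = parse_network(cost1)          # one output layer
+        proto = parse_network(cost1, cost2)   # several outputs in one proto
+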
+ :param outputs: + :return: + """ + + def __real_func__(): + context = dict() + real_output = [each.to_proto(context=context) for each in outputs] + conf_helps.outputs(real_output) + + return __parse__(__real_func__) + + +class Layer(object): + def __init__(self, name, parent_layers): + assert isinstance(parent_layers, dict) + assert isinstance(name, basestring) + self.name = name + self.__parent_layers__ = parent_layers + + def to_proto(self, context): + """ + function to set proto attribute + """ + kwargs = dict() + for layer_name in self.__parent_layers__: + if not isinstance(self.__parent_layers__[layer_name], + collections.Sequence): + v1_layer = self.__parent_layers__[layer_name].to_proto( + context=context) + else: + v1_layer = map(lambda x: x.to_proto(context=context), + self.__parent_layers__[layer_name]) + kwargs[layer_name] = v1_layer + + if self.name not in context: + context[self.name] = self.to_proto_impl(**kwargs) + return context[self.name] + + def to_proto_impl(self, **kwargs): + raise NotImplementedError() + + +def __convert_to_v2__(method_name, name_prefix, parent_names): + if name_prefix is not None: + wrapper = wrap_name_default(name_prefix=name_prefix) + else: + wrapper = None + + class V2LayerImpl(Layer): + def __init__(self, name=None, **kwargs): + parent_layers = dict() + other_kwargs = dict() + for pname in parent_names: + parent_layers[pname] = kwargs[pname] + + for key in kwargs.keys(): + if key not in parent_names: + other_kwargs[key] = kwargs[key] + + super(V2LayerImpl, self).__init__(name, parent_layers) + self.__other_kwargs__ = other_kwargs + + if wrapper is not None: + __init__ = wrapper(__init__) + + def to_proto_impl(self, **kwargs): + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__other_kwargs__: + args[each] = self.__other_kwargs__[each] + return getattr(conf_helps, method_name)(name=self.name, **args) + + return V2LayerImpl + + +""" +Some layer may need some special config, and can not use __convert_to_v2__ to convert. +So we also need to implement some special LayerV2. 
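+For example, the data layer below (DataLayerV2) has to map its v2 ``type``
+argument, an InputType, onto the plain ``size`` argument expected by the v1
+``data_layer``, so it cannot be generated by __convert_to_v2__.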
+""" + + +class DataLayerV2(Layer): + def __init__(self, name, type, **kwargs): + assert isinstance(type, data_type.InputType) + + self.type = type + self.__method_name__ = 'data_layer' + self.__kwargs__ = kwargs + + super(DataLayerV2, self).__init__(name=name, parent_layers=dict()) + + def to_proto_impl(self, **kwargs): + args = dict() + args['size'] = self.type.dim + for each in kwargs: + args[each] = kwargs[each] + for each in self.__kwargs__: + args[each] = self.__kwargs__[each] + return getattr(conf_helps, self.__method_name__)(name=self.name, **args) + + +data = DataLayerV2 +fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input']) +max_id = __convert_to_v2__( + 'maxid_layer', name_prefix='maxid_layer', parent_names=['input']) +classification_cost = __convert_to_v2__( + 'classification_cost', + name_prefix='classification_cost', + parent_names=['input', 'label']) +cross_entropy_cost = __convert_to_v2__( + 'cross_entropy', + name_prefix='cross_entropy', + parent_names=['input', 'label']) + +if __name__ == '__main__': + pixel = data(name='pixel', type=data_type.dense_vector(784)) + label = data(name='label', type=data_type.integer_value(10)) + hidden = fc(input=pixel, size=100, act=conf_helps.SigmoidActivation()) + inference = fc(input=hidden, size=10, act=conf_helps.SoftmaxActivation()) + maxid = max_id(input=inference) + cost1 = classification_cost(input=inference, label=label) + cost2 = cross_entropy_cost(input=inference, label=label) + + print parse_network(cost1) + print parse_network(cost2) + print parse_network(cost1, cost2) + print parse_network(cost2) + print parse_network(inference, maxid) diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py new file mode 100644 index 0000000000000000000000000000000000000000..ea504d5104716d157add87ed3f6e31ea69e0a3f0 --- /dev/null +++ b/python/paddle/v2/parameters.py @@ -0,0 +1,265 @@ +import numpy as np +from . import layer as v2_layer +import py_paddle.swig_paddle as api +from paddle.proto.ParameterConfig_pb2 import ParameterConfig + +__all__ = ['Parameters', 'create'] + + +def create(*layers): + """ + Create parameter pool by layers. In paddle, layer can be represent a + model config. + + :param layers: + :return: + """ + for layer in layers: + if not isinstance(layer, v2_layer.Layer): + raise ValueError( + 'create must pass a topologies which type is paddle.layer.Layer') + model_config = v2_layer.parse_network(*layers) + pool = Parameters() + for param in model_config.parameters: + pool.__append_config__(param) + return pool + + +class Parameters(object): + """ + Parameters is a dictionary contains Paddle's parameter. The key of + Parameters is the name of parameter. The value of Parameters is a plain + :code:`numpy.ndarry` . + + Basically usage is + + .. code-block:: python + + data = paddle.layers.data(...) + ... + out = paddle.layers.fc(...) + + parameters = paddle.parameters.create(out) + + parameter_names = parameters.names() + fc_mat = parameters.get('fc') + print fc_mat + """ + + def __init__(self): + self.__param_conf__ = dict() + self.__gradient_machines__ = [] + self.__tmp_params__ = [] + + def __append_config__(self, param_conf): + """ + Append a parameter configuration. 
It used to initialize Parameters and + should be invoked only in paddle.parameters.create + + :param param_conf: The parameter configuration in protobuf + :type param_conf: ParameterConfig + :return: Nothing + """ + + if not isinstance(param_conf, ParameterConfig): + raise ValueError("param_conf must be paddle.proto.ParameterConfig") + + if param_conf.name in self.__param_conf__: + raise ValueError("duplicated parameter %s" % param_conf.name) + + self.__param_conf__[param_conf.name] = param_conf + + def keys(self): + """ + keys are the names of each parameter. + :return: list of parameter name + :rtype: list + """ + return self.__param_conf__.keys() + + def names(self): + """ + names of each parameter. + :return: list of parameter name + :rtype: list + """ + return self.keys() + + def has_key(self, key): + """ + has_key return true if there are such parameter name == key + :param key: Parameter name + :type key: basestring + :return: True if contains such key + """ + return key in self.__param_conf__.keys() + + def __iter__(self): + """ + Return an iterator of parameter name. It is used by `for loop` + or `in` operator. + + .. code-block:: python + + parameters = paddle.parameters.create(...) + if "fc_param" in parameters: + print 'OK' + :return: an iterator of parameter name + :rtype: iterator + """ + return iter(self.__param_conf__) + + def __getitem__(self, key): + """ + Get parameter by parameter name. It uses Python dict syntax. + + :note: It will always copy the parameter from C++ side. + :param key: Parameter name + :type key: basestring + :return: parameter value + :rtype: np.ndarray + """ + shape = self.get_shape(key) + + if len(self.__gradient_machines__) == 0: + # create new parameter in python numpy. + return np.ndarray(shape=shape, dtype=np.float32) + else: + for each_gradient_machine in self.__gradient_machines__: + param = __get_parameter_in_gradient_machine__( + each_gradient_machine, key) + # for simplify implementation now, we always copy from C++ + assert isinstance(param, api.Parameter) + val = param.getBuf(api.PARAMETER_VALUE) + assert isinstance(val, api.Vector) + val = val.copyToNumpyArray() + return val + # else continue + + raise RuntimeError("Unexpected branch") + + def get_shape(self, key): + """ + get shape of the parameter. + :param key: parameter name + :type key: basestring + :return: parameter's shape + :rtype: tuple + """ + if not isinstance(key, basestring): + raise ValueError("parameter name should be string") + if not self.has_key(key): + raise ValueError("No such parameter %s" % key) + conf = self.__param_conf__[key] + return tuple(map(int, conf.dims)) + + def __setitem__(self, key, value): + """ + Set parameter by parameter name & value. It use Python dict syntax. + + :note: It will always copy the parameter to C++ side. + :param key: Parameter name + :type key: basestring + :param value: Parameter matrix. + :type value: np.ndarray + :return: Nothing + """ + + if not isinstance(value, np.ndarray): + raise ValueError("Must return ndarray") + value = value.astype(dtype=np.float32) + shape = self.get_shape(key) + if value.shape != shape: + raise ValueError("Value shape mismatch, expect %s, should %s" % + (shape, value.shape)) + + if len(self.__gradient_machines__) == 0: + self.__tmp_params__.append((key, value)) + else: + for each_gradient_machine in self.__gradient_machines__: + __copy_parameter_to_gradient_machine__(each_gradient_machine, + key, value) + + def get(self, parameter_name): + """ + Get parameter by parameter name. 
+ + :note: It will always copy the parameter from C++ side. + :param parameter_name: parameter name + :type parameter_name: basestring + :return: The parameter matrix. + :rtype: np.ndarray + """ + return self.__getitem__(key=parameter_name) + + def set(self, parameter_name, value): + """ + Set parameter by parameter name & matrix. + :param parameter_name: parameter name + :type parameter_name: basestring + :param value: parameter matrix + :type value: np.ndarray + :return: Nothing. + """ + self.__setitem__(key=parameter_name, value=value) + + def append_gradient_machine(self, gradient_machine): + """ + append gradient machine to parameters. This method is used internally in + Trainer.train. + + :param gradient_machine: Paddle C++ GradientMachine object. + :type gradient_machine: api.GradientMachine + :return: + """ + + if not isinstance(gradient_machine, api.GradientMachine): + raise ValueError("gradient_machine should be api.GradientMachine") + + if len(self.__tmp_params__) != 0: + for name, val in self.__tmp_params__: + try: + __copy_parameter_to_gradient_machine__(gradient_machine, + name, val) + except ValueError: + # If no such parameter in gradient machine, then don't copy + pass + self.__gradient_machines__.append(gradient_machine) + + +def __get_parameter_in_gradient_machine__(gradient_machine, name): + """ + + :param gradient_machine: + :type gradient_machine: api.GradientMachine + :param name: + :return: + :rtype: api.Parameter + """ + params = filter(lambda p: p.getName() == name, + gradient_machine.getParameters()) + + if len(params) == 0: + raise ValueError("No such parameter") + elif len(params) > 1: + raise ValueError("Unexpected branch") + else: + return params[0] + + +def __copy_parameter_to_gradient_machine__(gradient_machine, name, arr): + """ + Copy a python ndarray into the gradient machine. + + :param gradient_machine: + :type gradient_machine: api.GradientMachine + :param name: + :param arr: + :type arr: np.ndarray + :return: + :rtype: api.Parameter + """ + param = __get_parameter_in_gradient_machine__(gradient_machine, name) + vec = param.getBuf(api.PARAMETER_VALUE) + assert isinstance(vec, api.Vector) + vec.copyFromNumpyArray(arr.flatten()) diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..4365bd41e7073bce4112e5813dbf1517856c06f5 --- /dev/null +++ b/python/paddle/v2/trainer.py @@ -0,0 +1,188 @@ +import collections + +import py_paddle.swig_paddle as api +from paddle.proto.ModelConfig_pb2 import ModelConfig +from py_paddle import DataProviderConverter + +from . import event as v2_event +from . import layer as v2_layer +from . import optimizer as v2_optimizer +from . import parameters as v2_parameters + +__all__ = ['ITrainer', 'SGD'] + + +def default_event_handler(event): + """ + Default event handler. It will print some log and save mode. + + TODO(yuyang18): Complete it! + :param event: + :return: + """ + pass + + +class ITrainer(object): + """ + The interface of Trainer. The only exposed method is `train`. + """ + + def train(self, + train_data_reader, + topology, + parameters, + test_data_reader=None, + event_handler=None): + """ + train method. + + :param train_data_reader: + :param topology: + :param parameters: + :param test_data_reader: + :param event_handler: + :return: + """ + + raise NotImplementedError() + + +class SGD(ITrainer): + def __init__(self, update_equation): + """ + Simple SGD Trainer. + + :param update_equation: The optimizer object. 
+ :type update_equation: v2_optimizer.Optimizer + """ + if not isinstance(update_equation, v2_optimizer.Optimizer): + raise ValueError("update equation parameter must be " + "paddle.v2.optimizer.Optimizer") + self.__optimizer__ = update_equation + + def train(self, + train_data_reader, + topology, + parameters, + num_passes=1, + test_data_reader=None, + event_handler=None, + batch_size=32, + data_types=None): + """ + Training method. Will train num_passes of input data. + + :param train_data_reader: + :param topology: Network Topology, use one or more Layers to represent it. + :param parameters: The parameter pools. + :param num_passes: The total train passes. + :param test_data_reader: + :param event_handler: Event handler. A method will be invoked when event + occurred. + :type event_handler: (BaseEvent) => None + :param batch_size: Not important, will be removed after data refactor. + :param data_types: Not important, will be removed after data refactor. + :return: + """ + if event_handler is None: + event_handler = default_event_handler + + topology = v2_layer.parse_network(topology) + + __check_train_args__(**locals()) + + gm = api.GradientMachine.createFromConfigProto( + topology, api.CREATE_MODE_NORMAL, self.__optimizer__.enable_types()) + assert isinstance(gm, api.GradientMachine) + parameters.append_gradient_machine(gm) + + updater = self.__optimizer__.create_local_updater() + updater.init(gm) + + gm.start() + out_args = api.Arguments.createArguments(0) + + data_types_lists = [] + for each in topology.input_layer_names: + if each not in data_types: + raise ValueError() + data_types_lists.append(data_types[each]) + + converter = DataProviderConverter(input_types=data_types_lists) + + for pass_id in xrange(num_passes): + updater.startPass() + for batch_id, data_batch in enumerate( + __data_reader_to_batch__(train_data_reader, batch_size, + topology)): + pass_type = updater.startBatch(len(data_batch)) + gm.forwardBackward(converter(data_batch), out_args, pass_type) + for each_param in gm.getParameters(): + updater.update(each_param) + # Get cost. We use numpy to calculate total cost for this batch. + cost_vec = out_args.getSlotValue(0) + cost_vec = cost_vec.copyToNumpyMat() + cost = cost_vec.sum() / len(data_batch) + updater.finishBatch(cost) + event_handler( + v2_event.EndIteration( + pass_id=pass_id, batch_id=batch_id, cost=cost)) + + updater.finishPass() + gm.finish() + + +def __data_reader_to_batch__(reader, batch_size, topology): + """ + This function is not important, and will be removed when data refactored. + """ + + def input_reorder(func): + for item in func(): + retv = [] + for __layer_name__ in topology.input_layer_names: + retv.append(item[__layer_name__]) + yield retv + + return __generator_to_batch__(input_reorder(reader), batch_size=batch_size) + + +def __generator_to_batch__(generator, batch_size): + """ + This function is not important, and will be removed when data refactored. 
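+    It simply groups items from ``generator`` into lists of at most
+    ``batch_size`` elements, yielding the final, possibly smaller, batch as
+    well.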
+ """ + ret_val = list() + for each_item in generator: + ret_val.append(each_item) + if len(ret_val) == batch_size: + yield ret_val + ret_val = list() + if len(ret_val) != 0: + yield ret_val + + +def __check_train_args__(train_data_reader, topology, parameters, + test_data_reader, event_handler, **kwargs): + """ + Check train function's argument types + """ + if not callable(train_data_reader) or not isinstance(train_data_reader(), + collections.Iterator): + raise ValueError('train_data_reader should be a function, ' + 'which can return a iterator') + + if test_data_reader is not None: + if not callable(test_data_reader) or not isinstance( + test_data_reader(), collections.Iterator): + raise ValueError('test_data_reader should be a function, which can ' + 'return a iterator') + + if not isinstance(topology, ModelConfig): + raise ValueError('topology should be a model config') + + if not isinstance(parameters, v2_parameters.Parameters): + raise ValueError('parameters should be a parameter pool') + + if not callable(event_handler): + raise ValueError('event handler should be a function')
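+
+
+# A minimal sketch of how the pieces introduced in this package fit together.
+# It is illustrative only: ``read_training_data`` is a placeholder for any
+# callable that returns an iterator of {'pixel': ..., 'label': ...} dicts.
+#
+#     import paddle.v2 as paddle_v2
+#
+#     paddle_v2.init(trainer_count=1)
+#     pixel = paddle_v2.layer.data(
+#         name='pixel', type=paddle_v2.data_type.dense_vector(784))
+#     label = paddle_v2.layer.data(
+#         name='label', type=paddle_v2.data_type.integer_value(10))
+#     inference = paddle_v2.layer.fc(
+#         input=pixel, size=10, act=paddle_v2.activation.Softmax())
+#     cost = paddle_v2.layer.classification_cost(input=inference, label=label)
+#
+#     parameters = paddle_v2.parameters.create(cost)
+#     optimizer = paddle_v2.optimizer.Adam(learning_rate=1e-4, batch_size=1000)
+#     trainer = paddle_v2.trainer.SGD(update_equation=optimizer)
+#     trainer.train(
+#         train_data_reader=read_training_data,
+#         topology=cost,
+#         parameters=parameters,
+#         data_types={
+#             'pixel': paddle_v2.data_type.dense_vector(784),
+#             'label': paddle_v2.data_type.integer_value(10)
+#         })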