diff --git a/doc/api/trainer_config_helpers/layers.rst b/doc/api/trainer_config_helpers/layers.rst index 2793d6afd9565eb461c8657b838b146fe1992b20..bbea823de4d870f8a4384b6a85ebb7e8182797fe 100644 --- a/doc/api/trainer_config_helpers/layers.rst +++ b/doc/api/trainer_config_helpers/layers.rst @@ -139,24 +139,12 @@ lstmemory :members: lstmemory :noindex: -lstm_step_layer ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: lstm_step_layer - :noindex: - grumemory --------- .. automodule:: paddle.trainer_config_helpers.layers :members: grumemory :noindex: -gru_step_layer ---------------- -.. automodule:: paddle.trainer_config_helpers.layers - :members: gru_step_layer - :noindex: - Recurrent Layer Group ===================== @@ -172,6 +160,18 @@ recurrent_group :members: recurrent_group :noindex: +lstm_step_layer +--------------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: lstm_step_layer + :noindex: + +gru_step_layer +--------------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: gru_step_layer + :noindex: + beam_search ------------ .. automodule:: paddle.trainer_config_helpers.layers @@ -308,6 +308,12 @@ repeat_layer :members: repeat_layer :noindex: +rotate_layer +------------ +.. automodule:: paddle.trainer_config_helpers.layers + :members: rotate_layer + :noindex: + seq_reshape_layer ----------------- .. automodule:: paddle.trainer_config_helpers.layers @@ -462,6 +468,12 @@ ctc_layer :members: ctc_layer :noindex: +warp_ctc_layer +-------------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: warp_ctc_layer + :noindex: + nce_layer ----------- .. 
automodule:: paddle.trainer_config_helpers.layers diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 95f0915972cbacdf4ba860f893d1c47f175b8e8a..1bb1a01d509e6412c254fce856101137e66b1e12 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -112,6 +112,7 @@ __all__ = [ 'priorbox_layer', 'spp_layer', 'pad_layer', + 'eos_layer', 'layer_support', ] @@ -1289,6 +1290,12 @@ def last_seq(input, """ Get Last Timestamp Activation of a sequence. + The simple usage is: + + .. code-block:: python + + seq = last_seq(input=layer) + :param agg_level: Aggregated level :param name: Layer name. :type name: basestring @@ -1327,6 +1334,12 @@ def first_seq(input, """ Get First Timestamp Activation of a sequence. + The simple usage is: + + .. code-block:: python + + seq = first_seq(input=layer) + :param agg_level: aggregation level :param name: Layer name. :type name: basestring @@ -1427,7 +1440,7 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None): .. code-block:: python - expand = repeat_layer(layer, 4) + expand = repeat_layer(input=layer, num_repeats=4) :param input: Input layer :type input: LayerOutput @@ -1799,6 +1812,12 @@ def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None): Note that the above computation is for one sample. Multiple samples are processed in one batch. + The example usage is: + + .. code-block:: python + + cos = cos_sim(a=layer1, b=layer2, size=3) + :param name: layer name :type name: basestring :param a: input layer a @@ -1960,6 +1979,16 @@ def img_conv_layer(input, pieces. First 256/4 = 64 channels will process by first 32 filters. The rest channels will be processed by rest group of filters. + The example usage is: + + .. 
code-block:: python + + conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1, + num_channels=8, + num_filters=16, stride=1, + bias_attr=False, + act=ReluActivation()) + :param name: Layer name. :type name: basestring :param input: Layer Input. @@ -2099,6 +2128,34 @@ def img_pool_layer(input, .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ + - ceil_mode=True: + + .. math:: + + w = 1 + int(ceil((input\_width + 2 * padding - pool\_size) / float(stride))) + h = 1 + int(ceil((input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y))) + + - ceil_mode=False: + + .. math:: + + w = 1 + int(floor((input\_width + 2 * padding - pool\_size) / float(stride))) + h = 1 + int(floor((input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y))) + + The example usage is: + + .. code-block:: python + + maxpool = img_pool_layer(input=conv, + pool_size=3, + pool_size_y=5, + num_channels=8, + stride=1, + stride_y=2, + padding=1, + padding_y=2, + pool_type=MaxPooling()) + :param padding: pooling padding width. :type padding: int :param padding_y: pooling padding height. It's equal to padding by default. @@ -2125,19 +2182,6 @@ def img_pool_layer(input, :param ceil_mode: Wether to use ceil mode to calculate output height and with. Defalut is True. If set false, Otherwise use floor. - - ceil_mode=True: - - .. math:: - - w = 1 + int(ceil(input_width + 2 * padding - pool_size) / float(stride)) - h = 1 + int(ceil(input_height + 2 * padding_y - pool_size_y) / float(stride_y)) - - - ceil_mode=False: - - .. math:: - - w = 1 + int(floor(input_width + 2 * padding - pool_size) / float(stride)) - h = 1 + int(floor(input_height + 2 * padding_y - pool_size_y) / float(stride_y)) :type ceil_mode: bool :return: LayerOutput object. :rtype: LayerOutput @@ -2199,6 +2243,15 @@ def spp_layer(input, The details please refer to `Kaiming He's paper `_. + The example usage is: + + .. 
code-block:: python + + spp = spp_layer(input=data, + pyramid_height=2, + num_channels=16, + pool_type=MaxPooling()) + :param name: layer name. :type name: basestring :param input: layer's input. @@ -2287,6 +2340,12 @@ def img_cmrnorm_layer(input, The details please refer to `Alex's paper `_. + The example usage is: + + .. code-block:: python + + norm = img_cmrnorm_layer(input=net, size=5) + :param name: layer name. :type name: None|basestring :param input: layer's input. @@ -2342,6 +2401,12 @@ def batch_norm_layer(input, The details of batch normalization please refer to this `paper `_. + The example usage is: + + .. code-block:: python + + norm = batch_norm_layer(input=net, act=ReluActivation()) + :param name: layer name. :type name: basestring :param input: batch normalization input. Better be linear activation. @@ -3905,13 +3970,13 @@ def conv_shift_layer(a, b, name=None, layer_attr=None): .. code-block:: python - conv_shift = conv_shift_layer(input=[layer1, layer2]) + conv_shift = conv_shift_layer(a=layer1, b=layer2) :param name: layer name :type name: basestring :param a: Input layer a. :type a: LayerOutput - :param b: input layer b + :param b: input layer b. :type b: LayerOutput :param layer_attr: layer's extra attribute. :type layer_attr: ExtraLayerAttribute @@ -4003,8 +4068,8 @@ def tensor_layer(a, @wrap_act_default() @layer_support() def selective_fc_layer(input, - select, size, + select=None, act=None, name=None, pass_generation=False, @@ -4031,6 +4096,7 @@ def selective_fc_layer(input, :type input: LayerOutput|list|tuple :param select: The select layer. The output of select layer should be a sparse binary matrix, and treat as the mask of selective fc. + If it is None, this layer acts exactly like fc_layer. :type select: LayerOutput :param size: The layer dimension. :type size: int @@ -4259,7 +4325,7 @@ def block_expand_layer(input, .. 
code-block:: python - block_expand = block_expand_layer(input, + block_expand = block_expand_layer(input=layer, num_channels=128, stride_x=1, stride_y=1, @@ -4463,7 +4529,7 @@ def warp_ctc_layer(input, - You can set 'blank' to any value ranged in [0, num_classes], which should be consistent as that used in your labels. - As a native 'softmax' activation is interated to the warp-ctc library, - 'linear' activation is expected instead in the 'input' layer. + 'linear' activation is expected instead in the 'input' layer. The simple usage: @@ -4596,6 +4662,13 @@ def crf_decoding_layer(input, this layer will also calculate error. output.value[i] is 1 for incorrect decoding or 0 for correct decoding. + The simple usage: + + .. code-block:: python + + crf_decoding = crf_decoding_layer(input=input, + size=label_dim) + :param input: The first input layer. :type input: LayerOutput :param size: size of this layer. diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index 49d1983a2a422b7e105c66dd92419426f0853212..1dff754edf1faafcabd3bcea733970235964d344 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -20,11 +20,12 @@ import event import data_type import data_feeder import attr +import pooling import py_paddle.swig_paddle as api __all__ = [ 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', - 'event', 'data_type', 'attr', 'data_feeder' + 'event', 'data_type', 'attr', 'pooling', 'data_feeder' ] diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index d5fe06542bc19effa130171e78c158f99ffa45a7..d15e6398f51f43c1eeab67bba654f91cc56135a4 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -82,10 +82,17 @@ import activation import attr __all__ = [ - 'parse_network', 'data', 'fc', 'max_id', 'classification_cost', - 'cross_entropy_cost', 'cross_entropy_with_selfnorm_cost', 'regression_cost', + 'parse_network', 'data', 'fc', 'conv_shift', 'img_conv', 'img_pool', 'spp', + 'maxout', 
'img_cmrnorm', 'batch_norm', 'sum_to_one_norm', 'recurrent', + 'lstmemory', 'grumemory', 'pool', 'last_seq', 'first_seq', 'concat', + 'seq_concat', 'block_expand', 'expand', 'repeat', 'seq_reshape', 'addto', + 'linear_comb', 'interpolation', 'bilinear_interp', 'power', 'scaling', + 'slope_intercept', 'tensor', 'cos_sim', 'trans', 'max_id', 'sampling_id', + 'pad', 'classification_cost', 'cross_entropy_cost', + 'cross_entropy_with_selfnorm_cost', 'regression_cost', 'multi_binary_label_cross_entropy_cost', 'rank_cost', 'lambda_cost', - 'sum_cost', 'huber_cost' + 'sum_cost', 'huber_cost', 'crf', 'crf_decoding', 'ctc', 'warp_ctc', 'nce', + 'hsigmoid', 'eos' ] __projection_names__ = filter(lambda x: x.endswith('_projection'), @@ -143,9 +150,9 @@ class Layer(object): raise NotImplementedError() -def __convert_to_v2__(method_name, name_prefix=None, parent_names=None): - if name_prefix is not None: - wrapper = wrap_name_default(name_prefix=name_prefix) +def __convert_to_v2__(method_name, parent_names, is_default_name=True): + if is_default_name: + wrapper = wrap_name_default(name_prefix=method_name) else: wrapper = None @@ -278,43 +285,91 @@ def mixed(size=0, data = DataLayerV2 -fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input']) -max_id = __convert_to_v2__( - 'maxid_layer', name_prefix='maxid', parent_names=['input']) -classification_cost = __convert_to_v2__( - 'classification_cost', - name_prefix='classification_cost', - parent_names=['input', 'label', 'weight']) -regression_cost = __convert_to_v2__( - 'regression_cost', - name_prefix='regression_cost', - parent_names=['input', 'label', 'weight']) -cross_entropy_cost = __convert_to_v2__( - 'cross_entropy', - name_prefix='cross_entropy', - parent_names=['input', 'label']) -cross_entropy_with_selfnorm_cost = __convert_to_v2__( - 'cross_entropy_with_selfnorm', - name_prefix='cross_entropy_with_selfnorm', - parent_names=['input', 'label']) -multi_binary_label_cross_entropy_cost = __convert_to_v2__( - 
'multi_binary_label_cross_entropy', - name_prefix='multi_binary_label_cross_entropy', - parent_names=['input', 'label']) -rank_cost = __convert_to_v2__( - 'rank_cost', - name_prefix='rank_cost', - parent_names=['left', 'right', 'label', 'weight']) -lambda_cost = __convert_to_v2__( - 'lambda_cost', name_prefix='lambda_cost', parent_names=['input', 'score']) -sum_cost = __convert_to_v2__( - 'sum_cost', name_prefix='sum_cost', parent_names=['input']) -huber_cost = __convert_to_v2__( - 'huber_cost', name_prefix='huber_cost', parent_names=['input', 'label']) +AggregateLevel = conf_helps.layers.AggregateLevel +ExpandLevel = conf_helps.layers.ExpandLevel + +layer_list = [ + # [V2LayerImpl, V1_method_name, parent_names] + # fully connected layers + ['fc', 'fc_layer', ['input']], + # conv layers + ['conv_shift', 'conv_shift_layer', ['a', 'b']], + ['img_conv', 'img_conv_layer', ['input']], + # image pooling layers + ['img_pool', 'img_pool_layer', ['input']], + ['spp', 'spp_layer', ['input']], + ['maxout', 'maxout_layer', ['input']], + # norm layers + ['img_cmrnorm', 'img_cmrnorm_layer', ['input']], + ['batch_norm', 'batch_norm_layer', ['input']], + ['sum_to_one_norm', 'sum_to_one_norm_layer', ['input']], + # recurrent layers + ['recurrent', 'recurrent_layer', ['input']], + ['lstmemory', 'lstmemory', ['input']], + ['grumemory', 'grumemory', ['input']], + # aggregate layers + ['pool', 'pooling_layer', ['input']], + ['last_seq', 'last_seq', ['input']], + ['first_seq', 'first_seq', ['input']], + ['concat', 'concat_layer', ['input']], + ['seq_concat', 'seq_concat_layer', ['a', 'b']], + # reshaping layers + ['block_expand', 'block_expand_layer', ['input']], + ['expand', 'expand_layer', ['input', 'expand_as']], + ['repeat', 'repeat_layer', ['input']], + ['rotate', 'rotate_layer', ['input']], + ['seq_reshape', 'seq_reshape_layer', ['input']], + # math layers + ['addto', 'addto_layer', ['input']], + ['linear_comb', 'linear_comb_layer', ['weights', 'vectors']], + ['interpolation', 
'interpolation_layer', ['input', 'weight']], + ['bilinear_interp', 'bilinear_interp_layer', ['input']], + ['power', 'power_layer', ['input', 'weight']], + ['scaling', 'scaling_layer', ['input', 'weight']], + ['slope_intercept', 'slope_intercept_layer', ['input']], + ['tensor', 'tensor_layer', ['a', 'b']], + ['cos_sim', 'cos_sim', ['a', 'b']], + ['trans', 'trans_layer', ['input']], + # sampling layers + ['max_id', 'maxid_layer', ['input']], + ['sampling_id', 'sampling_id_layer', ['input']], + # slicing and joining layers + ['pad', 'pad_layer', ['input']], + # cost layers + [ + 'classification_cost', 'classification_cost', + ['input', 'label', 'weight'] + ], + ['regression_cost', 'regression_cost', ['input', 'label', 'weight']], + ['cross_entropy_cost', 'cross_entropy', ['input', 'label']], + [ + 'cross_entropy_with_selfnorm_cost', 'cross_entropy_with_selfnorm', + ['input', 'label'] + ], + [ + 'multi_binary_label_cross_entropy_cost', + 'multi_binary_label_cross_entropy', ['input', 'label'] + ], + ['rank_cost', 'rank_cost', ['left', 'right', 'label', 'weight']], + ['lambda_cost', 'lambda_cost', ['input', 'score']], + ['sum_cost', 'sum_cost', ['input']], + ['huber_cost', 'huber_cost', ['input', 'label']], + ['crf', 'crf_layer', ['input', 'label']], + ['crf_decoding', 'crf_decoding_layer', ['input']], + ['ctc', 'ctc_layer', ['input', 'label']], + ['warp_ctc', 'warp_ctc_layer', ['input', 'label']], + ['nce', 'nce_layer', ['input', 'label']], + ['hsigmoid', 'hsigmoid', ['input', 'label']], + # check layers + ['eos', 'eos_layer', ['input']] +] +for l in layer_list: + globals()[l[0]] = __convert_to_v2__(l[1], l[2]) # convert projection for prj in __projection_names__: - globals()[prj] = __convert_to_v2__(prj, parent_names=['input']) + globals()[prj] = __convert_to_v2__( + prj, parent_names=['input'], is_default_name=False) # convert operator operator_list = [ @@ -323,4 +378,5 @@ operator_list = [ ['conv_operator', ['img', 'filter']] ] for op in operator_list: - 
globals()[op[0]] = __convert_to_v2__(op[0], parent_names=op[1]) + globals()[op[0]] = __convert_to_v2__( + op[0], parent_names=op[1], is_default_name=False) diff --git a/python/paddle/v2/pooling.py b/python/paddle/v2/pooling.py new file mode 100644 index 0000000000000000000000000000000000000000..9076a159bb4f2c58bf52cba1657c58510637f4f8 --- /dev/null +++ b/python/paddle/v2/pooling.py @@ -0,0 +1,24 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddle.trainer_config_helpers.poolings import * + +__all__ = ["Max", "CudnnMax", "Avg", "CudnnAvg", "Sum", "SquareRootN"] + +Max = MaxPooling +CudnnMax = CudnnMaxPooling +Avg = AvgPooling +CudnnAvg = CudnnAvgPooling +Sum = SumPooling +SquareRootN = SquareRootNPooling diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index bf1c344202f457f246c4f5b4f2ea8b42e916499c..bb0099ea2fbb78b0a05eedf23af95a02e8849015 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -19,16 +19,106 @@ import paddle.v2.activation as activation import paddle.v2.attr as attr import paddle.v2.data_type as data_type import paddle.v2.layer as layer +import paddle.v2.pooling as pooling +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as parse_network -pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) +pixel = layer.data(name='pixel', type=data_type.dense_vector(128)) label = layer.data(name='label', type=data_type.integer_value(10)) weight = layer.data(name='weight', type=data_type.dense_vector(10)) score = layer.data(name='score', type=data_type.dense_vector(1)) + hidden = layer.fc(input=pixel, size=100, act=activation.Sigmoid(), param_attr=attr.Param(name='hidden')) inference = layer.fc(input=hidden, size=10, act=activation.Softmax()) +conv = layer.img_conv( + input=pixel, + filter_size=1, + filter_size_y=1, + num_channels=8, + num_filters=16, + act=activation.Linear()) + + +class ImageLayerTest(unittest.TestCase): + def test_conv_layer(self): + conv_shift = layer.conv_shift(a=pixel, b=score) + print layer.parse_network(conv, conv_shift) + + def test_pooling_layer(self): + maxpool = layer.img_pool( + input=conv, + pool_size=2, + num_channels=16, + padding=1, + pool_type=pooling.Max()) + spp = layer.spp(input=conv, + pyramid_height=2, + num_channels=16, + pool_type=pooling.Max()) + maxout = layer.maxout(input=conv, num_channels=16, groups=4) + print 
layer.parse_network(maxpool, spp, maxout) + + def test_norm_layer(self): + norm1 = layer.img_cmrnorm(input=conv, size=5) + norm2 = layer.batch_norm(input=conv) + norm3 = layer.sum_to_one_norm(input=conv) + print layer.parse_network(norm1, norm2, norm3) + + +class AggregateLayerTest(unittest.TestCase): + def test_aggregate_layer(self): + pool = layer.pool( + input=pixel, + pooling_type=pooling.Avg(), + agg_level=layer.AggregateLevel.EACH_SEQUENCE) + last_seq = layer.last_seq(input=pixel) + first_seq = layer.first_seq(input=pixel) + concat = layer.concat(input=[last_seq, first_seq]) + seq_concat = layer.seq_concat(a=last_seq, b=first_seq) + print layer.parse_network(pool, last_seq, first_seq, concat, seq_concat) + + +class MathLayerTest(unittest.TestCase): + def test_math_layer(self): + addto = layer.addto(input=[pixel, pixel]) + linear_comb = layer.linear_comb(weights=weight, vectors=hidden, size=10) + interpolation = layer.interpolation( + input=[hidden, hidden], weight=score) + bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4) + power = layer.power(input=pixel, weight=score) + scaling = layer.scaling(input=pixel, weight=score) + slope = layer.slope_intercept(input=pixel) + tensor = layer.tensor(a=pixel, b=pixel, size=1000) + cos_sim = layer.cos_sim(a=pixel, b=pixel) + trans = layer.trans(input=tensor) + print layer.parse_network(addto, linear_comb, interpolation, power, + scaling, slope, tensor, cos_sim, trans) + + +class ReshapeLayerTest(unittest.TestCase): + def test_reshape_layer(self): + block_expand = layer.block_expand( + input=conv, num_channels=4, stride_x=1, block_x=1) + expand = layer.expand( + input=weight, + expand_as=pixel, + expand_level=layer.ExpandLevel.FROM_TIMESTEP) + repeat = layer.repeat(input=pixel, num_repeats=4) + reshape = layer.seq_reshape(input=pixel, reshape_size=4) + rotate = layer.rotate(input=pixel, height=16, width=49) + print layer.parse_network(block_expand, expand, repeat, reshape, rotate) + + +class 
RecurrentLayerTest(unittest.TestCase): + def test_recurrent_layer(self): + word = layer.data(name='word', type=data_type.integer_value(12)) + recurrent = layer.recurrent(input=word) + lstm = layer.lstmemory(input=word) + gru = layer.grumemory(input=word) + print layer.parse_network(recurrent, lstm, gru) class CostLayerTest(unittest.TestCase): @@ -49,13 +139,35 @@ class CostLayerTest(unittest.TestCase): cost10 = layer.sum_cost(input=inference) cost11 = layer.huber_cost(input=score, label=label) - print dir(layer) - layer.parse_network(cost1, cost2) - print dir(layer) - #print layer.parse_network(cost3, cost4) - #print layer.parse_network(cost5, cost6) - #print layer.parse_network(cost7, cost8, cost9, cost10, cost11) + print layer.parse_network(cost1, cost2) + print layer.parse_network(cost3, cost4) + print layer.parse_network(cost5, cost6) + print layer.parse_network(cost7, cost8, cost9, cost10, cost11) + + crf = layer.crf(input=inference, label=label) + crf_decoding = layer.crf_decoding(input=inference, size=3) + ctc = layer.ctc(input=inference, label=label) + warp_ctc = layer.warp_ctc(input=pixel, label=label) + nce = layer.nce(input=inference, label=label, num_classes=3) + hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3) + + print layer.parse_network(crf, crf_decoding, ctc, warp_ctc, nce, + hsigmoid) + + +class OtherLayerTest(unittest.TestCase): + def test_sampling_layer(self): + maxid = layer.max_id(input=inference) + sampling_id = layer.sampling_id(input=inference) + eos = layer.eos(input=maxid, eos_id=5) + print layer.parse_network(maxid, sampling_id, eos) + + def test_slicing_joining_layer(self): + pad = layer.pad(input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1]) + print layer.parse_network(pad) + +class ProjOpTest(unittest.TestCase): def test_projection(self): input = layer.data(name='data', type=data_type.dense_vector(784)) word = layer.data(