提交 c1738e29 编写于 作者: L Luo Tao

Merge branch 'develop' into stride

......@@ -64,6 +64,7 @@ include(external/python) # download, build, install python
include(external/openblas) # download, build, install openblas
include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc
include(external/any) # download libn::any
include(package) # set paddle packages
include(cpplint) # set paddle c++ style
......
INCLUDE(ExternalProject)
SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any)
INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/linb_any)
ExternalProject_Add(
linb_any
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/thelink2012/any.git"
GIT_TAG "8fef1e93710a0edf8d7658999e284a1142c4c020"
PREFIX ${ANY_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE)
import sys
import paddle.v2 as paddle
def seqToseq_net(source_dict_dim, target_dict_dim):
def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
beam_size = 3
max_length = 250
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
......@@ -67,30 +71,57 @@ def seqToseq_net(source_dict_dim, target_dict_dim):
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
if not is_generating:
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embeding (the groudtruth) is the data input,
# while encoded source sequence is accessed to as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
else:
# In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
# StaticInput, which is a read-only memory.
# Embedding of the last generated word is automatically gotten by
# GeneratedInputs, which is initialized by a start mark, such as <s>,
# and must be included in generation.
trg_embedding = paddle.layer.GeneratedInputV2(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
beam_gen = paddle.layer.beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs,
bos_id=0,
eos_id=1,
beam_size=beam_size,
max_length=max_length)
return beam_gen
def main():
......
......@@ -16,66 +16,6 @@ limitations under the License. */
namespace paddle {
template <>
size_t FuncConfig::get<size_t>(const std::string& key) const {
auto it = valueMap_.find(key);
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
return it->second.s;
}
template <>
real FuncConfig::get<real>(const std::string& key) const {
auto it = valueMap_.find(key);
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
return it->second.r;
}
template <>
int FuncConfig::get<int>(const std::string& key) const {
auto it = valueMap_.find(key);
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
return it->second.i;
}
template <>
bool FuncConfig::get<bool>(const std::string& key) const {
auto it = valueMap_.find(key);
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
return it->second.b;
}
template <>
FuncConfig& FuncConfig::set<size_t>(const std::string& key, size_t v) {
CHECK_EQ(static_cast<int>(valueMap_.count(key)), 0) << "Duplicated value: "
<< key;
valueMap_[key].s = v;
return *this;
}
template <>
FuncConfig& FuncConfig::set<real>(const std::string& key, real v) {
CHECK_EQ(static_cast<int>(valueMap_.count(key)), 0) << "Duplicated value: "
<< key;
valueMap_[key].r = v;
return *this;
}
template <>
FuncConfig& FuncConfig::set<int>(const std::string& key, int v) {
CHECK_EQ(static_cast<int>(valueMap_.count(key)), 0) << "Duplicated value: "
<< key;
valueMap_[key].i = v;
return *this;
}
template <>
FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
CHECK_EQ(static_cast<int>(valueMap_.count(key)), 0) << "Duplicated value: "
<< key;
valueMap_[key].b = v;
return *this;
}
void BufferArgs::addArg(const Matrix& arg,
const TensorShape& shape,
ArgType argType) {
......
......@@ -18,32 +18,49 @@ limitations under the License. */
#include <vector>
#include "BufferArg.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Any.h"
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/utils/Error.h"
namespace paddle {
/**
* Function Configuration.
* The argument type of Function::init.
* Follow-up will consider moving this data structure to Proto inside.
*/
class FuncConfig {
public:
union value {
size_t s;
real r;
int i;
bool b;
};
template <typename T>
T get(const std::string& key) const;
T get(const std::string& key, Error* err = nullptr) const {
try {
return any_cast<T>(valueMap_.at(key));
} catch (std::exception& e) { // could be cast or out of range exception.
if (err) {
*err = Error(e.what());
} else {
LOG(FATAL) << "Cannot get key " << key << "with error " << e.what();
}
return T();
}
}
template <typename T>
FuncConfig& set(const std::string& key, T v);
FuncConfig& set(const std::string& key, T v, Error* err = nullptr) {
auto it = valueMap_.find(key);
if (it != valueMap_.end()) { // already contains key.
if (err) {
*err = Error("Key %s is already set in FuncConfig", key.c_str());
} else {
LOG(FATAL) << "Key " << key << " is already set in FuncConfig.";
}
return *this;
}
valueMap_[key] = any(v);
return *this;
}
protected:
std::map<std::string, value> valueMap_;
mutable std::unordered_map<std::string, any> valueMap_;
};
/**
......
......@@ -25,9 +25,9 @@ void Pad<DEVICE_TYPE_CPU>(real* outputs,
const int inH,
const int inW,
const PadConf& pad) {
int cstart = pad.channelStart, cend = pad.channelEnd;
int hstart = pad.heightStart, hend = pad.heightEnd;
int wstart = pad.widthStart, wend = pad.widthEnd;
int cstart = pad.channel[0], cend = pad.channel[1];
int hstart = pad.height[0], hend = pad.height[1];
int wstart = pad.width[0], wend = pad.width[1];
int outC = inC + cstart + cend;
int outH = inH + hstart + hend;
int outW = inW + wstart + wend;
......@@ -51,9 +51,9 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
const int inH,
const int inW,
const PadConf& pad) {
int cstart = pad.channelStart, cend = pad.channelEnd;
int hstart = pad.heightStart, hend = pad.heightEnd;
int wstart = pad.widthStart, wend = pad.widthEnd;
int cstart = pad.channel[0], cend = pad.channel[1];
int hstart = pad.height[0], hend = pad.height[1];
int wstart = pad.width[0], wend = pad.width[1];
int outC = inC + cstart + cend;
int outH = inH + hstart + hend;
int outW = inW + wstart + wend;
......@@ -71,6 +71,12 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
}
}
static inline PadConf castToPadConf(const FuncConfig& conf) {
return {conf.get<std::vector<uint32_t>>("channel"),
conf.get<std::vector<uint32_t>>("height"),
conf.get<std::vector<uint32_t>>("width")};
}
/**
* \brief Padding zeros to input according to the specify dimension.
* The struct pad_ contains the padding size in each dimension.
......@@ -127,14 +133,7 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
template <DeviceType Device>
class PadFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
pad_.channelStart = config.get<int>("cstart");
pad_.channelEnd = config.get<int>("cend");
pad_.heightStart = config.get<int>("hstart");
pad_.heightEnd = config.get<int>("hend");
pad_.widthStart = config.get<int>("wstart");
pad_.widthEnd = config.get<int>("wend");
}
void init(const FuncConfig& config) override { pad_ = castToPadConf(config); }
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(1UL, inputs.size());
......@@ -175,14 +174,7 @@ private:
template <DeviceType Device>
class PadGradFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
pad_.channelStart = config.get<int>("cstart");
pad_.channelEnd = config.get<int>("cend");
pad_.heightStart = config.get<int>("hstart");
pad_.heightEnd = config.get<int>("hend");
pad_.widthStart = config.get<int>("wstart");
pad_.widthEnd = config.get<int>("wend");
}
void init(const FuncConfig& config) override { pad_ = castToPadConf(config); }
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(1UL, inputs.size());
......
......@@ -19,18 +19,12 @@ limitations under the License. */
namespace paddle {
struct PadConf {
/// how many values to add before the data along channel dimension.
int channelStart;
/// how many values to add after the data along channel dimension.
int channelEnd;
/// how many values to add before the data along height dimension.
int heightStart;
/// how many values to add after the data along height dimension.
int heightEnd;
/// how many values to add before the data along width dimension.
int widthStart;
/// how many values to add after the data along width dimension.
int widthEnd;
/// how many values to add before/after the data along channel dimension.
std::vector<uint32_t> channel;
/// how many values to add before/after the data along height dimension.
std::vector<uint32_t> height;
/// how many values to add before/after the data along width dimension.
std::vector<uint32_t> width;
};
/**
......
......@@ -36,12 +36,9 @@ bool PadLayer::init(const LayerMap& layerMap,
CHECK_EQ(2, pad_conf.pad_c_size());
CHECK_EQ(2, pad_conf.pad_h_size());
CHECK_EQ(2, pad_conf.pad_w_size());
padc_.push_back(pad_conf.pad_c(0));
padc_.push_back(pad_conf.pad_c(1));
padh_.push_back(pad_conf.pad_h(0));
padh_.push_back(pad_conf.pad_h(1));
padw_.push_back(pad_conf.pad_w(0));
padw_.push_back(pad_conf.pad_w(1));
padc_ = {pad_conf.pad_c(0), pad_conf.pad_c(1)};
padh_ = {pad_conf.pad_h(0), pad_conf.pad_h(1)};
padw_ = {pad_conf.pad_w(0), pad_conf.pad_w(1)};
outDims_ = TensorShape(4);
setOutDims(0);
......@@ -49,21 +46,15 @@ bool PadLayer::init(const LayerMap& layerMap,
createFunction(forward_,
"Pad",
FuncConfig()
.set("cstart", padc_[0])
.set("cend", padc_[1])
.set("hstart", padh_[0])
.set("hend", padh_[1])
.set("wstart", padw_[0])
.set("wend", padw_[1]));
.set("channel", padc_)
.set("height", padh_)
.set("width", padw_));
createFunction(backward_,
"PadGrad",
FuncConfig()
.set("cstart", padc_[0])
.set("cend", padc_[1])
.set("hstart", padh_[0])
.set("hend", padh_[1])
.set("wstart", padw_[0])
.set("wend", padw_[1]));
.set("channel", padc_)
.set("height", padh_)
.set("width", padw_));
return true;
}
......
......@@ -38,9 +38,9 @@ protected:
void setOutDims(const size_t batchSize);
void setTensorDim(const size_t batchSize);
std::vector<int> padc_;
std::vector<int> padh_;
std::vector<int> padw_;
std::vector<uint32_t> padc_;
std::vector<uint32_t> padh_;
std::vector<uint32_t> padw_;
TensorShape inDims_;
TensorShape outDims_;
};
......
......@@ -160,10 +160,19 @@ class SparseFloatScanner(SparseBinaryScanner):
class IndexScanner(IScanner):
def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos)
self.__ids__ = []
self.__ids__ = None
self.__idx__ = 0
def pre_scan(self, dat):
self.__idx__ += 1
def finish_pre_scan(self, argument):
self.__ids__ = [0] * self.__idx__
self.__idx__ = 0
def scan(self, dat):
self.__ids__.append(dat)
self.__ids__[self.__idx__] = dat
self.__idx__ += 1
def finish_scan(self, argument):
ids = swig_paddle.IVector.create(self.__ids__, self.data_in_gpu)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#if __cplusplus > 201402L
#include <any>
namespace paddle {
// using std::any for C++ 17
using std::any;
using std::any_cast;
using std::bad_any_cast;
} // namespace paddle
#else
#include <any.hpp>
namespace paddle {
// use linb::any for C++ 11
using linb::any;
using linb::any_cast;
using linb::bad_any_cast;
} // namespace paddle
#endif
......@@ -18,7 +18,7 @@ import inspect
from paddle.trainer.config_parser import *
from .activations import LinearActivation, SigmoidActivation, TanhActivation, \
ReluActivation, IdentityActivation, SoftmaxActivation
ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation
from .evaluators import *
from .poolings import MaxPooling, AvgPooling, BasePoolingType
from .attrs import *
......@@ -2277,8 +2277,9 @@ def img_pool_layer(input,
pool_type.name = 'avg'
type_name = pool_type.name + '-projection' \
if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
else pool_type.name
if (
isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
else pool_type.name
pool_size_y = pool_size if pool_size_y is None else pool_size_y
stride_y = stride if stride_y is None else stride_y
......@@ -3318,8 +3319,8 @@ def recurrent_group(step,
assert (targetInlink == None or targetInlink_in_inlinks())
targetInlinkName = None if targetInlink == None \
else targetInlink.name if isinstance(targetInlink, LayerOutput) \
else targetInlink.input.name
else targetInlink.name if isinstance(targetInlink, LayerOutput) \
else targetInlink.input.name
contains_sub_seq = [False]
......@@ -4831,12 +4832,14 @@ def crf_decoding_layer(input,
return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1)
@wrap_act_default(act=SigmoidActivation())
@wrap_bias_attr_default(has_bias=True)
@wrap_name_default()
@layer_support()
def nce_layer(input,
label,
num_classes,
act=None,
weight=None,
num_neg_samples=10,
neg_distribution=None,
......@@ -4865,6 +4868,8 @@ def nce_layer(input,
:type weight: LayerOutput
:param num_classes: number of classes.
:type num_classes: int
:param act: Activation, default is Sigmoid.
:type act: BaseActivation
:param num_neg_samples: number of negative samples. Default is 10.
:type num_neg_samples: int
:param neg_distribution: The distribution for generating the random negative labels.
......@@ -4887,6 +4892,8 @@ def nce_layer(input,
assert isinstance(neg_distribution, collections.Sequence)
assert len(neg_distribution) == num_classes
assert sum(neg_distribution) == 1
if not isinstance(act, BaseActivation):
raise TypeError()
ipts_for_layer = []
parents = []
......@@ -4908,12 +4915,17 @@ def nce_layer(input,
type=LayerType.NCE_LAYER,
num_classes=num_classes,
neg_sampling_dist=neg_distribution,
active_type=act.name,
num_neg_samples=num_neg_samples,
inputs=ipts_for_layer,
bias=ParamAttr.to_bias(bias_attr),
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name, LayerType.NCE_LAYER, parents=parents, size=l.config.size)
name,
LayerType.NCE_LAYER,
parents=parents,
size=l.config.size,
activation=act)
"""
......
......@@ -67,7 +67,16 @@ class Layer(object):
self.name = name
self.__context__ = {}
self.__parent_layers__ = parent_layers
self.__children_layers__ = [] # used for evaluator.
# some layer may have some extra parent layer
self.__extra_parent__ = []
# used for evaluator.
self.__children_layers__ = []
def extra_parent(self):
return self.__extra_parent__
def append_extra_parent(self, parent):
self.__extra_parent__.append(parent)
def append_child(self, layer, parent_names):
self.__children_layers__.append((layer, parent_names))
......@@ -78,14 +87,20 @@ class Layer(object):
"""
self.__context__ = context
# short cut if myself is parsed before.
# STEP: short cut if this layer is parsed before.
if self.context_name() in context:
if self.use_context_name():
return context[self.context_name()]
else:
return context[self.name]
# parse parent before myself
# STEP: parse extra_parent that is not used by this layer but must
# be parsed before this layer.
for p in self.__extra_parent__:
p.to_proto(context=context)
# STEP: parse parent that is used by this layer, get the result and
# insert into kwargs of the next layer's to_proto_impl method.
kwargs = dict()
for layer_name in self.__parent_layers__:
if not isinstance(self.__parent_layers__[layer_name],
......@@ -97,14 +112,13 @@ class Layer(object):
self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer
# parse myself.
# STEP: parse myself and add myself into context.
ret_val = self.to_proto_impl(**kwargs)
if self.context_name() is not None and \
self.context_name() not in context:
if self.context_name() is not None \
and self.context_name() not in context:
context[self.context_name()] = ret_val
# parse children.
# STEP: parse children that should be pased after this layer.
for layer, pnames in self.__children_layers__:
drop = False
......@@ -117,6 +131,7 @@ class Layer(object):
continue
layer.to_proto(context=context)
# STEP: return v1 layer result
if self.context_name() is None:
return ret_val
elif self.use_context_name():
......
......@@ -66,13 +66,6 @@ def download(url, module_name, md5sum):
return filename
def dict_add(a_dict, ele):
if ele in a_dict:
a_dict[ele] += 1
else:
a_dict[ele] = 1
def fetch_all():
for module_name in filter(lambda x: not x.startswith("__"),
dir(paddle.v2.dataset)):
......
......@@ -18,6 +18,7 @@ TODO(yuyang18): Complete comments.
"""
import paddle.v2.dataset.common
import collections
import tarfile
import Queue
import re
......@@ -48,10 +49,10 @@ def tokenize(pattern):
def build_dict(pattern, cutoff):
word_freq = {}
word_freq = collections.defaultdict(int)
for doc in tokenize(pattern):
for word in doc:
paddle.v2.dataset.common.dict_add(word_freq, word)
word_freq[word] += 1
# Not sure if we should prune less-frequent words here.
word_freq = filter(lambda x: x[1] > cutoff, word_freq.items())
......
......@@ -17,6 +17,7 @@ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
Complete comments.
"""
import paddle.v2.dataset.common
import collections
import tarfile
__all__ = ['train', 'test', 'build_dict']
......@@ -26,15 +27,14 @@ MD5 = '30177ea32e27c525793142b6bf2c8e2d'
def word_count(f, word_freq=None):
add = paddle.v2.dataset.common.dict_add
if word_freq == None:
word_freq = {}
if word_freq is None:
word_freq = collections.defaultdict(int)
for l in f:
for w in l.strip().split():
add(word_freq, w)
add(word_freq, '<s>')
add(word_freq, '<e>')
word_freq[w] += 1
word_freq['<s>'] += 1
word_freq['<e>'] += 1
return word_freq
......
......@@ -33,40 +33,52 @@ The primary usage shows below.
import collections
import inspect
from config_base import Layer, __convert_to_v2__
import re
import paddle.trainer_config_helpers as conf_helps
from paddle.trainer.config_parser import \
RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \
RecurrentLayerGroupEnd, model_type
from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as __parse__
from paddle.trainer_config_helpers.default_decorators import wrap_act_default
from paddle.trainer_config_helpers.default_decorators import \
wrap_bias_attr_default
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
from paddle.trainer_config_helpers.layers import RecurrentLayerGroupSetGenerator, Generator
from paddle.trainer_config_helpers.layers import layer_support
from paddle.trainer.config_parser import \
RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \
RecurrentLayerGroupEnd, model_type
import activation
import re
import attr
import data_type
from config_base import Layer, __convert_to_v2__
__all__ = ['parse_network', 'data']
def parse_network(*outputs):
def parse_network(output_layers, extra_layers=None):
"""
Parse all output layers and then generate a ModelConfig object.
Parse all layers in the neural network graph and
then generate a ModelConfig object.
.. note::
This function is used internally in paddle.v2 module. User should never
invoke this method.
:param outputs: Output layers.
:type outputs: Layer
:param output_layers: Output layers.
:type output_layers: Layer
:param extra_layers: Some layers in the neural network graph are not in the
path of output_layers.
:type extra_layers: Layer
:return: A ModelConfig object instance.
:rtype: ModelConfig
"""
if not isinstance(output_layers, collections.Sequence):
output_layers = [output_layers]
if extra_layers is not None and not isinstance(extra_layers,
collections.Sequence):
extra_layers = [extra_layers]
def __real_func__():
"""
......@@ -74,7 +86,11 @@ def parse_network(*outputs):
the plain old paddle configuration function.
"""
context = dict()
real_output = [each.to_proto(context=context) for each in outputs]
real_output = [each.to_proto(context=context) for each in output_layers]
if extra_layers is not None:
extra_output = [
each.to_proto(context=context) for each in extra_layers
]
conf_helps.outputs(real_output)
return __parse__(__real_func__)
......@@ -119,54 +135,23 @@ class DataLayerV2(Layer):
return doc
class WithExtraParent(Layer):
def extra_parent(self):
return self.__extra_parent__
def __init__(self, name=None, parent_layers=None):
self.__extra_parent__ = []
super(WithExtraParent, self).__init__(
name=name, parent_layers=parent_layers)
def append_extra_parent(self, parent):
self.__extra_parent__.append(parent)
def to_proto(self, context):
class MemoryV2(Layer):
def __init__(self, name, extra_input=None, **kwargs):
"""
function to set proto attribute
Init memory object, if memory is inited inside recurrent_group step
function, it may depend on a boot_layer that should be initialized
outside recurrent_group, so we:
1. add RecurrentLayerInput to extra_parent of self.
2. add boot_layer to the extra_parent of RecurrentLayerInput.
:param extra_input: list of RecurrentLayerInput
:type extra_input: [RecurrentLayerInput]
"""
kwargs = dict()
for p in self.__extra_parent__:
p.to_proto(context=context)
for layer_name in self.__parent_layers__:
if not isinstance(self.__parent_layers__[layer_name],
collections.Sequence):
v1_layer = self.__parent_layers__[layer_name].to_proto(
context=context)
else:
v1_layer = map(lambda x: x.to_proto(context=context),
self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer
if self.context_name() is None:
return self.to_proto_impl(context=context, **kwargs)
elif self.context_name() not in context:
context[self.context_name()] = self.to_proto_impl(
context=context, **kwargs)
if self.use_context_name():
return context[self.context_name()]
else:
return context[self.name]
class MemoryV2(WithExtraParent):
def __init__(self, name, **kwargs):
self.name = name
super(MemoryV2, self).__init__(name=name, parent_layers=dict())
self.__kwargs__ = kwargs
self.__boot_layer_name__ = None
if 'boot_layer' in kwargs:
begin_of_current_rnn = []
# TODO(yuyang18): Fix inspect, it could be wrong when user invoke a
......@@ -189,11 +174,10 @@ class MemoryV2(WithExtraParent):
assert begin_of_current_rnn is not None
for extra in begin_of_current_rnn:
self.append_extra_parent(extra)
assert isinstance(extra, WithExtraParent)
extra.append_extra_parent(kwargs['boot_layer'])
self.__boot_layer_name__ = kwargs['boot_layer'].name
def to_proto_impl(self, context, **kwargs):
def to_proto_impl(self, **kwargs):
args = dict()
for each in kwargs:
args[each] = kwargs[each]
......@@ -201,7 +185,7 @@ class MemoryV2(WithExtraParent):
args[each] = self.__kwargs__[each]
if self.__boot_layer_name__ is not None:
args['boot_layer'] = context[self.__boot_layer_name__]
args['boot_layer'] = self.__context__[self.__boot_layer_name__]
size = args.get('size', None)
if size is not None:
......@@ -223,22 +207,6 @@ class MemoryV2(WithExtraParent):
return True
class LayerOutputV2(Layer):
"""
LayerOutputV2 is used to store the result of LayerOutput in v1 api.
It will not store it's parents because layer_output has been parsed already.
"""
def __init__(self, layer_output):
assert isinstance(layer_output, conf_helps.LayerOutput)
self.layer_output = layer_output
super(LayerOutputV2, self).__init__(
name=layer_output.name, parent_layers=dict())
def to_proto_impl(self):
return self.layer_output
class StaticInputV2(object):
def __init__(self, input, is_seq=False, size=None):
assert isinstance(input, LayerV2)
......@@ -250,6 +218,66 @@ class StaticInputV2(object):
# assert input.size is not None or size is not None
class BaseGeneratedInputV2(object):
def __init__(self):
self.bos_id = None
self.eos_id = None
def before_real_step(self):
raise NotImplementedError()
def after_real_step(self, *args):
raise NotImplementedError()
class GeneratedInputV2(BaseGeneratedInputV2):
def __init__(self, size, embedding_name, embedding_size):
super(GeneratedInputV2, self).__init__()
self.size = size
self.embedding_name = embedding_name
self.embedding_size = embedding_size
def after_real_step(self, input):
return max_id(input=input, name='__beam_search_predict__')
def before_real_step(self):
predict_id = memory(
name='__beam_search_predict__',
size=self.size,
boot_with_const_id=self.bos_id)
trg_emb = embedding(
input=predict_id,
size=self.embedding_size,
param_attr=attr.ParamAttr(name=self.embedding_name))
return trg_emb
class RecurrentLayerGroupSetGeneratorV2(Layer):
def __init__(self, eos_name, max_length, beam_size, num_results_per_sample):
self.eos_name = eos_name
self.max_length = max_length
self.beam_size = beam_size
self.num_results_per_sample = num_results_per_sample
super(RecurrentLayerGroupSetGeneratorV2, self).__init__(
name=eos_name, parent_layers={})
def to_proto_impl(self, **kwargs):
RecurrentLayerGroupSetGenerator(
Generator(
eos_layer_name=self.eos_name,
max_num_frames=self.max_length,
beam_size=self.beam_size,
num_results_per_sample=self.num_results_per_sample))
return self
def context_name(self):
return self.eos_name + ".fake"
def use_context_name(self):
return True
class MixedLayerV2(Layer):
"""
This class is use to support `with` grammar. If not, the following code
......@@ -328,18 +356,24 @@ def mixed(size=0,
return MixedLayerV2(size, input, name, act, bias_attr, layer_attr)
class RecurrentLayerInput(WithExtraParent):
class RecurrentLayerInput(Layer):
def __init__(self, recurrent_name, index, parent_layers):
assert len(parent_layers) == 1
self.__parents__ = parent_layers.values()[0]
super(RecurrentLayerInput, self).__init__(
name=self.__parents__[index].name, parent_layers=parent_layers)
parents_len = len(parent_layers)
assert parents_len <= 1
if parents_len == 0:
self.__parents__ = []
else:
self.__parents__ = parent_layers.values()[0]
self.__recurrent_name__ = recurrent_name
name = self.__parents__[
index].name if index >= 0 else self.context_name()
super(RecurrentLayerInput, self).__init__(
name=name, parent_layers=parent_layers)
def context_name(self):
return self.__recurrent_name__ + ".begin"
def to_proto_impl(self, context, **kwargs):
def to_proto_impl(self, **kwargs):
model_type('recurrent_nn')
RecurrentLayerGroupWithoutOutLinksBegin(
name=self.__recurrent_name__,
......@@ -436,6 +470,11 @@ def recurrent_group(step, input, name=None):
for i in xrange(len(non_static_inputs))
]
extra_input = None
if len(non_static_inputs) == 0:
extra_input = RecurrentLayerInput(
recurrent_name=name, index=-1, parent_layers={})
def __real_step__(*args):
rnn_input = list(args)
static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
......@@ -443,6 +482,7 @@ def recurrent_group(step, input, name=None):
mem_name = "__%s_memory__" % static_input.input.name
mem = memory(
name=mem_name,
extra_input=extra_input,
is_seq=static_input.is_seq,
size=static_input.input.calculate_size,
boot_layer=static_input.input)
......@@ -472,6 +512,73 @@ def recurrent_group(step, input, name=None):
return retv
@wrap_name_default()
def beam_search(step,
input,
bos_id,
eos_id,
beam_size,
max_length=500,
name=None,
num_results_per_sample=None):
if num_results_per_sample is None:
num_results_per_sample = beam_size
assert num_results_per_sample <= beam_size
# logger.warning("num_results_per_sample should be less than beam_size")
if isinstance(input, StaticInputV2) or isinstance(input,
BaseGeneratedInputV2):
input = [input]
generated_input_index = -1
real_input = []
for i, each_input in enumerate(input):
assert isinstance(each_input, StaticInputV2) or isinstance(
each_input, BaseGeneratedInputV2)
if isinstance(each_input, BaseGeneratedInputV2):
assert generated_input_index == -1
generated_input_index = i
else:
real_input.append(each_input)
assert generated_input_index != -1
gipt = input[generated_input_index]
assert isinstance(gipt, BaseGeneratedInputV2)
gipt.bos_id = bos_id
gipt.eos_id = eos_id
def __real_step__(*args):
eos_name = "__%s_eos_layer__" % name
generator = RecurrentLayerGroupSetGeneratorV2(
eos_name, max_length, beam_size, num_results_per_sample)
args = list(args)
before_step_layer = gipt.before_real_step()
before_step_layer.append_child(
layer=generator, parent_names=[before_step_layer.name])
args.insert(generated_input_index, before_step_layer)
predict = gipt.after_real_step(step(*args))
eos_layer = eos(input=predict, eos_id=eos_id, name=eos_name)
predict.append_child(layer=eos_layer, parent_names=[predict.name])
return predict
# tmp = paddle.layer.recurrent_group(
# step=__real_step__,
# input=real_input,
# reverse=False,
# name=name,
# is_generating=True)
tmp = recurrent_group(step=__real_step__, input=real_input, name=name)
return tmp
__projection_names__ = filter(lambda x: x.endswith('_projection'),
dir(conf_helps))
......
......@@ -159,7 +159,8 @@ class Parameters(object):
if not self.has_key(key):
raise ValueError("No such parameter %s" % key)
conf = self.__param_conf__[key]
return tuple(map(int, conf.dims))
dims = conf.dims if conf.dims else (1, conf.size)
return tuple(map(int, dims))
def __setitem__(self, key, value):
"""
......
......@@ -59,13 +59,13 @@ class ImageLayerTest(unittest.TestCase):
num_channels=16,
pool_type=pooling.Max())
maxout = layer.maxout(input=conv, num_channels=16, groups=4)
print layer.parse_network(maxpool, spp, maxout)
print layer.parse_network([maxpool, spp, maxout])
def test_norm_layer(self):
norm1 = layer.img_cmrnorm(input=conv, size=5)
norm2 = layer.batch_norm(input=conv)
norm3 = layer.sum_to_one_norm(input=conv)
print layer.parse_network(norm1, norm2, norm3)
print layer.parse_network([norm1, norm2, norm3])
class AggregateLayerTest(unittest.TestCase):
......@@ -78,7 +78,8 @@ class AggregateLayerTest(unittest.TestCase):
first_seq = layer.first_seq(input=pixel)
concat = layer.concat(input=[last_seq, first_seq])
seq_concat = layer.seq_concat(a=last_seq, b=first_seq)
print layer.parse_network(pool, last_seq, first_seq, concat, seq_concat)
print layer.parse_network(
[pool, last_seq, first_seq, concat, seq_concat])
class MathLayerTest(unittest.TestCase):
......@@ -95,8 +96,10 @@ class MathLayerTest(unittest.TestCase):
tensor = layer.tensor(a=pixel, b=pixel, size=1000)
cos_sim = layer.cos_sim(a=pixel, b=pixel)
trans = layer.trans(input=tensor)
print layer.parse_network(addto, linear_comb, interpolation, power,
scaling, slope, tensor, cos_sim, trans)
print layer.parse_network([
addto, linear_comb, interpolation, power, scaling, slope, tensor,
cos_sim, trans
])
class ReshapeLayerTest(unittest.TestCase):
......@@ -110,7 +113,8 @@ class ReshapeLayerTest(unittest.TestCase):
repeat = layer.repeat(input=pixel, num_repeats=4)
reshape = layer.seq_reshape(input=pixel, reshape_size=4)
rotate = layer.rotate(input=pixel, height=16, width=49)
print layer.parse_network(block_expand, expand, repeat, reshape, rotate)
print layer.parse_network(
[block_expand, expand, repeat, reshape, rotate])
class RecurrentLayerTest(unittest.TestCase):
......@@ -119,7 +123,7 @@ class RecurrentLayerTest(unittest.TestCase):
recurrent = layer.recurrent(input=word)
lstm = layer.lstmemory(input=word)
gru = layer.grumemory(input=word)
print layer.parse_network(recurrent, lstm, gru)
print layer.parse_network([recurrent, lstm, gru])
class CostLayerTest(unittest.TestCase):
......@@ -139,10 +143,10 @@ class CostLayerTest(unittest.TestCase):
cost10 = layer.sum_cost(input=inference)
cost11 = layer.huber_cost(input=score, label=label)
print layer.parse_network(cost1, cost2)
print layer.parse_network(cost3, cost4)
print layer.parse_network(cost5, cost6)
print layer.parse_network(cost7, cost8, cost9, cost10, cost11)
print layer.parse_network([cost1, cost2])
print layer.parse_network([cost3, cost4])
print layer.parse_network([cost5, cost6])
print layer.parse_network([cost7, cost8, cost9, cost10, cost11])
crf = layer.crf(input=inference, label=label)
crf_decoding = layer.crf_decoding(input=inference, size=3)
......@@ -151,8 +155,8 @@ class CostLayerTest(unittest.TestCase):
nce = layer.nce(input=inference, label=label, num_classes=3)
hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3)
print layer.parse_network(crf, crf_decoding, ctc, warp_ctc, nce,
hsigmoid)
print layer.parse_network(
[crf, crf_decoding, ctc, warp_ctc, nce, hsigmoid])
class OtherLayerTest(unittest.TestCase):
......@@ -160,7 +164,7 @@ class OtherLayerTest(unittest.TestCase):
maxid = layer.max_id(input=inference)
sampling_id = layer.sampling_id(input=inference)
eos = layer.eos(input=maxid, eos_id=5)
print layer.parse_network(maxid, sampling_id, eos)
print layer.parse_network([maxid, sampling_id, eos])
def test_slicing_joining_layer(self):
pad = layer.pad(input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1])
......
......@@ -17,7 +17,6 @@ import collections
from paddle.proto.ModelConfig_pb2 import ModelConfig
import layer as v2_layer
from layer import WithExtraParent
__all__ = ['Topology']
......@@ -41,9 +40,8 @@ def __bfs_travel__(callback, *layers):
__break__ = callback(each_layer)
if __break__:
return
__layers__ = each_layer.__parent_layers__.values()
if isinstance(each_layer, WithExtraParent):
__layers__ = __layers__ + each_layer.extra_parent()
__layers__ = each_layer.__parent_layers__.values() + \
each_layer.extra_parent()
__bfs_travel__(callback, *__layers__)
......@@ -53,14 +51,26 @@ class Topology(object):
and network configs.
"""
def __init__(self, layers):
if not isinstance(layers, collections.Sequence):
__check_layer_type__(layers)
layers = [layers]
for layer in layers:
__check_layer_type__(layer)
def __init__(self, layers, extra_layers=None):
def __check__(layers):
if not isinstance(layers, collections.Sequence):
__check_layer_type__(layers)
layers = [layers]
for layer in layers:
__check_layer_type__(layer)
return layers
layers = __check__(layers)
self.layers = layers
self.__model_config__ = v2_layer.parse_network(*layers)
if extra_layers is not None:
extra_layers = __check__(extra_layers)
self.__model_config__ = v2_layer.parse_network(
layers, extra_layers=extra_layers)
if extra_layers is not None:
self.layers.extend(extra_layers)
assert isinstance(self.__model_config__, ModelConfig)
def proto(self):
......
......@@ -37,9 +37,12 @@ class SGD(object):
:type cost: paddle.v2.config_base.Layer
:param parameters: The parameters dictionary.
:type parameters: paddle.v2.parameters.Parameters
:param extra_layers: Some layers in the neural network graph are not
in the path of cost layer.
:type extra_layers: paddle.v2.config_base.Layer
"""
def __init__(self, cost, parameters, update_equation):
def __init__(self, cost, parameters, update_equation, extra_layers=None):
if not isinstance(parameters, v2_parameters.Parameters):
raise TypeError('parameters should be parameters')
......@@ -47,7 +50,7 @@ class SGD(object):
if not isinstance(update_equation, v2_optimizer.Optimizer):
raise TypeError("update equation parameter must be "
"paddle.v2.optimizer.Optimizer")
topology = Topology(cost)
topology = Topology(cost, extra_layers=extra_layers)
self.__optimizer__ = update_equation
self.__topology__ = topology
self.__parameters__ = parameters
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册