提交 c1738e29 编写于 作者: L Luo Tao

Merge branch 'develop' into stride

...@@ -64,6 +64,7 @@ include(external/python) # download, build, install python ...@@ -64,6 +64,7 @@ include(external/python) # download, build, install python
include(external/openblas) # download, build, install openblas include(external/openblas) # download, build, install openblas
include(external/swig) # download, build, install swig include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc include(external/warpctc) # download, build, install warpctc
include(external/any) # download libn::any
include(package) # set paddle packages include(package) # set paddle packages
include(cpplint) # set paddle c++ style include(cpplint) # set paddle c++ style
......
INCLUDE(ExternalProject)
SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any)
INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/linb_any)
ExternalProject_Add(
linb_any
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/thelink2012/any.git"
GIT_TAG "8fef1e93710a0edf8d7658999e284a1142c4c020"
PREFIX ${ANY_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE)
import sys import sys
import paddle.v2 as paddle import paddle.v2 as paddle
def seqToseq_net(source_dict_dim, target_dict_dim): def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
### Network Architecture ### Network Architecture
word_vector_dim = 512 # dimension of word vector word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network encoder_size = 512 # dimension of hidden unit in GRU Encoder network
beam_size = 3
max_length = 250
#### Encoder #### Encoder
src_word_id = paddle.layer.data( src_word_id = paddle.layer.data(
name='source_language_word', name='source_language_word',
...@@ -67,6 +71,7 @@ def seqToseq_net(source_dict_dim, target_dict_dim): ...@@ -67,6 +71,7 @@ def seqToseq_net(source_dict_dim, target_dict_dim):
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True) group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2] group_inputs = [group_input1, group_input2]
if not is_generating:
trg_embedding = paddle.layer.embedding( trg_embedding = paddle.layer.embedding(
input=paddle.layer.data( input=paddle.layer.data(
name='target_language_word', name='target_language_word',
...@@ -91,6 +96,32 @@ def seqToseq_net(source_dict_dim, target_dict_dim): ...@@ -91,6 +96,32 @@ def seqToseq_net(source_dict_dim, target_dict_dim):
cost = paddle.layer.classification_cost(input=decoder, label=lbl) cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost return cost
else:
# In generation, the decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
# StaticInput, which is a read-only memory.
# Embedding of the last generated word is automatically gotten by
# GeneratedInputs, which is initialized by a start mark, such as <s>,
# and must be included in generation.
trg_embedding = paddle.layer.GeneratedInputV2(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
beam_gen = paddle.layer.beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs,
bos_id=0,
eos_id=1,
beam_size=beam_size,
max_length=max_length)
return beam_gen
def main(): def main():
......
...@@ -16,66 +16,6 @@ limitations under the License. */ ...@@ -16,66 +16,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
template <>
size_t FuncConfig::get<size_t>(const std::string& key) const {
auto it = valueMap_.find(key);
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
return it->second.s;
}
template <>
real FuncConfig::get<real>(const std::string& key) const {
auto it = valueMap_.find(key);
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
return it->second.r;
}
template <>
int FuncConfig::get<int>(const std::string& key) const {
auto it = valueMap_.find(key);
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
return it->second.i;
}
template <>
bool FuncConfig::get<bool>(const std::string& key) const {
auto it = valueMap_.find(key);
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
return it->second.b;
}
template <>
FuncConfig& FuncConfig::set<size_t>(const std::string& key, size_t v) {
CHECK_EQ(static_cast<int>(valueMap_.count(key)), 0) << "Duplicated value: "
<< key;
valueMap_[key].s = v;
return *this;
}
template <>
FuncConfig& FuncConfig::set<real>(const std::string& key, real v) {
CHECK_EQ(static_cast<int>(valueMap_.count(key)), 0) << "Duplicated value: "
<< key;
valueMap_[key].r = v;
return *this;
}
template <>
FuncConfig& FuncConfig::set<int>(const std::string& key, int v) {
CHECK_EQ(static_cast<int>(valueMap_.count(key)), 0) << "Duplicated value: "
<< key;
valueMap_[key].i = v;
return *this;
}
template <>
FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
CHECK_EQ(static_cast<int>(valueMap_.count(key)), 0) << "Duplicated value: "
<< key;
valueMap_[key].b = v;
return *this;
}
void BufferArgs::addArg(const Matrix& arg, void BufferArgs::addArg(const Matrix& arg,
const TensorShape& shape, const TensorShape& shape,
ArgType argType) { ArgType argType) {
......
...@@ -18,32 +18,49 @@ limitations under the License. */ ...@@ -18,32 +18,49 @@ limitations under the License. */
#include <vector> #include <vector>
#include "BufferArg.h" #include "BufferArg.h"
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
#include "paddle/utils/Any.h"
#include "paddle/utils/ClassRegistrar.h" #include "paddle/utils/ClassRegistrar.h"
#include "paddle/utils/Error.h"
namespace paddle { namespace paddle {
/** /**
* Function Configuration. * Function Configuration.
* The argument type of Function::init. * The argument type of Function::init.
* Follow-up will consider moving this data structure to Proto inside.
*/ */
class FuncConfig { class FuncConfig {
public: public:
union value {
size_t s;
real r;
int i;
bool b;
};
template <typename T> template <typename T>
T get(const std::string& key) const; T get(const std::string& key, Error* err = nullptr) const {
try {
return any_cast<T>(valueMap_.at(key));
} catch (std::exception& e) { // could be cast or out of range exception.
if (err) {
*err = Error(e.what());
} else {
LOG(FATAL) << "Cannot get key " << key << "with error " << e.what();
}
return T();
}
}
template <typename T> template <typename T>
FuncConfig& set(const std::string& key, T v); FuncConfig& set(const std::string& key, T v, Error* err = nullptr) {
auto it = valueMap_.find(key);
if (it != valueMap_.end()) { // already contains key.
if (err) {
*err = Error("Key %s is already set in FuncConfig", key.c_str());
} else {
LOG(FATAL) << "Key " << key << " is already set in FuncConfig.";
}
return *this;
}
valueMap_[key] = any(v);
return *this;
}
protected: protected:
std::map<std::string, value> valueMap_; mutable std::unordered_map<std::string, any> valueMap_;
}; };
/** /**
......
...@@ -25,9 +25,9 @@ void Pad<DEVICE_TYPE_CPU>(real* outputs, ...@@ -25,9 +25,9 @@ void Pad<DEVICE_TYPE_CPU>(real* outputs,
const int inH, const int inH,
const int inW, const int inW,
const PadConf& pad) { const PadConf& pad) {
int cstart = pad.channelStart, cend = pad.channelEnd; int cstart = pad.channel[0], cend = pad.channel[1];
int hstart = pad.heightStart, hend = pad.heightEnd; int hstart = pad.height[0], hend = pad.height[1];
int wstart = pad.widthStart, wend = pad.widthEnd; int wstart = pad.width[0], wend = pad.width[1];
int outC = inC + cstart + cend; int outC = inC + cstart + cend;
int outH = inH + hstart + hend; int outH = inH + hstart + hend;
int outW = inW + wstart + wend; int outW = inW + wstart + wend;
...@@ -51,9 +51,9 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad, ...@@ -51,9 +51,9 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
const int inH, const int inH,
const int inW, const int inW,
const PadConf& pad) { const PadConf& pad) {
int cstart = pad.channelStart, cend = pad.channelEnd; int cstart = pad.channel[0], cend = pad.channel[1];
int hstart = pad.heightStart, hend = pad.heightEnd; int hstart = pad.height[0], hend = pad.height[1];
int wstart = pad.widthStart, wend = pad.widthEnd; int wstart = pad.width[0], wend = pad.width[1];
int outC = inC + cstart + cend; int outC = inC + cstart + cend;
int outH = inH + hstart + hend; int outH = inH + hstart + hend;
int outW = inW + wstart + wend; int outW = inW + wstart + wend;
...@@ -71,6 +71,12 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad, ...@@ -71,6 +71,12 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
} }
} }
static inline PadConf castToPadConf(const FuncConfig& conf) {
return {conf.get<std::vector<uint32_t>>("channel"),
conf.get<std::vector<uint32_t>>("height"),
conf.get<std::vector<uint32_t>>("width")};
}
/** /**
* \brief Padding zeros to input according to the specify dimension. * \brief Padding zeros to input according to the specify dimension.
* The struct pad_ contains the padding size in each dimension. * The struct pad_ contains the padding size in each dimension.
...@@ -127,14 +133,7 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad, ...@@ -127,14 +133,7 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
template <DeviceType Device> template <DeviceType Device>
class PadFunc : public FunctionBase { class PadFunc : public FunctionBase {
public: public:
void init(const FuncConfig& config) override { void init(const FuncConfig& config) override { pad_ = castToPadConf(config); }
pad_.channelStart = config.get<int>("cstart");
pad_.channelEnd = config.get<int>("cend");
pad_.heightStart = config.get<int>("hstart");
pad_.heightEnd = config.get<int>("hend");
pad_.widthStart = config.get<int>("wstart");
pad_.widthEnd = config.get<int>("wend");
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(1UL, inputs.size()); CHECK_EQ(1UL, inputs.size());
...@@ -175,14 +174,7 @@ private: ...@@ -175,14 +174,7 @@ private:
template <DeviceType Device> template <DeviceType Device>
class PadGradFunc : public FunctionBase { class PadGradFunc : public FunctionBase {
public: public:
void init(const FuncConfig& config) override { void init(const FuncConfig& config) override { pad_ = castToPadConf(config); }
pad_.channelStart = config.get<int>("cstart");
pad_.channelEnd = config.get<int>("cend");
pad_.heightStart = config.get<int>("hstart");
pad_.heightEnd = config.get<int>("hend");
pad_.widthStart = config.get<int>("wstart");
pad_.widthEnd = config.get<int>("wend");
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(1UL, inputs.size()); CHECK_EQ(1UL, inputs.size());
......
...@@ -19,18 +19,12 @@ limitations under the License. */ ...@@ -19,18 +19,12 @@ limitations under the License. */
namespace paddle { namespace paddle {
struct PadConf { struct PadConf {
/// how many values to add before the data along channel dimension. /// how many values to add before/after the data along channel dimension.
int channelStart; std::vector<uint32_t> channel;
/// how many values to add after the data along channel dimension. /// how many values to add before/after the data along height dimension.
int channelEnd; std::vector<uint32_t> height;
/// how many values to add before the data along height dimension. /// how many values to add before/after the data along width dimension.
int heightStart; std::vector<uint32_t> width;
/// how many values to add after the data along height dimension.
int heightEnd;
/// how many values to add before the data along width dimension.
int widthStart;
/// how many values to add after the data along width dimension.
int widthEnd;
}; };
/** /**
......
...@@ -36,12 +36,9 @@ bool PadLayer::init(const LayerMap& layerMap, ...@@ -36,12 +36,9 @@ bool PadLayer::init(const LayerMap& layerMap,
CHECK_EQ(2, pad_conf.pad_c_size()); CHECK_EQ(2, pad_conf.pad_c_size());
CHECK_EQ(2, pad_conf.pad_h_size()); CHECK_EQ(2, pad_conf.pad_h_size());
CHECK_EQ(2, pad_conf.pad_w_size()); CHECK_EQ(2, pad_conf.pad_w_size());
padc_.push_back(pad_conf.pad_c(0)); padc_ = {pad_conf.pad_c(0), pad_conf.pad_c(1)};
padc_.push_back(pad_conf.pad_c(1)); padh_ = {pad_conf.pad_h(0), pad_conf.pad_h(1)};
padh_.push_back(pad_conf.pad_h(0)); padw_ = {pad_conf.pad_w(0), pad_conf.pad_w(1)};
padh_.push_back(pad_conf.pad_h(1));
padw_.push_back(pad_conf.pad_w(0));
padw_.push_back(pad_conf.pad_w(1));
outDims_ = TensorShape(4); outDims_ = TensorShape(4);
setOutDims(0); setOutDims(0);
...@@ -49,21 +46,15 @@ bool PadLayer::init(const LayerMap& layerMap, ...@@ -49,21 +46,15 @@ bool PadLayer::init(const LayerMap& layerMap,
createFunction(forward_, createFunction(forward_,
"Pad", "Pad",
FuncConfig() FuncConfig()
.set("cstart", padc_[0]) .set("channel", padc_)
.set("cend", padc_[1]) .set("height", padh_)
.set("hstart", padh_[0]) .set("width", padw_));
.set("hend", padh_[1])
.set("wstart", padw_[0])
.set("wend", padw_[1]));
createFunction(backward_, createFunction(backward_,
"PadGrad", "PadGrad",
FuncConfig() FuncConfig()
.set("cstart", padc_[0]) .set("channel", padc_)
.set("cend", padc_[1]) .set("height", padh_)
.set("hstart", padh_[0]) .set("width", padw_));
.set("hend", padh_[1])
.set("wstart", padw_[0])
.set("wend", padw_[1]));
return true; return true;
} }
......
...@@ -38,9 +38,9 @@ protected: ...@@ -38,9 +38,9 @@ protected:
void setOutDims(const size_t batchSize); void setOutDims(const size_t batchSize);
void setTensorDim(const size_t batchSize); void setTensorDim(const size_t batchSize);
std::vector<int> padc_; std::vector<uint32_t> padc_;
std::vector<int> padh_; std::vector<uint32_t> padh_;
std::vector<int> padw_; std::vector<uint32_t> padw_;
TensorShape inDims_; TensorShape inDims_;
TensorShape outDims_; TensorShape outDims_;
}; };
......
...@@ -160,10 +160,19 @@ class SparseFloatScanner(SparseBinaryScanner): ...@@ -160,10 +160,19 @@ class SparseFloatScanner(SparseBinaryScanner):
class IndexScanner(IScanner): class IndexScanner(IScanner):
def __init__(self, input_type, pos): def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos) IScanner.__init__(self, input_type, pos)
self.__ids__ = [] self.__ids__ = None
self.__idx__ = 0
def pre_scan(self, dat):
self.__idx__ += 1
def finish_pre_scan(self, argument):
self.__ids__ = [0] * self.__idx__
self.__idx__ = 0
def scan(self, dat): def scan(self, dat):
self.__ids__.append(dat) self.__ids__[self.__idx__] = dat
self.__idx__ += 1
def finish_scan(self, argument): def finish_scan(self, argument):
ids = swig_paddle.IVector.create(self.__ids__, self.data_in_gpu) ids = swig_paddle.IVector.create(self.__ids__, self.data_in_gpu)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#if __cplusplus > 201402L
#include <any>
namespace paddle {
// using std::any for C++ 17
using std::any;
using std::any_cast;
using std::bad_any_cast;
} // namespace paddle
#else
#include <any.hpp>
namespace paddle {
// use linb::any for C++ 11
using linb::any;
using linb::any_cast;
using linb::bad_any_cast;
} // namespace paddle
#endif
...@@ -18,7 +18,7 @@ import inspect ...@@ -18,7 +18,7 @@ import inspect
from paddle.trainer.config_parser import * from paddle.trainer.config_parser import *
from .activations import LinearActivation, SigmoidActivation, TanhActivation, \ from .activations import LinearActivation, SigmoidActivation, TanhActivation, \
ReluActivation, IdentityActivation, SoftmaxActivation ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation
from .evaluators import * from .evaluators import *
from .poolings import MaxPooling, AvgPooling, BasePoolingType from .poolings import MaxPooling, AvgPooling, BasePoolingType
from .attrs import * from .attrs import *
...@@ -2277,7 +2277,8 @@ def img_pool_layer(input, ...@@ -2277,7 +2277,8 @@ def img_pool_layer(input,
pool_type.name = 'avg' pool_type.name = 'avg'
type_name = pool_type.name + '-projection' \ type_name = pool_type.name + '-projection' \
if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ if (
isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
else pool_type.name else pool_type.name
pool_size_y = pool_size if pool_size_y is None else pool_size_y pool_size_y = pool_size if pool_size_y is None else pool_size_y
...@@ -4831,12 +4832,14 @@ def crf_decoding_layer(input, ...@@ -4831,12 +4832,14 @@ def crf_decoding_layer(input,
return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1) return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1)
@wrap_act_default(act=SigmoidActivation())
@wrap_bias_attr_default(has_bias=True) @wrap_bias_attr_default(has_bias=True)
@wrap_name_default() @wrap_name_default()
@layer_support() @layer_support()
def nce_layer(input, def nce_layer(input,
label, label,
num_classes, num_classes,
act=None,
weight=None, weight=None,
num_neg_samples=10, num_neg_samples=10,
neg_distribution=None, neg_distribution=None,
...@@ -4865,6 +4868,8 @@ def nce_layer(input, ...@@ -4865,6 +4868,8 @@ def nce_layer(input,
:type weight: LayerOutput :type weight: LayerOutput
:param num_classes: number of classes. :param num_classes: number of classes.
:type num_classes: int :type num_classes: int
:param act: Activation, default is Sigmoid.
:type act: BaseActivation
:param num_neg_samples: number of negative samples. Default is 10. :param num_neg_samples: number of negative samples. Default is 10.
:type num_neg_samples: int :type num_neg_samples: int
:param neg_distribution: The distribution for generating the random negative labels. :param neg_distribution: The distribution for generating the random negative labels.
...@@ -4887,6 +4892,8 @@ def nce_layer(input, ...@@ -4887,6 +4892,8 @@ def nce_layer(input,
assert isinstance(neg_distribution, collections.Sequence) assert isinstance(neg_distribution, collections.Sequence)
assert len(neg_distribution) == num_classes assert len(neg_distribution) == num_classes
assert sum(neg_distribution) == 1 assert sum(neg_distribution) == 1
if not isinstance(act, BaseActivation):
raise TypeError()
ipts_for_layer = [] ipts_for_layer = []
parents = [] parents = []
...@@ -4908,12 +4915,17 @@ def nce_layer(input, ...@@ -4908,12 +4915,17 @@ def nce_layer(input,
type=LayerType.NCE_LAYER, type=LayerType.NCE_LAYER,
num_classes=num_classes, num_classes=num_classes,
neg_sampling_dist=neg_distribution, neg_sampling_dist=neg_distribution,
active_type=act.name,
num_neg_samples=num_neg_samples, num_neg_samples=num_neg_samples,
inputs=ipts_for_layer, inputs=ipts_for_layer,
bias=ParamAttr.to_bias(bias_attr), bias=ParamAttr.to_bias(bias_attr),
**ExtraLayerAttribute.to_kwargs(layer_attr)) **ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput( return LayerOutput(
name, LayerType.NCE_LAYER, parents=parents, size=l.config.size) name,
LayerType.NCE_LAYER,
parents=parents,
size=l.config.size,
activation=act)
""" """
......
...@@ -67,7 +67,16 @@ class Layer(object): ...@@ -67,7 +67,16 @@ class Layer(object):
self.name = name self.name = name
self.__context__ = {} self.__context__ = {}
self.__parent_layers__ = parent_layers self.__parent_layers__ = parent_layers
self.__children_layers__ = [] # used for evaluator. # some layer may have some extra parent layer
self.__extra_parent__ = []
# used for evaluator.
self.__children_layers__ = []
def extra_parent(self):
return self.__extra_parent__
def append_extra_parent(self, parent):
self.__extra_parent__.append(parent)
def append_child(self, layer, parent_names): def append_child(self, layer, parent_names):
self.__children_layers__.append((layer, parent_names)) self.__children_layers__.append((layer, parent_names))
...@@ -78,14 +87,20 @@ class Layer(object): ...@@ -78,14 +87,20 @@ class Layer(object):
""" """
self.__context__ = context self.__context__ = context
# short cut if myself is parsed before. # STEP: short cut if this layer is parsed before.
if self.context_name() in context: if self.context_name() in context:
if self.use_context_name(): if self.use_context_name():
return context[self.context_name()] return context[self.context_name()]
else: else:
return context[self.name] return context[self.name]
# parse parent before myself # STEP: parse extra_parent that is not used by this layer but must
# be parsed before this layer.
for p in self.__extra_parent__:
p.to_proto(context=context)
# STEP: parse parent that is used by this layer, get the result and
# insert into kwargs of the next layer's to_proto_impl method.
kwargs = dict() kwargs = dict()
for layer_name in self.__parent_layers__: for layer_name in self.__parent_layers__:
if not isinstance(self.__parent_layers__[layer_name], if not isinstance(self.__parent_layers__[layer_name],
...@@ -97,14 +112,13 @@ class Layer(object): ...@@ -97,14 +112,13 @@ class Layer(object):
self.__parent_layers__[layer_name]) self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer kwargs[layer_name] = v1_layer
# parse myself. # STEP: parse myself and add myself into context.
ret_val = self.to_proto_impl(**kwargs) ret_val = self.to_proto_impl(**kwargs)
if self.context_name() is not None \
if self.context_name() is not None and \ and self.context_name() not in context:
self.context_name() not in context:
context[self.context_name()] = ret_val context[self.context_name()] = ret_val
# parse children. # STEP: parse children that should be pased after this layer.
for layer, pnames in self.__children_layers__: for layer, pnames in self.__children_layers__:
drop = False drop = False
...@@ -117,6 +131,7 @@ class Layer(object): ...@@ -117,6 +131,7 @@ class Layer(object):
continue continue
layer.to_proto(context=context) layer.to_proto(context=context)
# STEP: return v1 layer result
if self.context_name() is None: if self.context_name() is None:
return ret_val return ret_val
elif self.use_context_name(): elif self.use_context_name():
......
...@@ -66,13 +66,6 @@ def download(url, module_name, md5sum): ...@@ -66,13 +66,6 @@ def download(url, module_name, md5sum):
return filename return filename
def dict_add(a_dict, ele):
if ele in a_dict:
a_dict[ele] += 1
else:
a_dict[ele] = 1
def fetch_all(): def fetch_all():
for module_name in filter(lambda x: not x.startswith("__"), for module_name in filter(lambda x: not x.startswith("__"),
dir(paddle.v2.dataset)): dir(paddle.v2.dataset)):
......
...@@ -18,6 +18,7 @@ TODO(yuyang18): Complete comments. ...@@ -18,6 +18,7 @@ TODO(yuyang18): Complete comments.
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
import collections
import tarfile import tarfile
import Queue import Queue
import re import re
...@@ -48,10 +49,10 @@ def tokenize(pattern): ...@@ -48,10 +49,10 @@ def tokenize(pattern):
def build_dict(pattern, cutoff): def build_dict(pattern, cutoff):
word_freq = {} word_freq = collections.defaultdict(int)
for doc in tokenize(pattern): for doc in tokenize(pattern):
for word in doc: for word in doc:
paddle.v2.dataset.common.dict_add(word_freq, word) word_freq[word] += 1
# Not sure if we should prune less-frequent words here. # Not sure if we should prune less-frequent words here.
word_freq = filter(lambda x: x[1] > cutoff, word_freq.items()) word_freq = filter(lambda x: x[1] > cutoff, word_freq.items())
......
...@@ -17,6 +17,7 @@ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/ ...@@ -17,6 +17,7 @@ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
Complete comments. Complete comments.
""" """
import paddle.v2.dataset.common import paddle.v2.dataset.common
import collections
import tarfile import tarfile
__all__ = ['train', 'test', 'build_dict'] __all__ = ['train', 'test', 'build_dict']
...@@ -26,15 +27,14 @@ MD5 = '30177ea32e27c525793142b6bf2c8e2d' ...@@ -26,15 +27,14 @@ MD5 = '30177ea32e27c525793142b6bf2c8e2d'
def word_count(f, word_freq=None): def word_count(f, word_freq=None):
add = paddle.v2.dataset.common.dict_add if word_freq is None:
if word_freq == None: word_freq = collections.defaultdict(int)
word_freq = {}
for l in f: for l in f:
for w in l.strip().split(): for w in l.strip().split():
add(word_freq, w) word_freq[w] += 1
add(word_freq, '<s>') word_freq['<s>'] += 1
add(word_freq, '<e>') word_freq['<e>'] += 1
return word_freq return word_freq
......
...@@ -33,40 +33,52 @@ The primary usage shows below. ...@@ -33,40 +33,52 @@ The primary usage shows below.
import collections import collections
import inspect import inspect
from config_base import Layer, __convert_to_v2__ import re
import paddle.trainer_config_helpers as conf_helps import paddle.trainer_config_helpers as conf_helps
from paddle.trainer.config_parser import \
RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \
RecurrentLayerGroupEnd, model_type
from paddle.trainer_config_helpers.config_parser_utils import \ from paddle.trainer_config_helpers.config_parser_utils import \
parse_network_config as __parse__ parse_network_config as __parse__
from paddle.trainer_config_helpers.default_decorators import wrap_act_default from paddle.trainer_config_helpers.default_decorators import wrap_act_default
from paddle.trainer_config_helpers.default_decorators import \ from paddle.trainer_config_helpers.default_decorators import \
wrap_bias_attr_default wrap_bias_attr_default
from paddle.trainer_config_helpers.default_decorators import wrap_name_default from paddle.trainer_config_helpers.default_decorators import wrap_name_default
from paddle.trainer_config_helpers.layers import RecurrentLayerGroupSetGenerator, Generator
from paddle.trainer_config_helpers.layers import layer_support from paddle.trainer_config_helpers.layers import layer_support
from paddle.trainer.config_parser import \
RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \
RecurrentLayerGroupEnd, model_type
import activation import activation
import re import attr
import data_type import data_type
from config_base import Layer, __convert_to_v2__
__all__ = ['parse_network', 'data'] __all__ = ['parse_network', 'data']
def parse_network(*outputs): def parse_network(output_layers, extra_layers=None):
""" """
Parse all output layers and then generate a ModelConfig object. Parse all layers in the neural network graph and
then generate a ModelConfig object.
.. note:: .. note::
This function is used internally in paddle.v2 module. User should never This function is used internally in paddle.v2 module. User should never
invoke this method. invoke this method.
:param outputs: Output layers. :param output_layers: Output layers.
:type outputs: Layer :type output_layers: Layer
:param extra_layers: Some layers in the neural network graph are not in the
path of output_layers.
:type extra_layers: Layer
:return: A ModelConfig object instance. :return: A ModelConfig object instance.
:rtype: ModelConfig :rtype: ModelConfig
""" """
if not isinstance(output_layers, collections.Sequence):
output_layers = [output_layers]
if extra_layers is not None and not isinstance(extra_layers,
collections.Sequence):
extra_layers = [extra_layers]
def __real_func__(): def __real_func__():
""" """
...@@ -74,7 +86,11 @@ def parse_network(*outputs): ...@@ -74,7 +86,11 @@ def parse_network(*outputs):
the plain old paddle configuration function. the plain old paddle configuration function.
""" """
context = dict() context = dict()
real_output = [each.to_proto(context=context) for each in outputs] real_output = [each.to_proto(context=context) for each in output_layers]
if extra_layers is not None:
extra_output = [
each.to_proto(context=context) for each in extra_layers
]
conf_helps.outputs(real_output) conf_helps.outputs(real_output)
return __parse__(__real_func__) return __parse__(__real_func__)
...@@ -119,54 +135,23 @@ class DataLayerV2(Layer): ...@@ -119,54 +135,23 @@ class DataLayerV2(Layer):
return doc return doc
class WithExtraParent(Layer): class MemoryV2(Layer):
def extra_parent(self): def __init__(self, name, extra_input=None, **kwargs):
return self.__extra_parent__
def __init__(self, name=None, parent_layers=None):
self.__extra_parent__ = []
super(WithExtraParent, self).__init__(
name=name, parent_layers=parent_layers)
def append_extra_parent(self, parent):
self.__extra_parent__.append(parent)
def to_proto(self, context):
""" """
function to set proto attribute Init memory object, if memory is inited inside recurrent_group step
function, it may depend on a boot_layer that should be initialized
outside recurrent_group, so we:
1. add RecurrentLayerInput to extra_parent of self.
2. add boot_layer to the extra_parent of RecurrentLayerInput.
:param extra_input: list of RecurrentLayerInput
:type extra_input: [RecurrentLayerInput]
""" """
kwargs = dict()
for p in self.__extra_parent__:
p.to_proto(context=context)
for layer_name in self.__parent_layers__:
if not isinstance(self.__parent_layers__[layer_name],
collections.Sequence):
v1_layer = self.__parent_layers__[layer_name].to_proto(
context=context)
else:
v1_layer = map(lambda x: x.to_proto(context=context),
self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer
if self.context_name() is None:
return self.to_proto_impl(context=context, **kwargs)
elif self.context_name() not in context:
context[self.context_name()] = self.to_proto_impl(
context=context, **kwargs)
if self.use_context_name():
return context[self.context_name()]
else:
return context[self.name]
class MemoryV2(WithExtraParent):
def __init__(self, name, **kwargs):
self.name = name self.name = name
super(MemoryV2, self).__init__(name=name, parent_layers=dict()) super(MemoryV2, self).__init__(name=name, parent_layers=dict())
self.__kwargs__ = kwargs self.__kwargs__ = kwargs
self.__boot_layer_name__ = None self.__boot_layer_name__ = None
if 'boot_layer' in kwargs: if 'boot_layer' in kwargs:
begin_of_current_rnn = [] begin_of_current_rnn = []
# TODO(yuyang18): Fix inspect, it could be wrong when user invoke a # TODO(yuyang18): Fix inspect, it could be wrong when user invoke a
...@@ -189,11 +174,10 @@ class MemoryV2(WithExtraParent): ...@@ -189,11 +174,10 @@ class MemoryV2(WithExtraParent):
assert begin_of_current_rnn is not None assert begin_of_current_rnn is not None
for extra in begin_of_current_rnn: for extra in begin_of_current_rnn:
self.append_extra_parent(extra) self.append_extra_parent(extra)
assert isinstance(extra, WithExtraParent)
extra.append_extra_parent(kwargs['boot_layer']) extra.append_extra_parent(kwargs['boot_layer'])
self.__boot_layer_name__ = kwargs['boot_layer'].name self.__boot_layer_name__ = kwargs['boot_layer'].name
def to_proto_impl(self, context, **kwargs): def to_proto_impl(self, **kwargs):
args = dict() args = dict()
for each in kwargs: for each in kwargs:
args[each] = kwargs[each] args[each] = kwargs[each]
...@@ -201,7 +185,7 @@ class MemoryV2(WithExtraParent): ...@@ -201,7 +185,7 @@ class MemoryV2(WithExtraParent):
args[each] = self.__kwargs__[each] args[each] = self.__kwargs__[each]
if self.__boot_layer_name__ is not None: if self.__boot_layer_name__ is not None:
args['boot_layer'] = context[self.__boot_layer_name__] args['boot_layer'] = self.__context__[self.__boot_layer_name__]
size = args.get('size', None) size = args.get('size', None)
if size is not None: if size is not None:
...@@ -223,22 +207,6 @@ class MemoryV2(WithExtraParent): ...@@ -223,22 +207,6 @@ class MemoryV2(WithExtraParent):
return True return True
class LayerOutputV2(Layer):
"""
LayerOutputV2 is used to store the result of LayerOutput in v1 api.
It will not store it's parents because layer_output has been parsed already.
"""
def __init__(self, layer_output):
assert isinstance(layer_output, conf_helps.LayerOutput)
self.layer_output = layer_output
super(LayerOutputV2, self).__init__(
name=layer_output.name, parent_layers=dict())
def to_proto_impl(self):
return self.layer_output
class StaticInputV2(object): class StaticInputV2(object):
def __init__(self, input, is_seq=False, size=None): def __init__(self, input, is_seq=False, size=None):
assert isinstance(input, LayerV2) assert isinstance(input, LayerV2)
...@@ -250,6 +218,66 @@ class StaticInputV2(object): ...@@ -250,6 +218,66 @@ class StaticInputV2(object):
# assert input.size is not None or size is not None # assert input.size is not None or size is not None
class BaseGeneratedInputV2(object):
def __init__(self):
self.bos_id = None
self.eos_id = None
def before_real_step(self):
raise NotImplementedError()
def after_real_step(self, *args):
raise NotImplementedError()
class GeneratedInputV2(BaseGeneratedInputV2):
def __init__(self, size, embedding_name, embedding_size):
super(GeneratedInputV2, self).__init__()
self.size = size
self.embedding_name = embedding_name
self.embedding_size = embedding_size
def after_real_step(self, input):
return max_id(input=input, name='__beam_search_predict__')
def before_real_step(self):
predict_id = memory(
name='__beam_search_predict__',
size=self.size,
boot_with_const_id=self.bos_id)
trg_emb = embedding(
input=predict_id,
size=self.embedding_size,
param_attr=attr.ParamAttr(name=self.embedding_name))
return trg_emb
class RecurrentLayerGroupSetGeneratorV2(Layer):
def __init__(self, eos_name, max_length, beam_size, num_results_per_sample):
self.eos_name = eos_name
self.max_length = max_length
self.beam_size = beam_size
self.num_results_per_sample = num_results_per_sample
super(RecurrentLayerGroupSetGeneratorV2, self).__init__(
name=eos_name, parent_layers={})
def to_proto_impl(self, **kwargs):
RecurrentLayerGroupSetGenerator(
Generator(
eos_layer_name=self.eos_name,
max_num_frames=self.max_length,
beam_size=self.beam_size,
num_results_per_sample=self.num_results_per_sample))
return self
def context_name(self):
return self.eos_name + ".fake"
def use_context_name(self):
return True
class MixedLayerV2(Layer): class MixedLayerV2(Layer):
""" """
This class is use to support `with` grammar. If not, the following code This class is use to support `with` grammar. If not, the following code
...@@ -328,18 +356,24 @@ def mixed(size=0, ...@@ -328,18 +356,24 @@ def mixed(size=0,
return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) return MixedLayerV2(size, input, name, act, bias_attr, layer_attr)
class RecurrentLayerInput(WithExtraParent): class RecurrentLayerInput(Layer):
def __init__(self, recurrent_name, index, parent_layers): def __init__(self, recurrent_name, index, parent_layers):
assert len(parent_layers) == 1 parents_len = len(parent_layers)
assert parents_len <= 1
if parents_len == 0:
self.__parents__ = []
else:
self.__parents__ = parent_layers.values()[0] self.__parents__ = parent_layers.values()[0]
super(RecurrentLayerInput, self).__init__(
name=self.__parents__[index].name, parent_layers=parent_layers)
self.__recurrent_name__ = recurrent_name self.__recurrent_name__ = recurrent_name
name = self.__parents__[
index].name if index >= 0 else self.context_name()
super(RecurrentLayerInput, self).__init__(
name=name, parent_layers=parent_layers)
def context_name(self): def context_name(self):
return self.__recurrent_name__ + ".begin" return self.__recurrent_name__ + ".begin"
def to_proto_impl(self, context, **kwargs): def to_proto_impl(self, **kwargs):
model_type('recurrent_nn') model_type('recurrent_nn')
RecurrentLayerGroupWithoutOutLinksBegin( RecurrentLayerGroupWithoutOutLinksBegin(
name=self.__recurrent_name__, name=self.__recurrent_name__,
...@@ -436,6 +470,11 @@ def recurrent_group(step, input, name=None): ...@@ -436,6 +470,11 @@ def recurrent_group(step, input, name=None):
for i in xrange(len(non_static_inputs)) for i in xrange(len(non_static_inputs))
] ]
extra_input = None
if len(non_static_inputs) == 0:
extra_input = RecurrentLayerInput(
recurrent_name=name, index=-1, parent_layers={})
def __real_step__(*args): def __real_step__(*args):
rnn_input = list(args) rnn_input = list(args)
static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input) static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input)
...@@ -443,6 +482,7 @@ def recurrent_group(step, input, name=None): ...@@ -443,6 +482,7 @@ def recurrent_group(step, input, name=None):
mem_name = "__%s_memory__" % static_input.input.name mem_name = "__%s_memory__" % static_input.input.name
mem = memory( mem = memory(
name=mem_name, name=mem_name,
extra_input=extra_input,
is_seq=static_input.is_seq, is_seq=static_input.is_seq,
size=static_input.input.calculate_size, size=static_input.input.calculate_size,
boot_layer=static_input.input) boot_layer=static_input.input)
...@@ -472,6 +512,73 @@ def recurrent_group(step, input, name=None): ...@@ -472,6 +512,73 @@ def recurrent_group(step, input, name=None):
return retv return retv
@wrap_name_default()
def beam_search(step,
input,
bos_id,
eos_id,
beam_size,
max_length=500,
name=None,
num_results_per_sample=None):
if num_results_per_sample is None:
num_results_per_sample = beam_size
assert num_results_per_sample <= beam_size
# logger.warning("num_results_per_sample should be less than beam_size")
if isinstance(input, StaticInputV2) or isinstance(input,
BaseGeneratedInputV2):
input = [input]
generated_input_index = -1
real_input = []
for i, each_input in enumerate(input):
assert isinstance(each_input, StaticInputV2) or isinstance(
each_input, BaseGeneratedInputV2)
if isinstance(each_input, BaseGeneratedInputV2):
assert generated_input_index == -1
generated_input_index = i
else:
real_input.append(each_input)
assert generated_input_index != -1
gipt = input[generated_input_index]
assert isinstance(gipt, BaseGeneratedInputV2)
gipt.bos_id = bos_id
gipt.eos_id = eos_id
def __real_step__(*args):
eos_name = "__%s_eos_layer__" % name
generator = RecurrentLayerGroupSetGeneratorV2(
eos_name, max_length, beam_size, num_results_per_sample)
args = list(args)
before_step_layer = gipt.before_real_step()
before_step_layer.append_child(
layer=generator, parent_names=[before_step_layer.name])
args.insert(generated_input_index, before_step_layer)
predict = gipt.after_real_step(step(*args))
eos_layer = eos(input=predict, eos_id=eos_id, name=eos_name)
predict.append_child(layer=eos_layer, parent_names=[predict.name])
return predict
# tmp = paddle.layer.recurrent_group(
# step=__real_step__,
# input=real_input,
# reverse=False,
# name=name,
# is_generating=True)
tmp = recurrent_group(step=__real_step__, input=real_input, name=name)
return tmp
__projection_names__ = filter(lambda x: x.endswith('_projection'), __projection_names__ = filter(lambda x: x.endswith('_projection'),
dir(conf_helps)) dir(conf_helps))
......
...@@ -159,7 +159,8 @@ class Parameters(object): ...@@ -159,7 +159,8 @@ class Parameters(object):
if not self.has_key(key): if not self.has_key(key):
raise ValueError("No such parameter %s" % key) raise ValueError("No such parameter %s" % key)
conf = self.__param_conf__[key] conf = self.__param_conf__[key]
return tuple(map(int, conf.dims)) dims = conf.dims if conf.dims else (1, conf.size)
return tuple(map(int, dims))
def __setitem__(self, key, value): def __setitem__(self, key, value):
""" """
......
...@@ -59,13 +59,13 @@ class ImageLayerTest(unittest.TestCase): ...@@ -59,13 +59,13 @@ class ImageLayerTest(unittest.TestCase):
num_channels=16, num_channels=16,
pool_type=pooling.Max()) pool_type=pooling.Max())
maxout = layer.maxout(input=conv, num_channels=16, groups=4) maxout = layer.maxout(input=conv, num_channels=16, groups=4)
print layer.parse_network(maxpool, spp, maxout) print layer.parse_network([maxpool, spp, maxout])
def test_norm_layer(self): def test_norm_layer(self):
norm1 = layer.img_cmrnorm(input=conv, size=5) norm1 = layer.img_cmrnorm(input=conv, size=5)
norm2 = layer.batch_norm(input=conv) norm2 = layer.batch_norm(input=conv)
norm3 = layer.sum_to_one_norm(input=conv) norm3 = layer.sum_to_one_norm(input=conv)
print layer.parse_network(norm1, norm2, norm3) print layer.parse_network([norm1, norm2, norm3])
class AggregateLayerTest(unittest.TestCase): class AggregateLayerTest(unittest.TestCase):
...@@ -78,7 +78,8 @@ class AggregateLayerTest(unittest.TestCase): ...@@ -78,7 +78,8 @@ class AggregateLayerTest(unittest.TestCase):
first_seq = layer.first_seq(input=pixel) first_seq = layer.first_seq(input=pixel)
concat = layer.concat(input=[last_seq, first_seq]) concat = layer.concat(input=[last_seq, first_seq])
seq_concat = layer.seq_concat(a=last_seq, b=first_seq) seq_concat = layer.seq_concat(a=last_seq, b=first_seq)
print layer.parse_network(pool, last_seq, first_seq, concat, seq_concat) print layer.parse_network(
[pool, last_seq, first_seq, concat, seq_concat])
class MathLayerTest(unittest.TestCase): class MathLayerTest(unittest.TestCase):
...@@ -95,8 +96,10 @@ class MathLayerTest(unittest.TestCase): ...@@ -95,8 +96,10 @@ class MathLayerTest(unittest.TestCase):
tensor = layer.tensor(a=pixel, b=pixel, size=1000) tensor = layer.tensor(a=pixel, b=pixel, size=1000)
cos_sim = layer.cos_sim(a=pixel, b=pixel) cos_sim = layer.cos_sim(a=pixel, b=pixel)
trans = layer.trans(input=tensor) trans = layer.trans(input=tensor)
print layer.parse_network(addto, linear_comb, interpolation, power, print layer.parse_network([
scaling, slope, tensor, cos_sim, trans) addto, linear_comb, interpolation, power, scaling, slope, tensor,
cos_sim, trans
])
class ReshapeLayerTest(unittest.TestCase): class ReshapeLayerTest(unittest.TestCase):
...@@ -110,7 +113,8 @@ class ReshapeLayerTest(unittest.TestCase): ...@@ -110,7 +113,8 @@ class ReshapeLayerTest(unittest.TestCase):
repeat = layer.repeat(input=pixel, num_repeats=4) repeat = layer.repeat(input=pixel, num_repeats=4)
reshape = layer.seq_reshape(input=pixel, reshape_size=4) reshape = layer.seq_reshape(input=pixel, reshape_size=4)
rotate = layer.rotate(input=pixel, height=16, width=49) rotate = layer.rotate(input=pixel, height=16, width=49)
print layer.parse_network(block_expand, expand, repeat, reshape, rotate) print layer.parse_network(
[block_expand, expand, repeat, reshape, rotate])
class RecurrentLayerTest(unittest.TestCase): class RecurrentLayerTest(unittest.TestCase):
...@@ -119,7 +123,7 @@ class RecurrentLayerTest(unittest.TestCase): ...@@ -119,7 +123,7 @@ class RecurrentLayerTest(unittest.TestCase):
recurrent = layer.recurrent(input=word) recurrent = layer.recurrent(input=word)
lstm = layer.lstmemory(input=word) lstm = layer.lstmemory(input=word)
gru = layer.grumemory(input=word) gru = layer.grumemory(input=word)
print layer.parse_network(recurrent, lstm, gru) print layer.parse_network([recurrent, lstm, gru])
class CostLayerTest(unittest.TestCase): class CostLayerTest(unittest.TestCase):
...@@ -139,10 +143,10 @@ class CostLayerTest(unittest.TestCase): ...@@ -139,10 +143,10 @@ class CostLayerTest(unittest.TestCase):
cost10 = layer.sum_cost(input=inference) cost10 = layer.sum_cost(input=inference)
cost11 = layer.huber_cost(input=score, label=label) cost11 = layer.huber_cost(input=score, label=label)
print layer.parse_network(cost1, cost2) print layer.parse_network([cost1, cost2])
print layer.parse_network(cost3, cost4) print layer.parse_network([cost3, cost4])
print layer.parse_network(cost5, cost6) print layer.parse_network([cost5, cost6])
print layer.parse_network(cost7, cost8, cost9, cost10, cost11) print layer.parse_network([cost7, cost8, cost9, cost10, cost11])
crf = layer.crf(input=inference, label=label) crf = layer.crf(input=inference, label=label)
crf_decoding = layer.crf_decoding(input=inference, size=3) crf_decoding = layer.crf_decoding(input=inference, size=3)
...@@ -151,8 +155,8 @@ class CostLayerTest(unittest.TestCase): ...@@ -151,8 +155,8 @@ class CostLayerTest(unittest.TestCase):
nce = layer.nce(input=inference, label=label, num_classes=3) nce = layer.nce(input=inference, label=label, num_classes=3)
hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3) hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3)
print layer.parse_network(crf, crf_decoding, ctc, warp_ctc, nce, print layer.parse_network(
hsigmoid) [crf, crf_decoding, ctc, warp_ctc, nce, hsigmoid])
class OtherLayerTest(unittest.TestCase): class OtherLayerTest(unittest.TestCase):
...@@ -160,7 +164,7 @@ class OtherLayerTest(unittest.TestCase): ...@@ -160,7 +164,7 @@ class OtherLayerTest(unittest.TestCase):
maxid = layer.max_id(input=inference) maxid = layer.max_id(input=inference)
sampling_id = layer.sampling_id(input=inference) sampling_id = layer.sampling_id(input=inference)
eos = layer.eos(input=maxid, eos_id=5) eos = layer.eos(input=maxid, eos_id=5)
print layer.parse_network(maxid, sampling_id, eos) print layer.parse_network([maxid, sampling_id, eos])
def test_slicing_joining_layer(self): def test_slicing_joining_layer(self):
pad = layer.pad(input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1]) pad = layer.pad(input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1])
......
...@@ -17,7 +17,6 @@ import collections ...@@ -17,7 +17,6 @@ import collections
from paddle.proto.ModelConfig_pb2 import ModelConfig from paddle.proto.ModelConfig_pb2 import ModelConfig
import layer as v2_layer import layer as v2_layer
from layer import WithExtraParent
__all__ = ['Topology'] __all__ = ['Topology']
...@@ -41,9 +40,8 @@ def __bfs_travel__(callback, *layers): ...@@ -41,9 +40,8 @@ def __bfs_travel__(callback, *layers):
__break__ = callback(each_layer) __break__ = callback(each_layer)
if __break__: if __break__:
return return
__layers__ = each_layer.__parent_layers__.values() __layers__ = each_layer.__parent_layers__.values() + \
if isinstance(each_layer, WithExtraParent): each_layer.extra_parent()
__layers__ = __layers__ + each_layer.extra_parent()
__bfs_travel__(callback, *__layers__) __bfs_travel__(callback, *__layers__)
...@@ -53,14 +51,26 @@ class Topology(object): ...@@ -53,14 +51,26 @@ class Topology(object):
and network configs. and network configs.
""" """
def __init__(self, layers): def __init__(self, layers, extra_layers=None):
def __check__(layers):
if not isinstance(layers, collections.Sequence): if not isinstance(layers, collections.Sequence):
__check_layer_type__(layers) __check_layer_type__(layers)
layers = [layers] layers = [layers]
for layer in layers: for layer in layers:
__check_layer_type__(layer) __check_layer_type__(layer)
return layers
layers = __check__(layers)
self.layers = layers self.layers = layers
self.__model_config__ = v2_layer.parse_network(*layers) if extra_layers is not None:
extra_layers = __check__(extra_layers)
self.__model_config__ = v2_layer.parse_network(
layers, extra_layers=extra_layers)
if extra_layers is not None:
self.layers.extend(extra_layers)
assert isinstance(self.__model_config__, ModelConfig) assert isinstance(self.__model_config__, ModelConfig)
def proto(self): def proto(self):
......
...@@ -37,9 +37,12 @@ class SGD(object): ...@@ -37,9 +37,12 @@ class SGD(object):
:type cost: paddle.v2.config_base.Layer :type cost: paddle.v2.config_base.Layer
:param parameters: The parameters dictionary. :param parameters: The parameters dictionary.
:type parameters: paddle.v2.parameters.Parameters :type parameters: paddle.v2.parameters.Parameters
:param extra_layers: Some layers in the neural network graph are not
in the path of cost layer.
:type extra_layers: paddle.v2.config_base.Layer
""" """
def __init__(self, cost, parameters, update_equation): def __init__(self, cost, parameters, update_equation, extra_layers=None):
if not isinstance(parameters, v2_parameters.Parameters): if not isinstance(parameters, v2_parameters.Parameters):
raise TypeError('parameters should be parameters') raise TypeError('parameters should be parameters')
...@@ -47,7 +50,7 @@ class SGD(object): ...@@ -47,7 +50,7 @@ class SGD(object):
if not isinstance(update_equation, v2_optimizer.Optimizer): if not isinstance(update_equation, v2_optimizer.Optimizer):
raise TypeError("update equation parameter must be " raise TypeError("update equation parameter must be "
"paddle.v2.optimizer.Optimizer") "paddle.v2.optimizer.Optimizer")
topology = Topology(cost) topology = Topology(cost, extra_layers=extra_layers)
self.__optimizer__ = update_equation self.__optimizer__ = update_equation
self.__topology__ = topology self.__topology__ = topology
self.__parameters__ = parameters self.__parameters__ = parameters
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册