提交 5bca34ed 编写于 作者: L liaogang

Merge remote-tracking branch 'upstream/master'

......@@ -142,12 +142,15 @@ We also project the encoder vector to :code:`decoder_size` dimensional space, ge
The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`:
.. code-block:: python
group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
StaticInput(input=encoded_proj,is_seq=True)]
trg_embedding = embedding_layer(
input=data_layer(name='target_language_word',
size=target_dict_dim),
size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embedding (the groundtruth) is the data input,
# while the encoded source sequence is accessed as an unbounded memory.
......@@ -156,13 +159,7 @@ The decoder uses :code:`recurrent_group` to define the recurrent neural network.
# All sequence inputs should have the same length.
decoder = recurrent_group(name=decoder_group_name,
step=gru_decoder_with_attention,
input=[
StaticInput(input=encoded_vector,
is_seq=True),
StaticInput(input=encoded_proj,
is_seq=True),
trg_embedding
])
input=group_inputs)
The implementation of the step function is listed below. First, it defines the **memory** of the decoder network. Then it defines attention, gated recurrent unit step function, and the output function:
......@@ -217,10 +214,8 @@ The code is listed below:
.. code-block:: python
gen_inputs = [StaticInput(input=encoded_vector,
is_seq=True),
StaticInput(input=encoded_proj,
is_seq=True), ]
group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
StaticInput(input=encoded_proj,is_seq=True)]
# In generation, decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
......@@ -231,10 +226,10 @@ The code is listed below:
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
gen_inputs.append(trg_embedding)
group_inputs.append(trg_embedding)
beam_gen = beam_search(name=decoder_group_name,
step=gru_decoder_with_attention,
input=gen_inputs,
input=group_inputs,
id_input=data_layer(name="sent_id",
size=1),
dict_file=trg_dict_path,
......
......@@ -169,6 +169,12 @@ dotmul_projection
:members: dotmul_projection
:noindex:
dotmul_operator
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: dotmul_operator
:noindex:
full_matrix_projection
----------------------
.. automodule:: paddle.trainer_config_helpers.layers
......
......@@ -85,6 +85,7 @@ bool CudnnConvLayer::init(const LayerMap &layerMap,
biasOffset_ = numFilters_ / groups_[0];
}
batchNum_ = 0;
isSelectAlgo_ = false;
return true;
}
......@@ -132,6 +133,11 @@ void CudnnConvLayer::reshape(int batchSize) {
getOutput().setFrameHeight(outputH_);
getOutput().setFrameWidth(outputW_);
// if the batchSize remains the same, set isSelectAlgo_ true.
// Otherwise, set isSelectAlgo_ false and select algo again.
isSelectAlgo_ = (batchSize == batchNum_);
batchNum_ = batchSize;
size_t maxWorkSpace = 0;
for (size_t i = 0; i < inputLayers_.size(); i++) {
CHECK_EQ(inputLayers_[i]->getOutput().value->getWidth(),
......@@ -160,6 +166,10 @@ void CudnnConvLayer::reshape(int batchSize) {
maxWorkSpace = std::max(fwdLimitBytes_[i], bwdDataLimitBytes_[i]);
maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_[i]);
VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_[i]
<< " / " << bwdDataAlgo_[i]
<< " / " << bwdFilterAlgo_[i];
}
}
......
......@@ -87,6 +87,10 @@ protected:
/// Whether the conv algorithm has already been selected.
bool isSelectAlgo_;
/// batchNum is used to record batch size. If the batch size is changed,
/// the selection algorithm will be called.
int batchNum_;
public:
explicit CudnnConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {}
......
......@@ -19,7 +19,7 @@ namespace paddle {
MultinomialSampler::MultinomialSampler(const real* prob, int size)
: rand_(0.0, size) {
intervals_.reserve(size + 1);
intervals_.resize(size + 1);
double sum = 0;
for (int i = 0; i < size; ++i) {
sum += prob[i];
......@@ -50,12 +50,13 @@ MultinomialSampler::MultinomialSampler(const real* prob, int size)
int bigPos = nextBigPos(0);
auto fillIntervals = [&]() {
while (bigPos < size && smallPos < size) {
while (bigPos < size) {
while (intervals_[bigPos].thresh > 1 && smallPos < size) {
intervals_[smallPos].otherId = bigPos;
intervals_[bigPos].thresh -= 1 - intervals_[smallPos].thresh;
smallPos = nextSmallPos(smallPos + 1);
}
if (smallPos >= size) break;
bigPos = nextBigPos(bigPos + 1);
// If intervals_[bigPos].thresh < 1, it becomes a small interval
}
......
......@@ -41,39 +41,42 @@ public:
TEST(MultinomialSampler, gen) {
int numGrids = 1024 * 1024;
int size = 1024 * 4;
default_random_engine reng;
uniform_int_distribution<int> rand(1, numGrids / size * 1.8);
vector<real> prob;
int sum = 0;
for (int i = 0; i < size; ++i) {
prob.push_back(rand(reng));
sum += prob.back();
}
CHECK_LE(sum, numGrids);
prob.back() += numGrids - sum;
vector<int> counts(size);
MultinomialSamplerTester sampler(&prob[0], size);
counts.assign(size, 0);
{
double s = (double)size / (double)numGrids;
REGISTER_TIMER("MultinomialSampler");
for (double i = 0; i < numGrids; ++i) {
int ret = sampler.testGen([i, s]() { return s * i; });
if (ret < 0 || ret >= size) {
EXPECT_GE(ret, 0);
EXPECT_LT(ret, size);
break;
for (size_t iter=0; iter < 256; ++iter) {
uniform_int_distribution<int> rand(1, numGrids / size * 1.8);
vector<real> prob;
int sum = 0;
for (int i = 0; i < size; ++i) {
prob.push_back(rand(reng));
sum += prob.back();
}
CHECK_LE(sum, numGrids);
prob.back() += numGrids - sum;
vector<int> counts(size);
MultinomialSamplerTester sampler(&prob[0], size);
counts.assign(size, 0);
{
double s = (double)size / (double)numGrids;
REGISTER_TIMER("MultinomialSampler");
for (double i = 0; i < numGrids; ++i) {
int ret = sampler.testGen([i, s]() { return s * i; });
if (ret < 0 || ret >= size) {
EXPECT_GE(ret, 0);
EXPECT_LT(ret, size);
break;
}
++counts[ret];
}
++counts[ret];
}
}
for (int i = 0; i < size; ++i) {
if (prob[i] != counts[i]) {
EXPECT_EQ(prob[i], counts[i]);
LOG(INFO) << "i=" << i;
break;
for (int i = 0; i < size; ++i) {
if (prob[i] != counts[i]) {
EXPECT_EQ(prob[i], counts[i]);
LOG(INFO) << iter;
break;
}
}
}
}
......@@ -135,6 +138,7 @@ void benchmarkRandom() {
LOG(INFO) << "sum1=" << sum1;
}
int main(int argc, char** argv) {
initMain(argc, argv);
testing::InitGoogleTest(&argc, argv);
......
......@@ -636,7 +636,6 @@ class Operator(Cfg):
input_layer_names,
):
self.add_keys(locals())
self.operator_conf = OperatorConfig()
self.operator_conf.type = self.type
......@@ -686,12 +685,15 @@ class ConvOperator(Operator):
if num_filters is not None:
self.operator_conf.num_filters = num_filters
parse_conv(conv_conf, input_layer_names[0], self.operator_conf.conv_conf, True)
parse_conv(conv_conf,
MakeLayerNameInSubmodel(input_layer_names[0]),
self.operator_conf.conv_conf)
self.operator_conf.output_size = (self.operator_conf.conv_conf.output_x ** 2) * num_filters
config_assert(len(input_layer_names) == 2, "Conv is binary operator")
def calc_output_size(self, input_sizes):
return self.operator_conf.output_size
# please refer to the comments in proto/ModelConfig.proto
......@@ -2462,11 +2464,11 @@ class MixedLayer(LayerBase):
if size != 0:
self.set_layer_size(size)
else:
size = operator.calc_output_size(operator_conf.input_sizes)
if size != 0:
config_assert(size == self.config.size,
sz = operator.calc_output_size(operator_conf.input_sizes)
if sz != 0:
config_assert(sz == self.config.size,
"different inputs have different size: %s vs. %s" %
(size, self.config.size))
(sz, self.config.size))
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
input = self.inputs[input_index]
......
......@@ -286,7 +286,6 @@ def full_matrix_projection(input, size=0, param_attr=None):
size=size,
**param_attr.attr)
proj.origin = input
proj.origin.projection = "matrix"
return proj
......@@ -333,7 +332,6 @@ def table_projection(input, size=0, param_attr=None):
size=size,
**param_attr.attr)
proj.origin = input
proj.origin.projection = "table"
return proj
......@@ -377,17 +375,15 @@ def identity_projection(input, offset=None):
if offset is None:
proj = IdentityProjection(input_layer_name=input.name)
proj.origin = input
proj.origin.projection = 'identity'
else:
proj = IdentityOffsetProjection(input_layer_name=input.name,
offset=offset)
proj.origin = input
proj.origin.projection = 'identity_offset'
return proj
@wrap_param_attr_default()
def dotmul_projection(input, param_attr=None, scale=1):
def dotmul_projection(input, param_attr=None):
"""
DotMulProjection with a layer as input.
It performs element-wise multiplication with weight.
......@@ -407,30 +403,35 @@ def dotmul_projection(input, param_attr=None, scale=1):
:type input: LayerOutput
:param param_attr: Parameter config, None if use default.
:type param_attr: ParameterAttribute
:param scale: config scalar, default value is one.
:type scale: float
:return: A DotMulProjection Object.
:rtype: DotMulProjection
"""
proj = DotMulProjection(input_layer_name=input.name,
size=input.size,
**param_attr.attr)
proj.origin = input
size=input.size,
**param_attr.attr)
proj.origin = input
return proj
def dotmul_operator(x, y, scale=1):
"""
DotMulOperator takes two inputs and performs element-wise multiplication:
.. math::
out.row[i] += scale * (in1.row[i] .* in2.row[i])
out.row[i] += scale * (x.row[i] .* y.row[i])
where :math:`.*` means element-wise multiplication, and
scale is a config scalar, its default value is one.
The example usage is:
.. code-block:: python
op = dotmul_operator(x, y,
scale=1)
:param input: Input layer
:type input: LayerOutput
op = dotmul_operator(x=layer1, y=layer2, scale=0.5)
:param x: Input layer1
:type x: LayerOutput
:param y: Input layer2
:type y: LayerOutput
:param scale: config scalar, default value is one.
:type scale: float
:return: A DotMulOperator Object.
......@@ -487,7 +488,6 @@ def context_projection(input, context_len, context_start=None,
trainable_padding=trainable,
**extra_dict)
proj.origin = input
proj.origin.projection = 'context'
return proj
......@@ -2667,8 +2667,8 @@ def classification_cost(input, label, name=None,
return LayerOutput(name, LayerType.COST, parents=[input, label])
def conv_operator(input, filter_size, num_filters,
num_channel=None, stride=1, padding=0,
def conv_operator(img, filter, filter_size, num_filters,
num_channel=None, stride=1, padding=0, groups=1,
filter_size_y=None, stride_y=None, padding_y=None):
"""
Different from img_conv_layer, conv_op is an Operator, which can be used
......@@ -2680,13 +2680,16 @@ def conv_operator(input, filter_size, num_filters,
.. code-block:: python
op = conv_operator(input=[layer1, layer2],
op = conv_operator(img=input1,
filter=input2,
filter_size=3,
num_filters=64,
num_channel=64)
:param input: Input layer.
:type input: LayerOutput|list|tuple
:param img: input image
:type img: LayerOutput
:param filter: input filter
:type filter: LayerOutput
:param filter_size: The x dimension of a filter kernel.
:type filter_size: int
:param filter_size_y: The y dimension of a filter kernel. Since
......@@ -2708,14 +2711,13 @@ def conv_operator(input, filter_size, num_filters,
:return: A ConvOperator Object.
:rtype: ConvOperator
"""
assert isinstance(input, list) or isinstance(input, tuple)
if filter_size_y is None:
filter_size_y = filter_size
if stride_y is None:
stride_y = stride
if padding_y is None:
padding_y = padding
op = ConvOperator(input_layer_name=[x.name for x in input],
op = ConvOperator(input_layer_names=[img.name, filter.name],
num_filters = num_filter,
conv_conf=Conv(filter_size=filter_size,
padding=padding,
......@@ -2723,9 +2725,9 @@ def conv_operator(input, filter_size, num_filters,
channels=num_channel,
filter_size_y=filter_size_y,
padding_y=padding_y,
stride_y=stride_y))
op.origin = input
op.origin.operator = "conv_op"
stride_y=stride_y,
groups=groups))
op.origin = [img, filter]
return op
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册