提交 05a97ab5 编写于 作者: X xuwei06

Further fix the memory for Hierarchical RNN

Sequences should be sorted according to the number of subsequences they have.
上级 a9d327bd
...@@ -19,6 +19,7 @@ limitations under the License. */ ...@@ -19,6 +19,7 @@ limitations under the License. */
#include "hl_matrix_apply.cuh" #include "hl_matrix_apply.cuh"
#include "hl_sequence.h" #include "hl_sequence.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "hl_device_functions.cuh"
DEFINE_MATRIX_UNARY_OP(Zero, a = 0); DEFINE_MATRIX_UNARY_OP(Zero, a = 0);
DEFINE_MATRIX_TERNARY_PARAMETER_OP(_add, TWO_PARAMETER, c = p1*a + p2*b); DEFINE_MATRIX_TERNARY_PARAMETER_OP(_add, TWO_PARAMETER, c = p1*a + p2*b);
......
...@@ -434,23 +434,25 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs, ...@@ -434,23 +434,25 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
} }
} }
seqLengthAndStart_.clear();
info_.clear(); info_.clear();
info_.resize(inFrameLines_.size()); info_.resize(inFrameLines_.size());
seqLengthAndStart_.resize(inFrameLines_.size());
seqInfos_.clear();
seqInfos_.resize(inFrameLines_.size());
{ {
AsyncGpuBlock asyncGpuBlock; AsyncGpuBlock asyncGpuBlock;
// if shareInlinkInfo, only calculate info of the first inlink // if shareInlinkInfo, only calculate info of the first inlink
// else, calculate info for each inlink // else, calculate info for each inlink
if (shareInlinkInfo) { if (shareInlinkInfo) {
input.getSeqLengthAndStart(&seqLengthAndStart_[0], &maxSequenceLength_); input.getSeqInfo(&seqInfos_[0]);
maxSequenceLength_ = seqInfos_[0][0].topLevelLength;
createInFrameInfo(0, input, passType); createInFrameInfo(0, input, passType);
} else { } else {
for (size_t i = 0; i < inFrameLines_.size(); i++) { for (size_t i = 0; i < inFrameLines_.size(); i++) {
const Argument& input1 = inFrameLines_[i].inLayer->getOutput(); const Argument& input1 = inFrameLines_[i].inLayer->getOutput();
input1.getSeqLengthAndStart(&seqLengthAndStart_[i], input1.getSeqInfo(&seqInfos_[i]);
&maxSequenceLength_); maxSequenceLength_ = seqInfos_[i][0].topLevelLength;
createInFrameInfo(i, input1, passType); createInFrameInfo(i, input1, passType);
} }
} }
...@@ -614,7 +616,7 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() { ...@@ -614,7 +616,7 @@ void RecurrentGradientMachine::removeBeamSearchStatisticsCallbacks() {
* for all realLayer of inFrameLines one time. * for all realLayer of inFrameLines one time.
*/ */
void RecurrentGradientMachine::createInFrameInfo(int inlinks_id, void RecurrentGradientMachine::createInFrameInfo(int inlinkId,
const Argument& input, const Argument& input,
PassType passType) { PassType passType) {
bool hasSubseq = input.hasSubseq(); bool hasSubseq = input.hasSubseq();
...@@ -622,66 +624,67 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id, ...@@ -622,66 +624,67 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
size_t numSequences = input.getNumSequences(); size_t numSequences = input.getNumSequences();
std::vector<int> allIds; std::vector<int> allIds;
auto& seqInfo = seqInfos_[inlinkId];
numSeqs_.clear(); numSeqs_.clear();
Info* inlink_info = &info_[inlinks_id]; Info* inlinkInfo = &info_[inlinkId];
inlink_info->idIndex.clear(); inlinkInfo->idIndex.clear();
inlink_info->idIndex.push_back(0); // first idIndex = 0 inlinkInfo->idIndex.push_back(0); // first idIndex = 0
std::vector<int> sequenceStartPositions;
const int* subSequenceStartPositions = nullptr;
if (hasSubseq) { // for sequenceScatterAgentLayer if (hasSubseq) { // for sequenceScatterAgentLayer
// numSubSequences : all sentences within all samples(batch) subSequenceStartPositions =
size_t numSubSequences = input.getNumSubSequences(); input.subSequenceStartPositions->getData(false);
std::vector<int> sequenceStartPositions; inlinkInfo->seqStartPosIndex.clear();
inlink_info->seqStartPosIndex.clear(); inlinkInfo->seqStartPosIndex.push_back(0); // first seqStartPosIndex = 0
inlink_info->seqStartPosIndex.push_back(0); // first seqStartPosIndex = 0 }
// maxSequenceLength_: max number of sentences(subseq) in allsamples // maxSequenceLength_: max topLevelLength in allsamples
for (int i = 0; i < maxSequenceLength_; ++i) { for (int i = 0; i < maxSequenceLength_; ++i) {
if (hasSubseq) {
sequenceStartPositions.push_back(0); // first element = 0 sequenceStartPositions.push_back(0); // first element = 0
int numSeqs = 0;
for (size_t j = 0; j < numSubSequences; ++j) { // for each sentence
// seqLengthAndStart_[inlinks_id][j]:
// a 4-tuple including <subseqlen, subseqstart, seqid, subseqid>
if (std::get<3>(seqLengthAndStart_[inlinks_id][j]) == i) {
++numSeqs;
// subseqstart: the cpuSubSequenceStartPositions of this subseq
int subSeqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
int subSeqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
for (int k = subSeqStart; k < subSeqStart + subSeqLength; ++k) {
allIds.push_back(k);
}
sequenceStartPositions.push_back(sequenceStartPositions.back() +
subSeqLength);
}
}
inlink_info->idIndex.push_back(allIds.size());
inlink_info->seqStartPosIndex.push_back(sequenceStartPositions.size());
numSeqs_.push_back(numSeqs);
} }
// inFrameLine create sequenceStartPositions one time int numSeqs = 0;
CHECK_EQ(sequenceStartPositions.size(), for (size_t j = 0; j < numSequences; ++j) {
maxSequenceLength_ + numSubSequences); int seqLength = seqInfo[j].topLevelLength;
CHECK_EQ(inlink_info->seqStartPosIndex.size(), if (i >= seqLength) {
static_cast<size_t>(maxSequenceLength_ + 1)); break;
createSeqPos(sequenceStartPositions, &inlink_info->sequenceStartPositions); }
} else { // for scatterAgentLayer ++numSeqs;
for (int i = 0; i < maxSequenceLength_; ++i) { if (hasSubseq) {
int numSeqs = 0; int subSeqStart = subSequenceStartPositions[seqInfo[j].subSeqStart + i];
for (size_t j = 0; j < numSequences; ++j) { int subSeqEnd =
int seqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]); subSequenceStartPositions[seqInfo[j].subSeqStart + i + 1];
if (i >= seqLength) { for (int k = subSeqStart; k < subSeqEnd; ++k) {
break; allIds.push_back(k);
} }
++numSeqs; sequenceStartPositions.push_back(sequenceStartPositions.back() +
int seqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]); subSeqEnd - subSeqStart);
} else {
int seqStart = seqInfo[j].seqStart;
allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i) allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
: (seqStart + i)); : (seqStart + i));
} }
inlink_info->idIndex.push_back(allIds.size());
numSeqs_.push_back(numSeqs);
} }
inlinkInfo->idIndex.push_back(allIds.size());
numSeqs_.push_back(numSeqs);
if (hasSubseq) {
inlinkInfo->seqStartPosIndex.push_back(sequenceStartPositions.size());
}
}
if (hasSubseq) {
// inFrameLine create sequenceStartPositions one time
CHECK_EQ(sequenceStartPositions.size(),
maxSequenceLength_ + input.getNumSubSequences());
CHECK_EQ(inlinkInfo->seqStartPosIndex.size(),
static_cast<size_t>(maxSequenceLength_ + 1));
createSeqPos(sequenceStartPositions, &inlinkInfo->sequenceStartPositions);
} }
// copy and check scatterId // copy and check scatterId
copyScattedId(allIds, &inlink_info->allIds, input.getBatchSize()); copyScattedId(allIds, &inlinkInfo->allIds, input.getBatchSize());
CHECK_EQ(inlink_info->idIndex.size(), CHECK_EQ(inlinkInfo->idIndex.size(),
static_cast<size_t>(maxSequenceLength_ + 1)); static_cast<size_t>(maxSequenceLength_ + 1));
} }
...@@ -701,7 +704,7 @@ void RecurrentGradientMachine::createMemoryFrameInfo( ...@@ -701,7 +704,7 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
const int* starts = input.sequenceStartPositions->getData(false); const int* starts = input.sequenceStartPositions->getData(false);
for (size_t i = 0; i < numSequences; ++i) { for (size_t i = 0; i < numSequences; ++i) {
// memory info adopt info of inlinks[0] // memory info adopt info of inlinks[0]
int seqId = std::get<2>(seqLengthAndStart_[0][i]); int seqId = seqInfos_[0][i].seqId;
for (int k = starts[seqId]; k < starts[seqId + 1]; ++k) { for (int k = starts[seqId]; k < starts[seqId + 1]; ++k) {
allIds.push_back(k); allIds.push_back(k);
} }
...@@ -713,7 +716,7 @@ void RecurrentGradientMachine::createMemoryFrameInfo( ...@@ -713,7 +716,7 @@ void RecurrentGradientMachine::createMemoryFrameInfo(
} else { // for scatterAgentLayer } else { // for scatterAgentLayer
for (size_t i = 0; i < numSequences; ++i) { for (size_t i = 0; i < numSequences; ++i) {
allIds.push_back(std::get<2>(seqLengthAndStart_[0][i])); allIds.push_back(seqInfos_[0][i].seqId);
} }
} }
// copy and check scatterId // copy and check scatterId
......
...@@ -337,11 +337,7 @@ protected: ...@@ -337,11 +337,7 @@ protected:
// data) or has more than i subsequences (for subsequence data) // data) or has more than i subsequences (for subsequence data)
std::vector<int> numSeqs_; std::vector<int> numSeqs_;
// each inlinks has a "std::vector<std::tuple<int, int, int, int>>" denotes std::vector<std::vector<Argument::SeqInfo>> seqInfos_;
// its sequence info:
// if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
// else, tuple of (seqLength, seqStart, seqIndex, seqIndex)
std::vector<std::vector<std::tuple<int, int, int, int>>> seqLengthAndStart_;
// the id of inlink which share info with outlinks // the id of inlink which share info with outlinks
int targetInfoInlinkId_; int targetInfoInlinkId_;
......
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "Layer.h"
namespace paddle {
/**
 * Debugging layer: on every forward pass it logs the contents of each of its
 * input layers (see PrintLayer::forward). It contributes nothing to the
 * backward pass.
 */
class PrintLayer : public Layer {
public:
  /// Builds the layer from its configuration; all setup is done by Layer.
  explicit PrintLayer(const LayerConfig& config) : Layer(config) {}

  /// Logs every populated field of every input argument. Defined out of line.
  void forward(PassType passType);

  /// Intentionally empty: the body performs no work for this layer.
  void backward(const UpdateCallback& callback) {}
};
/**
 * Runs the base-class forward step, then writes one INFO log record for each
 * populated field (value matrix, ids vector, sequence start positions,
 * sub-sequence start positions) of every input layer's output.
 *
 * @param passType forwarded unchanged to Layer::forward.
 */
void PrintLayer::forward(PassType passType) {
  Layer::forward(passType);

  const size_t inputCount = inputLayers_.size();
  for (size_t idx = 0; idx < inputCount; ++idx) {
    const auto& in = getInput(idx);
    const std::string& layerName = inputLayers_[idx]->getName();

    // Each field is optional; only the ones that are set get printed.
    if (in.value) {
      std::ostringstream dump;
      in.value->print(dump);
      LOG(INFO) << "layer=" << layerName << " value matrix:\n" << dump.str();
    }

    if (in.ids) {
      std::ostringstream dump;
      // Pass the full size so that every id is printed.
      in.ids->print(dump, in.ids->getSize());
      LOG(INFO) << "layer=" << layerName << " ids vector:\n" << dump.str();
    }

    // NOTE(review): getVector(false) presumably selects the host-side copy of
    // the positions -- confirm against the vector class.
    if (auto seqStarts = in.sequenceStartPositions) {
      std::ostringstream dump;
      seqStarts->getVector(false)->print(dump, seqStarts->getSize());
      LOG(INFO) << "layer=" << layerName << " sequence pos vector:\n"
                << dump.str();
    }

    if (auto subSeqStarts = in.subSequenceStartPositions) {
      std::ostringstream dump;
      subSeqStarts->getVector(false)->print(dump, subSeqStarts->getSize());
      LOG(INFO) << "layer=" << layerName << " sub-sequence pos vector:\n"
                << dump.str();
    }
  }
}
REGISTER_LAYER(print, PrintLayer);
} // namespace paddle
...@@ -42,14 +42,16 @@ def outer_step(x): ...@@ -42,14 +42,16 @@ def outer_step(x):
inner_mem = memory(name="inner_rnn_state", inner_mem = memory(name="inner_rnn_state",
size=hidden_dim, size=hidden_dim,
boot_layer=outer_mem) boot_layer=outer_mem)
return fc_layer(input=[y, inner_mem], out = fc_layer(input=[y, inner_mem],
size=hidden_dim, size=hidden_dim,
act=TanhActivation(), act=TanhActivation(),
bias_attr=True, bias_attr=True,
name="inner_rnn_state") name="inner_rnn_state")
return out
inner_rnn_output = recurrent_group( inner_rnn_output = recurrent_group(
step=inner_step, step=inner_step,
name="inner",
input=x) input=x)
last = last_seq(input=inner_rnn_output, name="outer_rnn_state") last = last_seq(input=inner_rnn_output, name="outer_rnn_state")
...@@ -60,11 +62,10 @@ def outer_step(x): ...@@ -60,11 +62,10 @@ def outer_step(x):
return inner_rnn_output return inner_rnn_output
out = recurrent_group( out = recurrent_group(
name="outer",
step=outer_step, step=outer_step,
input=SubsequenceInput(emb)) input=SubsequenceInput(emb))
value_printer_evaluator(input=out)
rep = last_seq(input=out) rep = last_seq(input=out)
prob = fc_layer(size=label_dim, prob = fc_layer(size=label_dim,
input=rep, input=rep,
......
...@@ -35,18 +35,18 @@ emb = embedding_layer(input=data, size=word_dim) ...@@ -35,18 +35,18 @@ emb = embedding_layer(input=data, size=word_dim)
def step(y): def step(y):
mem = memory(name="rnn_state", size=hidden_dim) mem = memory(name="rnn_state", size=hidden_dim)
return fc_layer(input=[y, mem], out = fc_layer(input=[y, mem],
size=hidden_dim, size=hidden_dim,
act=TanhActivation(), act=TanhActivation(),
bias_attr=True, bias_attr=True,
name="rnn_state") name="rnn_state")
return out
out = recurrent_group( out = recurrent_group(
name="rnn",
step=step, step=step,
input=emb) input=emb)
value_printer_evaluator(input=out)
rep = last_seq(input=out) rep = last_seq(input=out)
prob = fc_layer(size=label_dim, prob = fc_layer(size=label_dim,
input=rep, input=rep,
......
...@@ -92,7 +92,7 @@ void CalCost(const string& conf, const string& dir, real* cost, ...@@ -92,7 +92,7 @@ void CalCost(const string& conf, const string& dir, real* cost,
rmDir(dir.c_str()); rmDir(dir.c_str());
} }
void test(const string& conf1, const string& conf2) { void test(const string& conf1, const string& conf2, double eps) {
int num_passes = 5; int num_passes = 5;
real* cost1 = new real[num_passes]; real* cost1 = new real[num_passes];
const string dir1 = "gserver/tests/t1"; const string dir1 = "gserver/tests/t1";
...@@ -104,8 +104,9 @@ void test(const string& conf1, const string& conf2) { ...@@ -104,8 +104,9 @@ void test(const string& conf1, const string& conf2) {
for (int i = 0; i < num_passes; i++) { for (int i = 0; i < num_passes; i++) {
LOG(INFO) << "num_passes: " << i << ", cost1=" << cost1[i] LOG(INFO) << "num_passes: " << i << ", cost1=" << cost1[i]
<< ", cost2=" << cost2[i]; << ", cost2=" << cost2[i]
ASSERT_NEAR(cost1[i], cost2[i], 1e-3); << ", diff=" << std::abs(cost1[i] - cost2[i]);
ASSERT_NEAR(cost1[i], cost2[i], eps);
} }
delete[] cost1; delete[] cost1;
delete[] cost2; delete[] cost2;
...@@ -113,12 +114,14 @@ void test(const string& conf1, const string& conf2) { ...@@ -113,12 +114,14 @@ void test(const string& conf1, const string& conf2) {
TEST(RecurrentGradientMachine, HasSubSequence) { TEST(RecurrentGradientMachine, HasSubSequence) {
test("gserver/tests/sequence_layer_group.conf", test("gserver/tests/sequence_layer_group.conf",
"gserver/tests/sequence_nest_layer_group.conf"); "gserver/tests/sequence_nest_layer_group.conf",
1e-5);
} }
TEST(RecurrentGradientMachine, rnn) { TEST(RecurrentGradientMachine, rnn) {
test("gserver/tests/sequence_rnn.conf", test("gserver/tests/sequence_rnn.conf",
"gserver/tests/sequence_nest_rnn.conf"); "gserver/tests/sequence_nest_rnn.conf",
0);
} }
......
...@@ -477,51 +477,34 @@ void Argument::splitByDataId(const std::vector<Argument>& argus, ...@@ -477,51 +477,34 @@ void Argument::splitByDataId(const std::vector<Argument>& argus,
} }
} }
void Argument::getSeqLengthAndStart( void Argument::getSeqInfo(std::vector<SeqInfo>* seqInfo) const {
std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
int* maxSequenceLength) const {
const int* starts = sequenceStartPositions->getData(false); const int* starts = sequenceStartPositions->getData(false);
if (hasSubseq()) { const int* subStarts = hasSubseq()
size_t numSubSequences = getNumSubSequences(); ? subSequenceStartPositions->getData(false) : nullptr;
(*seqLengthAndStart).reserve(numSubSequences); size_t numSequences = getNumSequences();
const int* subStarts = subSequenceStartPositions->getData(false); seqInfo->reserve(numSequences);
int seqIndex = 0; int subSeqEnd = 0;
int subSeqIndex = 0; for (size_t i = 0; i < numSequences; ++i) {
*maxSequenceLength = 0; SeqInfo info;
for (size_t i = 0; i < numSubSequences; ++i) { info.seqStart = starts[i];
if (subStarts[i] == starts[seqIndex]) { info.subLevelLength = starts[i + 1] - starts[i];
subSeqIndex = 0; info.seqId = i;
(*seqLengthAndStart) if (hasSubseq()) {
.push_back(std::make_tuple<int, int, int, int>( info.subSeqStart = subSeqEnd;
subStarts[i + 1] - subStarts[i], (int)subStarts[i], while (subStarts[subSeqEnd] < starts[i + 1]) {
(int)seqIndex, (int)subSeqIndex)); ++subSeqEnd;
++subSeqIndex;
++seqIndex;
} else if (subStarts[i] < starts[seqIndex]) {
(*seqLengthAndStart)
.push_back(std::make_tuple<int, int, int, int>(
subStarts[i + 1] - subStarts[i], (int)subStarts[i],
(int)seqIndex - 1, (int)subSeqIndex));
++subSeqIndex;
} }
// maxSequenceLength_ = 1 + max(subSeqIndex) in each Seq. info.topLevelLength = subSeqEnd - info.subSeqStart;
if (*maxSequenceLength < std::get<3>((*seqLengthAndStart)[i])) } else {
*maxSequenceLength = std::get<3>((*seqLengthAndStart)[i]); info.topLevelLength = info.subLevelLength;
} info.subSeqStart = 0; // not used
*maxSequenceLength += 1;
} else {
size_t numSequences = getNumSequences();
(*seqLengthAndStart).reserve(numSequences);
for (size_t i = 0; i < numSequences; ++i) {
(*seqLengthAndStart)
.push_back(std::make_tuple<int, int, int, int>(
starts[i + 1] - starts[i], (int)starts[i], (int)i, (int)i));
} }
std::sort((*seqLengthAndStart).begin(), (*seqLengthAndStart).end(), seqInfo->push_back(info);
std::greater<std::tuple<int, int, int, int>>());
*maxSequenceLength = std::get<0>((*seqLengthAndStart)[0]);
} }
std::sort(seqInfo->begin(), seqInfo->end(),
[](const SeqInfo& a, const SeqInfo& b) {
return a.topLevelLength > b.topLevelLength;
});
} }
void Argument::checkSubset() const { void Argument::checkSubset() const {
......
...@@ -253,21 +253,29 @@ struct Argument { ...@@ -253,21 +253,29 @@ struct Argument {
static void splitByDataId(const std::vector<Argument>& argus, static void splitByDataId(const std::vector<Argument>& argus,
std::vector<std::vector<Argument>>* arguGroups); std::vector<std::vector<Argument>>* arguGroups);
// Per-sequence summary record, one entry per top-level sequence.
struct SeqInfo {
  // Sequence data:    the length of the sequence.
  // Subsequence data: the number of subsequences in the sequence.
  int topLevelLength;

  // Start position of this sequence.
  int seqStart;

  // Id of this sequence.
  int seqId;

  // Sequence data:    same value as topLevelLength.
  // Subsequence data: the sum of the lengths of all its subsequences.
  int subLevelLength;

  // Only meaningful for subsequence data: the index of this sequence's
  // start position in subSequenceStartPositions, i.e.
  //   subSequenceStartPositions[subSeqStart] == seqStart
  int subSeqStart;
};
/* /*
Get Sequence Length, startPositions and max Length according to input Get SeqInfo for each sequence of this argument
1. For sequence data: Elements in *seqInfo are sorted by topLevelLength in descending order
Each tuple is (seq_length, seq_start, seq_id, seq_id) */
The tuples are sorted according to seq_length or subseq_length void getSeqInfo(std::vector<SeqInfo>* segInfo) const;
*maxSequenceLength is the maximal sequence length
2. For subsequence data:
Each tuple is (subseq_length, subseq_start, seq_id, subseq_id)
The tuples are not sorted. They are in the original order.
*maxSequenceLenth is the maximal number of subsequences in each sequence.
*/
void getSeqLengthAndStart(
std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
int* maxSequenceLength) const;
/* /*
Check Whether sequenceStartPositions is subset of Check Whether sequenceStartPositions is subset of
subSequenceStartPositions. subSequenceStartPositions.
......
...@@ -1408,6 +1408,14 @@ class SelectiveFCLayer(LayerBase): ...@@ -1408,6 +1408,14 @@ class SelectiveFCLayer(LayerBase):
input_index, psize, dims, sparse, format) input_index, psize, dims, sparse, format)
self.create_bias_parameter(bias, self.config.size) self.create_bias_parameter(bias, self.config.size)
@config_layer('print')
class PrintLayer(LayerBase):
    """Config-side declaration of the 'print' layer.

    The layer is registered with type 'print' and a size of 0; it only
    forwards its name and inputs to LayerBase.
    """

    def __init__(self, name, inputs):
        layer_type = 'print'
        layer_size = 0
        super(PrintLayer, self).__init__(name, layer_type, layer_size, inputs)
@config_layer('data') @config_layer('data')
class DataLayer(LayerBase): class DataLayer(LayerBase):
def __init__( def __init__(
......
...@@ -52,7 +52,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel", ...@@ -52,7 +52,7 @@ __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
'cross_entropy_with_selfnorm', 'cross_entropy', 'cross_entropy_with_selfnorm', 'cross_entropy',
'multi_binary_label_cross_entropy', 'multi_binary_label_cross_entropy',
'rank_cost', 'lambda_cost', 'huber_cost', 'rank_cost', 'lambda_cost', 'huber_cost',
'block_expand_layer', 'out_prod_layer', 'block_expand_layer', 'out_prod_layer', 'print_layer'
] ]
...@@ -108,6 +108,8 @@ class LayerType(object): ...@@ -108,6 +108,8 @@ class LayerType(object):
LINEAR_COMBINATION_LAYER = "convex_comb" LINEAR_COMBINATION_LAYER = "convex_comb"
BLOCK_EXPAND = "blockexpand" BLOCK_EXPAND = "blockexpand"
PRINT_LAYER = "print"
CTC_LAYER = "ctc" CTC_LAYER = "ctc"
CRF_LAYER = "crf" CRF_LAYER = "crf"
CRF_DECODING_LAYER = "crf_decoding" CRF_DECODING_LAYER = "crf_decoding"
...@@ -729,6 +731,19 @@ def fc_layer(input, size, act=None, name=None, ...@@ -729,6 +731,19 @@ def fc_layer(input, size, act=None, name=None,
return LayerOutput(name, LayerType.FC_LAYER, input, activation=act, return LayerOutput(name, LayerType.FC_LAYER, input, activation=act,
size=size) size=size)
@wrap_name_default("print")
def print_layer(input, name=None):
    """
    Print the output value of input layers. This layer is useful for debugging.

    :param input: The layer, or the list/tuple of layers, whose output is
                  printed. A single LayerOutput is treated as a one-element
                  list.
    :type input: LayerOutput|list|tuple
    :param name: The name of this layer. Optional.
    :type name: basestring
    :return: LayerOutput object of type LayerType.PRINT_LAYER.
    :rtype: LayerOutput
    """
    # Accept a bare layer as well as a collection, so that both
    # print_layer(input=out) and print_layer(input=[out]) work.
    if isinstance(input, LayerOutput):
        input = [input]
    assert isinstance(input, (list, tuple))
    input = list(input)

    Layer(
        name=name,
        type=LayerType.PRINT_LAYER,
        inputs=[l.name for l in input],
    )
    return LayerOutput(name, LayerType.PRINT_LAYER, input)
@wrap_name_default("seq_pooling") @wrap_name_default("seq_pooling")
@wrap_bias_attr_default(has_bias=False) @wrap_bias_attr_default(has_bias=False)
......
...@@ -34,6 +34,8 @@ out = fc_layer(input=[cos1, cos3, linear_comb, z], ...@@ -34,6 +34,8 @@ out = fc_layer(input=[cos1, cos3, linear_comb, z],
size=num_classes, size=num_classes,
act=SoftmaxActivation()) act=SoftmaxActivation())
print_layer(input=[out])
outputs(classification_cost(out, data_layer(name="label", size=num_classes))) outputs(classification_cost(out, data_layer(name="label", size=num_classes)))
# for ctc # for ctc
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册