提交 3438d650 编写于 作者: X xuwei06

Fix bugs for rnn generation

1. v2.layer.parse_network did not correctly handle the generation output.
2. GatherAgentLayer did not correctly handle the generation output when batch_size > 1.
3. Fix CustomStackTrace for RNN groups.
上级 4d6cb5d0
...@@ -241,11 +241,14 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs, ...@@ -241,11 +241,14 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs,
dataLayers_[i]->setData(inArgs[i]); dataLayers_[i]->setData(inArgs[i]);
} }
gLayerStackTrace.set_stage(true);
{ {
for (auto& layer : layers_) { for (auto& layer : layers_) {
REGISTER_TIMER_INFO("ForwardTimer", layer->getName().c_str()); REGISTER_TIMER_INFO("ForwardTimer", layer->getName().c_str());
gLayerStackTrace.push(layer->getName()); gLayerStackTrace.push(layer->getName());
layer->forward(passType); layer->forward(passType);
gLayerStackTrace.pop(layer->getName());
} }
} }
...@@ -254,9 +257,6 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs, ...@@ -254,9 +257,6 @@ void NeuralNetwork::forward(const std::vector<Argument>& inArgs,
for (auto& layer : outputLayers_) { for (auto& layer : outputLayers_) {
outArgs->push_back(layer->getOutput()); outArgs->push_back(layer->getOutput());
} }
if (passType == PASS_TEST) {
gLayerStackTrace.clear();
}
} }
void NeuralNetwork::resetState() { void NeuralNetwork::resetState() {
...@@ -283,9 +283,10 @@ void NeuralNetwork::getState(MachineState& machineState) { ...@@ -283,9 +283,10 @@ void NeuralNetwork::getState(MachineState& machineState) {
} }
void NeuralNetwork::backward(const UpdateCallback& callback) { void NeuralNetwork::backward(const UpdateCallback& callback) {
gLayerStackTrace.pop(""); // tell layer trace is during backward. gLayerStackTrace.set_stage(false);
FOR_EACH_R(layer, layers_) { FOR_EACH_R(layer, layers_) {
REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str()); REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str());
gLayerStackTrace.push((*layer)->getName());
if ((*layer)->needGradient()) { if ((*layer)->needGradient()) {
(*layer)->backward(callback); (*layer)->backward(callback);
} }
......
...@@ -208,6 +208,7 @@ void RecurrentGradientMachine::init( ...@@ -208,6 +208,7 @@ void RecurrentGradientMachine::init(
}); });
CHECK(subModelConfig != config.sub_models().end()); CHECK(subModelConfig != config.sub_models().end());
reversed_ = subModelConfig->reversed(); reversed_ = subModelConfig->reversed();
generating_ = subModelConfig->has_generator();
inFrameLines_.resize(subModelConfig->in_links_size()); inFrameLines_.resize(subModelConfig->in_links_size());
for (size_t i = 0; i < inFrameLines_.size(); ++i) { for (size_t i = 0; i < inFrameLines_.size(); ++i) {
...@@ -538,7 +539,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs, ...@@ -538,7 +539,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
The outputs are outFramesLines_[i].agentLayer The outputs are outFramesLines_[i].agentLayer
*/ */
if (inFrameLines_.empty() && passType == PASS_TEST) { if (generating_) {
generateSequence(); generateSequence();
return; return;
} // else forward.. } // else forward..
...@@ -569,6 +570,9 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs, ...@@ -569,6 +570,9 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
} }
void RecurrentGradientMachine::backward(const UpdateCallback& callback) { void RecurrentGradientMachine::backward(const UpdateCallback& callback) {
if (generating_) {
return;
}
REGISTER_TIMER_INFO("RecurrentBwTime", "RecurrentBwTime"); REGISTER_TIMER_INFO("RecurrentBwTime", "RecurrentBwTime");
AsyncGpuBlock asyncGpuBlock; AsyncGpuBlock asyncGpuBlock;
for (int i = maxSequenceLength_ - 1; i >= 0; --i) { for (int i = maxSequenceLength_ - 1; i >= 0; --i) {
...@@ -1321,11 +1325,10 @@ void RecurrentGradientMachine::fillGenOutputs() { ...@@ -1321,11 +1325,10 @@ void RecurrentGradientMachine::fillGenOutputs() {
batchMachineIdVec_.clear(); batchMachineIdVec_.clear();
generator_.ids.clear(); generator_.ids.clear();
int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
starts[0] = 0;
if (numResults > 1) { if (numResults > 1) {
real* probs = generator_.outArg.in->getData(); real* probs = generator_.outArg.in->getData();
int* starts =
generator_.outArg.sequenceStartPositions->getMutableData(false);
starts[0] = 0;
for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t i = 0; i < finalPaths_.size(); ++i) {
for (size_t j = 0; j < finalPaths_[i].size(); ++j) { for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
Path& path = finalPaths_[i][j]; Path& path = finalPaths_[i][j];
...@@ -1348,7 +1351,10 @@ void RecurrentGradientMachine::fillGenOutputs() { ...@@ -1348,7 +1351,10 @@ void RecurrentGradientMachine::fillGenOutputs() {
} else { } else {
for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t i = 0; i < finalPaths_.size(); ++i) {
CHECK(!finalPaths_[i].empty()); CHECK(!finalPaths_[i].empty());
generator_.ids = finalPaths_[i][0].ids; generator_.ids.insert(generator_.ids.begin(),
finalPaths_[i][0].ids.begin(),
finalPaths_[i][0].ids.end());
starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size();
} }
} }
} }
......
...@@ -414,6 +414,7 @@ protected: ...@@ -414,6 +414,7 @@ protected:
std::vector<int> ids; // store generated sequences std::vector<int> ids; // store generated sequences
Argument outArg; // final output argument Argument outArg; // final output argument
}; };
bool generating_;
Generator generator_; Generator generator_;
std::vector<std::unique_ptr<NeuralNetwork>> frames_; std::vector<std::unique_ptr<NeuralNetwork>> frames_;
......
...@@ -109,6 +109,40 @@ void GatherAgentLayer::forwardValue(PassType passType) { ...@@ -109,6 +109,40 @@ void GatherAgentLayer::forwardValue(PassType passType) {
} }
} }
namespace {
/**
 * Scatter copy: for every position k, destVec[indexVec[k]] receives
 * srcVec[k].
 *
 * srcVec and indexVec must have the same size (enforced with CHECK_EQ).
 * destVec is written through its raw data pointer; no bounds check is made
 * on the values read from indexVec.
 */
void copyElements(const IVector& srcVec,
                  const IVector& indexVec,
                  IVector& destVec) {
  const int* from = srcVec.getData();
  const int* positions = indexVec.getData();
  int* to = destVec.getData();
  const int count = indexVec.getSize();
  CHECK_EQ(srcVec.getSize(), indexVec.getSize());
  int k = 0;
  while (k < count) {
    to[positions[k]] = from[k];
    ++k;
  }
}
}  // namespace
/**
 * Gather the id outputs of all real layers into output_.ids.
 *
 * Returns immediately when the first real layer has no id output.
 * Otherwise output_.ids is resized (or created) to the size of allIds_, and
 * for each real layer i an index vector is created over allIds_'s data
 * starting at offset idIndex_[i], with the same length as that layer's ids.
 * The index vector is stored in idsVec_[i] and used by copyElements to
 * place the layer's ids into output_.ids.
 *
 * @param passType not used by this function.
 */
void GatherAgentLayer::forwardIds(PassType passType) {
  if (!realLayers_[0]->getOutputLabel()) {
    return;
  }

  IVector::resizeOrCreate(output_.ids, allIds_->getSize(), useGpu_);
  idsVec_.resize(idIndex_.size());

  const size_t numLayers = realLayers_.size();
  for (size_t layer = 0; layer < numLayers; ++layer) {
    const IVectorPtr& layerIds = realLayers_[layer]->getOutputLabel();
    idsVec_[layer] = IVector::create(allIds_->getData() + idIndex_[layer],
                                     /* size */ layerIds->getSize(),
                                     useGpu_);
    execViaCpu(&copyElements, *layerIds, *idsVec_[layer], *output_.ids);
  }
}
void GatherAgentLayer::backward(const UpdateCallback& callback) { void GatherAgentLayer::backward(const UpdateCallback& callback) {
(void)callback; (void)callback;
const MatrixPtr& outputGrad = getOutputGrad(); const MatrixPtr& outputGrad = getOutputGrad();
...@@ -174,41 +208,6 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) { ...@@ -174,41 +208,6 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) {
REGISTER_LAYER(gather_agent, GatherAgentLayer); REGISTER_LAYER(gather_agent, GatherAgentLayer);
REGISTER_LAYER(scatter_agent, ScatterAgentLayer); REGISTER_LAYER(scatter_agent, ScatterAgentLayer);
/**
 * Concatenate the id outputs (getOutputLabel()) of all real layers into
 * output_.ids, recording each layer's start offset in
 * output_.subSequenceStartPositions.
 *
 * Returns without doing anything if the first real layer has no id output.
 * Aborts with LOG(FATAL) when output_.subSequenceStartPositions is not set.
 *
 * @param passType not used by this function.
 */
void GatherAgentLayer::forwardIds(PassType passType) {
// Running total of ids gathered so far; doubles as the next start offset.
int height = 0;
IVectorPtr idReal = realLayers_[0]->getOutputLabel();
if (!idReal) return;
if (output_.subSequenceStartPositions) {
int* starts = output_.subSequenceStartPositions->getMutableData(false);
// Gather generator.idsVec
// A trailing -1 in the first layer's ids is treated as the marker of a
// beam search generation result; in that case only the first result of
// every layer is kept.
if (idReal->getData()[idReal->getSize() - 1] == -1) {
for (size_t i = 0; i < realLayers_.size(); ++i) {
// The first element stores the size of the first result.
// NOTE(review): subVecFrom presumably narrows the layer's id vector in
// place to that first result (offset 1, length ids[0]) -- confirm against
// IVector::subVecFrom.
idReal = realLayers_[i]->getOutputLabel();
idReal->subVecFrom(*idReal, 1, idReal->getData()[0]);
}
}
// First pass: record the start offset of every layer and accumulate the
// total number of ids.
for (size_t i = 0; i < realLayers_.size(); ++i) {
CHECK(realLayers_[i]->getOutputLabel());
starts[i] = height;
height += realLayers_[i]->getOutputLabel()->getSize();
}
// Closing offset, so that starts[i + 1] - starts[i] is layer i's length.
starts[realLayers_.size()] = height;
// The total length is also written at index 1 of sequenceStartPositions.
output_.sequenceStartPositions->getMutableData(false)[1] = height;
// NOTE(review): the trailing `false` is presumably the useGpu flag -- confirm.
IVector::resizeOrCreate(output_.ids, height, false);
// Second pass: copy each layer's ids into its slice of output_.ids.
for (size_t i = 0; i < realLayers_.size(); ++i) {
output_.ids->subVec(starts[i], starts[i + 1] - starts[i])
->copyFrom(*realLayers_[i]->getOutputLabel());
}
} else {
LOG(FATAL) << "Not implemented";
}
}
void ScatterAgentLayer::forwardSequence(PassType passType) { void ScatterAgentLayer::forwardSequence(PassType passType) {
Layer::forward(passType); Layer::forward(passType);
CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId());
......
...@@ -53,7 +53,7 @@ def outer_step(dummy_data): ...@@ -53,7 +53,7 @@ def outer_step(dummy_data):
bos_id=0, bos_id=0,
eos_id=num_words-1, eos_id=num_words-1,
beam_size=2 if beam_flag else 1, beam_size=2 if beam_flag else 1,
num_results_per_sample=2 if beam_flag else 1, num_results_per_sample=1,
max_length=10) max_length=10)
return beam_gen return beam_gen
......
...@@ -55,13 +55,17 @@ public: ...@@ -55,13 +55,17 @@ public:
* Else, just set status to popping. * Else, just set status to popping.
*/ */
void pop(const T& item) { void pop(const T& item) {
pushing() = false;
auto& s = this->stack(); auto& s = this->stack();
if (item == s.top()) { if (item == s.top()) {
s.pop(); s.pop();
} }
} }
/**
* @brief Indicate whether we are at forward or backward stage of computation
*/
void set_stage(bool isForward) { pushing() = isForward; }
/** /**
* @brief clear current thread stack. * @brief clear current thread stack.
*/ */
......
...@@ -72,7 +72,6 @@ TEST(CustomStackTrace, normalTrain) { ...@@ -72,7 +72,6 @@ TEST(CustomStackTrace, normalTrain) {
for (size_t i = 0; i < layerSize; ++i) { for (size_t i = 0; i < layerSize; ++i) {
tracer.push("layer_" + paddle::str::to_string(i)); tracer.push("layer_" + paddle::str::to_string(i));
} }
tracer.pop("");
for (size_t i = 0; i < layerSize; ++i) { for (size_t i = 0; i < layerSize; ++i) {
tracer.pop("layer_" + paddle::str::to_string(layerSize - 1 - i)); tracer.pop("layer_" + paddle::str::to_string(layerSize - 1 - i));
} }
......
...@@ -45,12 +45,12 @@ __all__ = ['data', 'parse_network'] ...@@ -45,12 +45,12 @@ __all__ = ['data', 'parse_network']
def __need_to_keep__(name): def __need_to_keep__(name):
return name in [ return name in [
'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType', 'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType',
'layer_support' 'layer_support', 'BaseGeneratedInput'
] ]
def __need_to_wrap__(name): def __need_to_wrap__(name):
return name not in ['AggregateLevel', 'ExpandLevel'] return name not in ['AggregateLevel', 'ExpandLevel', 'BaseGeneratedInput']
def __convert_name__(inname): def __convert_name__(inname):
...@@ -199,6 +199,15 @@ def __get_used_submodels__(layer_names): ...@@ -199,6 +199,15 @@ def __get_used_submodels__(layer_names):
return submodel_names return submodel_names
def __get_submodel_data_out_links__():
    """Return the names of sub-model out-links that are data layers.

    Walks every sub-model in ``cp.g_config.model_config.sub_models`` and
    collects the ``link_name`` of each out-link whose layer in
    ``cp.g_layer_map`` has type ``'data'``.

    :return: set of link names
    :rtype: set
    """
    return {
        out_link.link_name
        for sub_model in cp.g_config.model_config.sub_models
        for out_link in sub_model.out_links
        if cp.g_layer_map[out_link.link_name].type == 'data'
    }
def __get_used_evaluators__(layer_names): def __get_used_evaluators__(layer_names):
evaluator_names = set() evaluator_names = set()
for e in cp.g_config.model_config.evaluators: for e in cp.g_config.model_config.evaluators:
...@@ -264,6 +273,7 @@ def parse_network(output_layers, extra_layers=None): ...@@ -264,6 +273,7 @@ def parse_network(output_layers, extra_layers=None):
submodel_names = __get_used_submodels__(layer_names) submodel_names = __get_used_submodels__(layer_names)
submodel_names.add('root') submodel_names.add('root')
evaluator_names = __get_used_evaluators__(layer_names) evaluator_names = __get_used_evaluators__(layer_names)
data_out_links = __get_submodel_data_out_links__()
input_layer_names = set() input_layer_names = set()
output_layer_names = set() output_layer_names = set()
...@@ -279,7 +289,7 @@ def parse_network(output_layers, extra_layers=None): ...@@ -279,7 +289,7 @@ def parse_network(output_layers, extra_layers=None):
continue continue
model_config.layers.extend([l]) model_config.layers.extend([l])
if l.type == 'data': if l.type == 'data':
if l.name in model_config.output_layer_names: if l.name in data_out_links:
""" """
In text generation, the outlink to save the generated word In text generation, the outlink to save the generated word
indices is a data_layer defined in recurrent_group. This indices is a data_layer defined in recurrent_group. This
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册