Commit 91df6062 authored by luotao1, committed by emailweixu

remove some copyfrom in AgentLayer and ExpandLayer, fix warning in seq2seq config (#183)

Parent cebdb667
@@ -96,12 +96,12 @@ def gru_encoder_decoder(data_conf,
     encoded_vector = concat_layer(input=[src_forward, src_backward])
 
     with mixed_layer(size=decoder_size) as encoded_proj:
-        encoded_proj += full_matrix_projection(encoded_vector)
+        encoded_proj += full_matrix_projection(input=encoded_vector)
 
     backward_first = first_seq(input=src_backward)
     with mixed_layer(size=decoder_size,
                      act=TanhActivation(), ) as decoder_boot:
-        decoder_boot += full_matrix_projection(backward_first)
+        decoder_boot += full_matrix_projection(input=backward_first)
 
     def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
         decoder_mem = memory(name='gru_decoder',
@@ -113,8 +113,8 @@ def gru_encoder_decoder(data_conf,
                                    decoder_state=decoder_mem, )
 
         with mixed_layer(size=decoder_size * 3) as decoder_inputs:
-            decoder_inputs += full_matrix_projection(context)
-            decoder_inputs += full_matrix_projection(current_word)
+            decoder_inputs += full_matrix_projection(input=context)
+            decoder_inputs += full_matrix_projection(input=current_word)
 
         gru_step = gru_step_layer(name='gru_decoder',
                                   input=decoder_inputs,
...
@@ -143,7 +143,7 @@ extern void hl_context_projection_backward_weight(real* outputGrad,
  */
 extern void hl_sequence2batch_copy(real *batch,
                                    real *sequence,
-                                   int *batchIndex,
+                                   const int *batchIndex,
                                    int seqWidth,
                                    int batchCount,
                                    bool seq2batch);
...
@@ -62,7 +62,7 @@ inline void hl_context_projection_backward_weight(real* outputGrad,
 inline void hl_sequence2batch_copy(real *batch,
                                    real *sequence,
-                                   int *batchIndex,
+                                   const int *batchIndex,
                                    int seqWidth,
                                    int batchCount,
                                    bool seq2batch) {}
...
@@ -374,7 +374,7 @@ template<int blockDimX, int blockDimY, int gridDimX, bool seq2batch, bool isAdd>
 __global__
 void KeSequence2Batch(real *batch,
                       real *sequence,
-                      int *batchIndex,
+                      const int *batchIndex,
                       int seqWidth,
                       int batchCount) {
   int idx = threadIdx.x;
@@ -405,7 +405,7 @@ void KeSequence2Batch(real *batch,
 void hl_sequence2batch_copy(real *batch,
                             real *sequence,
-                            int *batchIndex,
+                            const int *batchIndex,
                             int seqWidth,
                             int batchCount,
                             bool seq2batch) {
...
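Note: all of the `const int *batchIndex` changes above serve one purpose. Once `copyByRowIndex` (further down in this commit) takes its row index as `const IVector&`, `getData()` on it returns `const int*`, and that const-ness has to propagate through `hl_sequence2batch_copy` into the kernel, or an intermediate call would need a cast and draw a compiler warning. A minimal sketch of the propagation, with illustrative names rather than the Paddle signatures:

#include <cstdio>

// The index rows are only read, so every level declares them const;
// a plain `int*` parameter anywhere on this path would force a cast.
void copyRows(float* dst, const float* src, const int* index,
              int width, int height) {
  for (int i = 0; i < height; ++i)
    for (int j = 0; j < width; ++j)
      dst[i * width + j] = src[index[i] * width + j];
}

int main() {
  const float src[] = {1, 2, 3, 4};  // two rows of width 2
  const int index[] = {1, 0};        // gather row 1, then row 0
  float dst[4];
  copyRows(dst, src, index, /*width=*/2, /*height=*/2);
  printf("%g %g %g %g\n", dst[0], dst[1], dst[2], dst[3]);  // 3 4 1 2
  return 0;
}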
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-
 #include "AgentLayer.h"
 #include "paddle/utils/Logging.h"
@@ -62,8 +61,8 @@ void SequenceAgentLayer::forward(PassType passType) {
   // get Arguments from real layers
   if (numSamples_ > 0 && numSamples_ < realNumSequences) {
-    int numRows = realOutput.sequenceStartPositions->
-        getData(false)[numSamples_];
+    int numRows =
+        realOutput.sequenceStartPositions->getData(false)[numSamples_];
     CHECK(!realOutput.ids) << "Not supported";
     output_.subArgFrom(realOutput, /* offset */ 0, numRows, getSize(), useGpu_,
                        /* trans */ false, /* seqFlag */ true,
@@ -141,8 +140,8 @@ void ScatterAgentLayer::forward(PassType passType) {
   int width = this->getSize();
 
   if (realOutArg_.value || realOutArg_.ids) {
-    output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_,
-                       width, useGpu_);
+    output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_, width,
+                       useGpu_);
   } else {  // used in generation
     if (realLayer_->getOutput().ids) {
       IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
@@ -224,8 +223,8 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
   if (realOutArg_.value || realOutArg_.ids) {
     CHECK(realOutArg_.sequenceStartPositions);
-    output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_,
-                       width, useGpu_, /* trans */ false, /* seqFlag */ true,
+    output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_, width,
+                       useGpu_, /* trans */ false, /* seqFlag */ true,
                        /* seqStart */ seqStartPosIndex_,
                        /* seqSize */ numSequences_);
   } else {
@@ -249,11 +248,12 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
     CHECK_NE(input.sequenceStartPositions.get(),
              output_.sequenceStartPositions.get());
     ICpuGpuVector::resizeOrCreate(output_.sequenceStartPositions,
                                   numSequences + 1, false);
     int* outStarts = output_.sequenceStartPositions->getMutableData(false);
 
-    IVector::resizeOrCreate(cpuInputStartPos_, height, false);
-    int* inStarts = cpuInputStartPos_->getData();
+    ICpuGpuVector::resizeOrCreate(inputStartPos_, height, false);
+    int* inStarts = inputStartPos_->getMutableData(false);
+
     size_t offsetOut = 0;
     for (size_t i = 0; i < numSequences; ++i) {
       outStarts[i] = offsetOut;
@@ -266,13 +266,8 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
     }
     outStarts[numSequences] = offsetOut;
 
-    if (useGpu_) {
-      IVector::resizeOrCreate(inputStartPos_, height, true);
-      inputStartPos_->copyFrom(*cpuInputStartPos_, HPPL_STREAM_DEFAULT);
-    } else {
-      inputStartPos_ = cpuInputStartPos_;
-    }
-    outputValue->copyByRowIndex(*input.value, *inputStartPos_);
+    outputValue->copyByRowIndex(*input.value,
+                                *inputStartPos_->getVector(useGpu_));
   }
 }
...
@@ -191,11 +191,7 @@ class SequenceScatterAgentLayer : public ScatterAgentLayer {
 protected:
   // use to store expanded cpuStartPositions or subSequenceStartPositions
   // of real layer.
-  IVectorPtr cpuInputStartPos_;
-  // point to cpuInputStartPos_ when useGpu_ is false
-  // copy from cpuInputStartPos_ when useGpu_ is true
-  IVectorPtr inputStartPos_;
+  ICpuGpuVectorPtr inputStartPos_;
 
 public:
   explicit SequenceScatterAgentLayer(const LayerConfig& config)
...
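This header change is the heart of the commit: the old pair of members (a CPU-side `cpuInputStartPos_` plus a GPU-side `inputStartPos_` refilled by `copyFrom` on every forward pass) collapses into a single `ICpuGpuVectorPtr`, and callers fetch whichever view they need with `getVector(useGpu_)`. A toy sketch of that dual-view idea, using a hypothetical `CpuGpuInts` class in place of Paddle's real `ICpuGpuVector` (which also manages device memory and streams):

#include <cassert>
#include <cstddef>
#include <vector>

class CpuGpuInts {
 public:
  explicit CpuGpuInts(std::size_t n) : cpu_(n), gpu_(n) {}

  // Writing through the CPU view marks the device mirror stale.
  int* getMutableData() {
    gpuFresh_ = false;
    return cpu_.data();
  }

  // Hand back the requested view, syncing only when needed; the old
  // two-member scheme made every call site do this dance by hand.
  const std::vector<int>& getVector(bool useGpu) {
    if (useGpu && !gpuFresh_) {
      gpu_ = cpu_;  // stands in for a host-to-device copy
      gpuFresh_ = true;
    }
    return useGpu ? gpu_ : cpu_;
  }

 private:
  std::vector<int> cpu_, gpu_;
  bool gpuFresh_ = false;
};

int main() {
  CpuGpuInts starts(3);
  int* p = starts.getMutableData();
  p[0] = 0; p[1] = 2; p[2] = 5;                       // filled on the CPU
  assert(starts.getVector(/*useGpu=*/true)[2] == 5);  // synced device view
  return 0;
}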
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-
 #include "ExpandLayer.h"
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
@@ -53,9 +52,8 @@ void ExpandLayer::forward(PassType passType) {
   const Argument& shapeInput = getInput(1);
   const Argument& dataInput = getInput(0);
   size_t outputBatchSize = shapeInput.getBatchSize();
-  auto startPositions =
-      type_ ? shapeInput.subSequenceStartPositions
-            : shapeInput.sequenceStartPositions;
+  auto startPositions = type_ ? shapeInput.subSequenceStartPositions
+                              : shapeInput.sequenceStartPositions;
   size_t numSequences = startPositions->getSize() - 1;
   const int* starts = startPositions->getData(false);
@@ -71,8 +69,7 @@ void ExpandLayer::forward(PassType passType) {
   // set output sequence info as shape sequence
   output_.sequenceStartPositions = shapeInput.sequenceStartPositions;
   if (shapeInput.hasSubseq()) {
-    output_.subSequenceStartPositions =
-        shapeInput.subSequenceStartPositions;
+    output_.subSequenceStartPositions = shapeInput.subSequenceStartPositions;
   }
 
   // reserve output: Expand output to batchsize of sequence data.
@@ -81,8 +78,8 @@ void ExpandLayer::forward(PassType passType) {
   MatrixPtr inputValue = getInputValue(0);
   MatrixPtr outputValue = getOutputValue();
 
-  IVector::resizeOrCreate(cpuExpandStartsPos_, outputBatchSize, false);
-  int* expandStarts = cpuExpandStartsPos_->getData();
+  ICpuGpuVector::resizeOrCreate(expandStartsPos_, outputBatchSize, false);
+  int* expandStarts = expandStartsPos_->getMutableData(false);
   for (size_t sequenceId = 0; sequenceId < numSequences; ++sequenceId) {
     int sequenceLength = starts[sequenceId + 1] - starts[sequenceId];
     for (int j = 0; j < sequenceLength; j++) {
@@ -90,15 +87,8 @@ void ExpandLayer::forward(PassType passType) {
     }
   }
 
-  if (useGpu_) {
-    // TODO(Dangqingqing) move copyFrom
-    IVector::resizeOrCreate(expandStartsPos_, outputBatchSize, true);
-    expandStartsPos_->copyFrom(*cpuExpandStartsPos_, HPPL_STREAM_DEFAULT);
-  } else {
-    expandStartsPos_ = cpuExpandStartsPos_;
-  }
-
-  outputValue->copyByRowIndex(*inputValue, *expandStartsPos_);
+  outputValue->copyByRowIndex(*inputValue,
+                              *expandStartsPos_->getVector(useGpu_));
 
   if (biases_.get() != NULL) {
     outputValue->addBias(*(biases_->getW()), 1);
@@ -108,16 +98,15 @@ void ExpandLayer::forward(PassType passType) {
 void ExpandLayer::backward(const UpdateCallback& callback) {
   if (biases_ && biases_->getWGrad()) {
     biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
     /* Increasing the number of gradient */
     biases_->getParameterPtr()->incUpdate(callback);
   }
 
   if (!getInputGrad(0)) return;
   MatrixPtr inputGrad = getInputGrad(0);
   MatrixPtr outputGrad = getOutputGrad();
-  auto cpuSeqStartPos =
-      type_ ? getInput(1).subSequenceStartPositions
-            : getInput(1).sequenceStartPositions;
+  auto cpuSeqStartPos = type_ ? getInput(1).subSequenceStartPositions
+                              : getInput(1).sequenceStartPositions;
   size_t numSequences = cpuSeqStartPos->getSize() - 1;
   const int* starts = cpuSeqStartPos->getData(false);
...
@@ -44,14 +44,9 @@ protected:
   enum ExpandLevel { kNonSeq = 0, kSeq = 1 };
   /// store the ExpandLevel
   int type_;
-  // TODO(luotao) use ICpuGpuVectorPtr to merge cpuExpandStartsPos_
-  // and expandStartsPos_
   /// expanded sequenceStartPositions or subSequenceStartPositions
   /// of input[1]
-  IVectorPtr cpuExpandStartsPos_;
-  /// point to cpuExpandStartsPos_ when useGpu_ is false,
-  /// copy from cpuExpandStartsPos_ when useGpu_ is true
-  IVectorPtr expandStartsPos_;
+  ICpuGpuVectorPtr expandStartsPos_;
 
 public:
   explicit ExpandLayer(const LayerConfig& config) : Layer(config) {}
...
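For reference, judging from the loop shown in ExpandLayer::forward above, `expandStartsPos_` maps every output row back to the input row it repeats: input row `i` (one row per sequence) appears `starts[i + 1] - starts[i]` times. A standalone sketch of that mapping (illustrative, not the Paddle code):

#include <cstddef>
#include <cstdio>
#include <vector>

// For start offsets [0, 2, 5]: sequence 0 spans output rows 0..1 and
// sequence 1 spans rows 2..4, so the row index is [0, 0, 1, 1, 1].
std::vector<int> buildExpandStarts(const std::vector<int>& starts) {
  std::vector<int> expand;
  for (std::size_t i = 0; i + 1 < starts.size(); ++i) {
    int sequenceLength = starts[i + 1] - starts[i];
    for (int j = 0; j < sequenceLength; ++j) expand.push_back(int(i));
  }
  return expand;
}

int main() {
  for (int row : buildExpandStarts({0, 2, 5})) printf("%d ", row);  // 0 0 1 1 1
  printf("\n");
  return 0;
}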
@@ -282,13 +282,13 @@ void GpuMatrix::copyFrom(const IVector& src) {
   copyFrom(matrix);
 }
 
-void GpuMatrix::copyByRowIndex(Matrix& b, IVector& rowIndex) {
+void GpuMatrix::copyByRowIndex(Matrix& b, const IVector& rowIndex) {
   size_t height = getHeight();
   size_t width = getWidth();
   CHECK_EQ(b.getWidth(), width);
   real* dst = getData();
   real* src = b.getData();
-  int* index = rowIndex.getData();
+  const int* index = rowIndex.getData();
   hl_sequence2batch_copy(dst, src, index, width, height, true);
 }
@@ -1278,11 +1278,11 @@ void CpuMatrix::copyFrom(const IVector& src) {
   }
 }
 
-void CpuMatrix::copyByRowIndex(Matrix& b, IVector& rowIndex) {
+void CpuMatrix::copyByRowIndex(Matrix& b, const IVector& rowIndex) {
   size_t height = getHeight();
   size_t width = getWidth();
   CHECK_EQ(b.getWidth(), width);
-  int* index = rowIndex.getData();
+  const int* index = rowIndex.getData();
   for (size_t i = 0; i < height; i++) {
     CHECK_LT(static_cast<size_t>(index[i]), b.getHeight());
     real* src = b.getData() + index[i] * width;
...
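Besides silencing the `const int*` warning, the `const IVector&` parameter matters for the new call sites: if `getVector(useGpu_)` yields a read-only vector, it could not bind to the old non-const reference at all. A small illustration of that binding rule (hypothetical types, not the Paddle API):

#include <vector>

void copyByRowIndexOld(std::vector<int>& rowIndex) {}        // old signature
void copyByRowIndexNew(const std::vector<int>& rowIndex) {}  // new signature

int main() {
  const std::vector<int> view = {1, 0};  // a read-only index view
  // copyByRowIndexOld(view);  // error: cannot bind a const lvalue
  //                           // to a non-const reference
  copyByRowIndexNew(view);     // fine: the index is only read
  return 0;
}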
@@ -253,7 +253,7 @@ public:
     LOG(FATAL) << "copy data from int vector only available on CpuMatrix.";
   }
 
-  virtual void copyByRowIndex(Matrix& b, IVector& rowIndex) {
+  virtual void copyByRowIndex(Matrix& b, const IVector& rowIndex) {
     LOG(FATAL) << "Not implemented";
   }
@@ -979,7 +979,7 @@ public:
   void copyFrom(const IVector& src);
 
-  void copyByRowIndex(Matrix& b, IVector& rowIndex);
+  void copyByRowIndex(Matrix& b, const IVector& rowIndex);
 
   MatrixPtr clone(size_t height, size_t width, bool useGpu = false);
@@ -1241,7 +1241,7 @@ public:
   void copyFrom(CpuSparseMatrix& src);
 
-  void copyByRowIndex(Matrix& b, IVector& rowIndex);
+  void copyByRowIndex(Matrix& b, const IVector& rowIndex);
 
   MatrixPtr clone(size_t height, size_t width, bool useGpu = false);
...