Commit 91df6062, authored by luotao1, committed by emailweixu

Remove some copyFrom calls in AgentLayer and ExpandLayer, and fix a warning in the seq2seq config (#183)

Parent: cebdb667
@@ -96,12 +96,12 @@ def gru_encoder_decoder(data_conf,
     encoded_vector = concat_layer(input=[src_forward, src_backward])
     with mixed_layer(size=decoder_size) as encoded_proj:
-        encoded_proj += full_matrix_projection(encoded_vector)
+        encoded_proj += full_matrix_projection(input=encoded_vector)
     backward_first = first_seq(input=src_backward)
     with mixed_layer(size=decoder_size,
                      act=TanhActivation(), ) as decoder_boot:
-        decoder_boot += full_matrix_projection(backward_first)
+        decoder_boot += full_matrix_projection(input=backward_first)
     def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
         decoder_mem = memory(name='gru_decoder',
@@ -113,8 +113,8 @@ def gru_encoder_decoder(data_conf,
                                  decoder_state=decoder_mem, )
         with mixed_layer(size=decoder_size * 3) as decoder_inputs:
-            decoder_inputs += full_matrix_projection(context)
-            decoder_inputs += full_matrix_projection(current_word)
+            decoder_inputs += full_matrix_projection(input=context)
+            decoder_inputs += full_matrix_projection(input=current_word)
         gru_step = gru_step_layer(name='gru_decoder',
                                   input=decoder_inputs,
......
@@ -143,7 +143,7 @@ extern void hl_context_projection_backward_weight(real* outputGrad,
  */
 extern void hl_sequence2batch_copy(real *batch,
                                    real *sequence,
-                                   int *batchIndex,
+                                   const int *batchIndex,
                                    int seqWidth,
                                    int batchCount,
                                    bool seq2batch);
......
@@ -62,7 +62,7 @@ inline void hl_context_projection_backward_weight(real* outputGrad,
 inline void hl_sequence2batch_copy(real *batch,
                                    real *sequence,
-                                   int *batchIndex,
+                                   const int *batchIndex,
                                    int seqWidth,
                                    int batchCount,
                                    bool seq2batch) {}
......
@@ -374,7 +374,7 @@ template<int blockDimX, int blockDimY, int gridDimX, bool seq2batch, bool isAdd>
 __global__
 void KeSequence2Batch(real *batch,
                       real *sequence,
-                      int *batchIndex,
+                      const int *batchIndex,
                       int seqWidth,
                       int batchCount) {
   int idx = threadIdx.x;
@@ -405,7 +405,7 @@ void KeSequence2Batch(real *batch,
 void hl_sequence2batch_copy(real *batch,
                             real *sequence,
-                            int *batchIndex,
+                            const int *batchIndex,
                             int seqWidth,
                             int batchCount,
                             bool seq2batch) {
......
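The `const int *batchIndex` change keeps the call chain const-correct: once `Matrix::copyByRowIndex` (further down) takes a `const IVector&`, `rowIndex.getData()` yields a read-only pointer, so every function it is forwarded to must accept `const int*`. A minimal sketch of that chain, with hypothetical stand-in names rather than Paddle's real types:

```cpp
using real = float;

// Stand-in for the copy routine: the index array is only ever read,
// so it can (and should) be taken as const int*.
void sequence2batch_copy(real* batch, const real* sequence,
                         const int* batchIndex, int seqWidth, int batchCount) {
  for (int i = 0; i < batchCount; ++i)
    for (int j = 0; j < seqWidth; ++j)
      batch[i * seqWidth + j] = sequence[batchIndex[i] * seqWidth + j];
}

// Stand-in caller holding a read-only index, as copyByRowIndex now does.
void copyByRowIndex(real* dst, const real* src, const int* index,
                    int width, int height) {
  // Passing `index` to an `int*` parameter would be a compile error here,
  // which is why the declaration, stub, and kernel all change together.
  sequence2batch_copy(dst, src, index, width, height);
}
```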
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "AgentLayer.h"
-#include "paddle/utils/Logging.h"
@@ -62,8 +61,8 @@ void SequenceAgentLayer::forward(PassType passType) {
   // get Arguments from real layers
   if (numSamples_ > 0 && numSamples_ < realNumSequences) {
-    int numRows = realOutput.sequenceStartPositions->
-        getData(false)[numSamples_];
+    int numRows =
+        realOutput.sequenceStartPositions->getData(false)[numSamples_];
     CHECK(!realOutput.ids) << "Not supported";
     output_.subArgFrom(realOutput, /* offset */ 0, numRows, getSize(), useGpu_,
                        /* trans */ false, /* seqFlag */ true,
@@ -141,8 +140,8 @@ void ScatterAgentLayer::forward(PassType passType) {
   int width = this->getSize();
   if (realOutArg_.value || realOutArg_.ids) {
-    output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_,
-                       width, useGpu_);
+    output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_, width,
+                       useGpu_);
   } else {  // used in generation
     if (realLayer_->getOutput().ids) {
       IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
@@ -224,8 +223,8 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
   if (realOutArg_.value || realOutArg_.ids) {
     CHECK(realOutArg_.sequenceStartPositions);
-    output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_,
-                       width, useGpu_, /* trans */ false, /* seqFlag */ true,
+    output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_, width,
+                       useGpu_, /* trans */ false, /* seqFlag */ true,
                        /* seqStart */ seqStartPosIndex_,
                        /* seqSize */ numSequences_);
   } else {
@@ -249,11 +248,12 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
     CHECK_NE(input.sequenceStartPositions.get(),
              output_.sequenceStartPositions.get());
     ICpuGpuVector::resizeOrCreate(output_.sequenceStartPositions,
-                                  numSequences + 1, false);
+                                  numSequences + 1, false);
     int* outStarts = output_.sequenceStartPositions->getMutableData(false);
-    IVector::resizeOrCreate(cpuInputStartPos_, height, false);
-    int* inStarts = cpuInputStartPos_->getData();
+    ICpuGpuVector::resizeOrCreate(inputStartPos_, height, false);
+    int* inStarts = inputStartPos_->getMutableData(false);
     size_t offsetOut = 0;
     for (size_t i = 0; i < numSequences; ++i) {
       outStarts[i] = offsetOut;
@@ -266,13 +266,8 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
     }
     outStarts[numSequences] = offsetOut;
-    if (useGpu_) {
-      IVector::resizeOrCreate(inputStartPos_, height, true);
-      inputStartPos_->copyFrom(*cpuInputStartPos_, HPPL_STREAM_DEFAULT);
-    } else {
-      inputStartPos_ = cpuInputStartPos_;
-    }
-    outputValue->copyByRowIndex(*input.value, *inputStartPos_);
+    outputValue->copyByRowIndex(*input.value,
+                                *inputStartPos_->getVector(useGpu_));
   }
 }
......
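The deleted branch is the pattern this commit removes: build the index on the CPU in `cpuInputStartPos_`, then either copy it into a separate GPU `IVector` or alias the CPU one. `ICpuGpuVector` hides that split: `getMutableData(false)` exposes a writable CPU buffer, and `getVector(useGpu_)` hands back the view for the requested device. A rough sketch of the idea (not Paddle's actual implementation):

```cpp
#include <cstddef>
#include <vector>

// One object owning a CPU buffer with a lazily synchronized GPU mirror,
// so callers no longer juggle cpuXxx_/xxx_ pairs and explicit copyFrom.
class DualIntVector {
 public:
  explicit DualIntVector(size_t size) : cpu_(size) {}

  // Writable CPU view; invalidates the GPU mirror.
  int* getMutableData() {
    gpuValid_ = false;
    return cpu_.data();
  }

  // Device view, copying host-to-device only when the mirror is stale.
  const std::vector<int>& getVector(bool useGpu) {
    if (useGpu && !gpuValid_) {
      gpu_ = cpu_;  // placeholder for the real cudaMemcpy
      gpuValid_ = true;
    }
    return useGpu ? gpu_ : cpu_;
  }

 private:
  std::vector<int> cpu_, gpu_;  // gpu_ stands in for device memory
  bool gpuValid_ = false;
};
```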
@@ -191,11 +191,7 @@ class SequenceScatterAgentLayer : public ScatterAgentLayer {
 protected:
   // use to store expanded cpuStartPositions or subSequenceStartPositions
   // of real layer.
-  IVectorPtr cpuInputStartPos_;
-  // point to cpuInputStartPos_ when useGpu_ is false
-  // copy from cpuInputStartPos_ when useGpu_ is true
-  IVectorPtr inputStartPos_;
+  ICpuGpuVectorPtr inputStartPos_;
 public:
   explicit SequenceScatterAgentLayer(const LayerConfig& config)
......
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "ExpandLayer.h"
-#include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
@@ -53,9 +52,8 @@ void ExpandLayer::forward(PassType passType) {
   const Argument& shapeInput = getInput(1);
   const Argument& dataInput = getInput(0);
   size_t outputBatchSize = shapeInput.getBatchSize();
-  auto startPositions =
-      type_ ? shapeInput.subSequenceStartPositions
-            : shapeInput.sequenceStartPositions;
+  auto startPositions = type_ ? shapeInput.subSequenceStartPositions
+                              : shapeInput.sequenceStartPositions;
   size_t numSequences = startPositions->getSize() - 1;
   const int* starts = startPositions->getData(false);
@@ -71,8 +69,7 @@ void ExpandLayer::forward(PassType passType) {
   // set output sequence info as shape sequence
   output_.sequenceStartPositions = shapeInput.sequenceStartPositions;
   if (shapeInput.hasSubseq()) {
-    output_.subSequenceStartPositions =
-        shapeInput.subSequenceStartPositions;
+    output_.subSequenceStartPositions = shapeInput.subSequenceStartPositions;
   }
   // reserve output: Expand output to batchsize of sequence data.
@@ -81,8 +78,8 @@ void ExpandLayer::forward(PassType passType) {
   MatrixPtr inputValue = getInputValue(0);
   MatrixPtr outputValue = getOutputValue();
-  IVector::resizeOrCreate(cpuExpandStartsPos_, outputBatchSize, false);
-  int* expandStarts = cpuExpandStartsPos_->getData();
+  ICpuGpuVector::resizeOrCreate(expandStartsPos_, outputBatchSize, false);
+  int* expandStarts = expandStartsPos_->getMutableData(false);
   for (size_t sequenceId = 0; sequenceId < numSequences; ++sequenceId) {
     int sequenceLength = starts[sequenceId + 1] - starts[sequenceId];
     for (int j = 0; j < sequenceLength; j++) {
@@ -90,15 +87,8 @@ void ExpandLayer::forward(PassType passType) {
     }
   }
-  if (useGpu_) {
-    // TODO(Dangqingqing) move copyFrom
-    IVector::resizeOrCreate(expandStartsPos_, outputBatchSize, true);
-    expandStartsPos_->copyFrom(*cpuExpandStartsPos_, HPPL_STREAM_DEFAULT);
-  } else {
-    expandStartsPos_ = cpuExpandStartsPos_;
-  }
-  outputValue->copyByRowIndex(*inputValue, *expandStartsPos_);
+  outputValue->copyByRowIndex(*inputValue,
+                              *expandStartsPos_->getVector(useGpu_));
   if (biases_.get() != NULL) {
     outputValue->addBias(*(biases_->getW()), 1);
@@ -108,16 +98,15 @@ void ExpandLayer::forward(PassType passType) {
 void ExpandLayer::backward(const UpdateCallback& callback) {
   if (biases_ && biases_->getWGrad()) {
     biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
-    /* Increasing the number of gradient */
+    /* Increasing the number of gradient */
     biases_->getParameterPtr()->incUpdate(callback);
   }
   if (!getInputGrad(0)) return;
   MatrixPtr inputGrad = getInputGrad(0);
   MatrixPtr outputGrad = getOutputGrad();
-  auto cpuSeqStartPos =
-      type_ ? getInput(1).subSequenceStartPositions
-            : getInput(1).sequenceStartPositions;
+  auto cpuSeqStartPos = type_ ? getInput(1).subSequenceStartPositions
+                              : getInput(1).sequenceStartPositions;
   size_t numSequences = cpuSeqStartPos->getSize() - 1;
   const int* starts = cpuSeqStartPos->getData(false);
......
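For reference, the index table built in forward maps every output row to the input row it copies. In the `kNonSeq` case, input row `seq` is repeated once per position of sequence `seq` in the shape input, so start positions `{0, 3, 5}` yield indices `{0, 0, 0, 1, 1}`. A standalone sketch of that construction (a hypothetical helper mirroring the visible loop, whose inner assignment is elided in the hunk above):

```cpp
#include <cstdio>
#include <vector>

// Builds the row-index table ExpandLayer feeds to copyByRowIndex for the
// kNonSeq case: output row k reads input row expandStarts[k].
std::vector<int> buildExpandStarts(const std::vector<int>& starts) {
  size_t numSequences = starts.size() - 1;
  std::vector<int> expandStarts(starts.back());
  for (size_t seq = 0; seq < numSequences; ++seq) {
    int sequenceLength = starts[seq + 1] - starts[seq];
    for (int j = 0; j < sequenceLength; ++j) {
      expandStarts[starts[seq] + j] = static_cast<int>(seq);
    }
  }
  return expandStarts;
}

int main() {
  // Two sequences of lengths 3 and 2 -> {0, 0, 0, 1, 1}.
  for (int idx : buildExpandStarts({0, 3, 5})) std::printf("%d ", idx);
  std::printf("\n");
  return 0;
}
```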
@@ -44,14 +44,9 @@ protected:
   enum ExpandLevel { kNonSeq = 0, kSeq = 1 };
   /// store the ExpandLevel
   int type_;
-  // TODO(luotao) use ICpuGpuVectorPtr to merge cpuExpandStartsPos_
-  // and expandStartsPos_
   /// expanded sequenceStartPositions or subSequenceStartPositions
   /// of input[1]
-  IVectorPtr cpuExpandStartsPos_;
-  /// point to cpuExpandStartsPos_ when useGpu_ is false,
-  /// copy from cpuExpandStartsPos_ when useGpu_ is true
-  IVectorPtr expandStartsPos_;
+  ICpuGpuVectorPtr expandStartsPos_;
 public:
   explicit ExpandLayer(const LayerConfig& config) : Layer(config) {}
......
@@ -282,13 +282,13 @@ void GpuMatrix::copyFrom(const IVector& src) {
   copyFrom(matrix);
 }
-void GpuMatrix::copyByRowIndex(Matrix& b, IVector& rowIndex) {
+void GpuMatrix::copyByRowIndex(Matrix& b, const IVector& rowIndex) {
   size_t height = getHeight();
   size_t width = getWidth();
   CHECK_EQ(b.getWidth(), width);
   real* dst = getData();
   real* src = b.getData();
-  int* index = rowIndex.getData();
+  const int* index = rowIndex.getData();
   hl_sequence2batch_copy(dst, src, index, width, height, true);
 }
@@ -1278,11 +1278,11 @@ void CpuMatrix::copyFrom(const IVector& src) {
   }
 }
-void CpuMatrix::copyByRowIndex(Matrix& b, IVector& rowIndex) {
+void CpuMatrix::copyByRowIndex(Matrix& b, const IVector& rowIndex) {
   size_t height = getHeight();
   size_t width = getWidth();
   CHECK_EQ(b.getWidth(), width);
-  int* index = rowIndex.getData();
+  const int* index = rowIndex.getData();
   for (size_t i = 0; i < height; i++) {
     CHECK_LT(static_cast<size_t>(index[i]), b.getHeight());
     real* src = b.getData() + index[i] * width;
......
@@ -253,7 +253,7 @@ public:
     LOG(FATAL) << "copy data from int vector only available on CpuMatrix.";
   }
-  virtual void copyByRowIndex(Matrix& b, IVector& rowIndex) {
+  virtual void copyByRowIndex(Matrix& b, const IVector& rowIndex) {
     LOG(FATAL) << "Not implemented";
   }
@@ -979,7 +979,7 @@ public:
   void copyFrom(const IVector& src);
-  void copyByRowIndex(Matrix& b, IVector& rowIndex);
+  void copyByRowIndex(Matrix& b, const IVector& rowIndex);
   MatrixPtr clone(size_t height, size_t width, bool useGpu = false);
@@ -1241,7 +1241,7 @@ public:
   void copyFrom(CpuSparseMatrix& src);
-  void copyByRowIndex(Matrix& b, IVector& rowIndex);
+  void copyByRowIndex(Matrix& b, const IVector& rowIndex);
   MatrixPtr clone(size_t height, size_t width, bool useGpu = false);
......
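One reason the base-class declaration and both overrides change in the same commit: if `Matrix::copyByRowIndex` took `const IVector&` while a subclass version still took `IVector&`, the derived function would silently become a new overload instead of an override, and calls through a `Matrix&` would hit the base `LOG(FATAL)` stub at runtime. A small illustration with stand-in types; C++11 `override` turns that mistake into a compile error:

```cpp
struct IVec {};  // stand-in for IVector

struct MatrixBase {
  // Base default, analogous to the LOG(FATAL) stub above.
  virtual void copyByRowIndex(MatrixBase& b, const IVec& rowIndex) {}
  virtual ~MatrixBase() = default;
};

struct CpuMatrixLike : MatrixBase {
  // Had the parameter stayed `IVec&`, this would no longer override the
  // base virtual; with `override` the compiler rejects the mismatch.
  void copyByRowIndex(MatrixBase& b, const IVec& rowIndex) override {}
};
```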