提交 d8ea560e 编写于 作者: D dangqingqing

resolve conflicts

...@@ -45,6 +45,7 @@ cc_library(paddle_pybind SHARED ...@@ -45,6 +45,7 @@ cc_library(paddle_pybind SHARED
SRCS pybind.cc SRCS pybind.cc
DEPS pybind python backward DEPS pybind python backward
sgd_op sgd_op
gather_op
add_op add_op
mul_op mul_op
rowwise_add_op rowwise_add_op
......
...@@ -43,6 +43,7 @@ USE_OP_ITSELF(recurrent_op); ...@@ -43,6 +43,7 @@ USE_OP_ITSELF(recurrent_op);
USE_OP(gaussian_random); USE_OP(gaussian_random);
USE_OP(uniform_random); USE_OP(uniform_random);
USE_OP(lookup_table); USE_OP(lookup_table);
USE_CPU_ONLY_OP(gather);
namespace paddle { namespace paddle {
namespace framework { namespace framework {
......
...@@ -1012,11 +1012,6 @@ void RecurrentGradientMachine::generateSequence() { ...@@ -1012,11 +1012,6 @@ void RecurrentGradientMachine::generateSequence() {
/* width */ resultNum, /* width */ resultNum,
false, false,
/* useGpu */ false); /* useGpu */ false);
Matrix::resizeOrCreate(generator_.outArg.value,
/* height */ maxGenWordCount,
/* width */ 1,
false,
/* useGpu */ false);
} }
ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions, ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions,
numSequences + 1, numSequences + 1,
...@@ -1026,7 +1021,7 @@ void RecurrentGradientMachine::generateSequence() { ...@@ -1026,7 +1021,7 @@ void RecurrentGradientMachine::generateSequence() {
} else { } else {
oneWaySearch(numSequences); oneWaySearch(numSequences);
} }
if (dataArgsSize_) createDataOutlink(batchMachineIdVec_); if (dataArgsSize_) createDataOutlink();
size_t size = generator_.ids.size(); size_t size = generator_.ids.size();
generator_.outArg.ids->resize(size); generator_.outArg.ids->resize(size);
...@@ -1106,6 +1101,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) { ...@@ -1106,6 +1101,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
} }
batchMachineIdVec_.clear(); batchMachineIdVec_.clear();
batchMachineStartPos_.clear();
int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false); int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
starts[0] = 0; starts[0] = 0;
generator_.ids.clear(); generator_.ids.clear();
...@@ -1312,13 +1308,20 @@ void RecurrentGradientMachine::fillGenOutputs() { ...@@ -1312,13 +1308,20 @@ void RecurrentGradientMachine::fillGenOutputs() {
finalPaths_[i].resize(minFinalPathsSize); finalPaths_[i].resize(minFinalPathsSize);
} }
batchMachineIdVec_.clear();
generator_.ids.clear(); generator_.ids.clear();
int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false); int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
starts[0] = 0; starts[0] = 0;
if (numResults > 1) { if (numResults > 1) {
real* probs = generator_.outArg.in->getData(); int idsProbSaveSize = 0;
for (auto inSeq : finalPaths_) {
for (auto path : inSeq) idsProbSaveSize += path.ids.size();
idsProbSaveSize += inSeq.size();
}
Matrix::resizeOrCreate(
generator_.outArg.value, idsProbSaveSize, 1, false, false);
real* idsProb = generator_.outArg.value->getData(); real* idsProb = generator_.outArg.value->getData();
real* probs = generator_.outArg.in->getData();
size_t curPos = 0; size_t curPos = 0;
for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t i = 0; i < finalPaths_.size(); ++i) {
for (size_t j = 0; j < finalPaths_[i].size(); ++j) { for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
...@@ -1333,24 +1336,16 @@ void RecurrentGradientMachine::fillGenOutputs() { ...@@ -1333,24 +1336,16 @@ void RecurrentGradientMachine::fillGenOutputs() {
curPos += genLen; curPos += genLen;
idsProb[curPos++] = -1.0; idsProb[curPos++] = -1.0;
probs[i * numResults + j] = path.logProb; probs[i * numResults + j] = path.logProb;
if (!j && dataArgsSize_) {
// in beam search, here only reserved the top 1 generated result
// for out_links that are not the generated word indices.
batchMachineIdVec_.insert(batchMachineIdVec_.end(),
path.machineIdVec.begin(),
path.machineIdVec.end());
}
} }
starts[i + 1] = generator_.ids.size(); starts[i + 1] = generator_.ids.size();
} }
} else { } else {
for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t i = 0; i < finalPaths_.size(); ++i) {
CHECK(!finalPaths_[i].empty()); CHECK(!finalPaths_[i].empty());
generator_.ids.insert(generator_.ids.begin(), Path& path = finalPaths_[i][0];
finalPaths_[i][0].ids.begin(), generator_.ids.insert(
finalPaths_[i][0].ids.end()); generator_.ids.begin(), path.ids.begin(), path.ids.end());
starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size(); starts[i + 1] = starts[i] + path.ids.size();
} }
} }
} }
...@@ -1364,25 +1359,76 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) { ...@@ -1364,25 +1359,76 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) {
} }
} }
void RecurrentGradientMachine::createDataOutlink( void RecurrentGradientMachine::createDataOutlinkSelRowsInfo(
std::vector<int>& machineIdVec) { bool isSeq, std::vector<Argument>& outArgs) {
size_t seqNum = batchMachineIdVec_.clear();
getBeamSize() > 1UL ? finalPaths_.size() : finalPaths_[0].size();
std::vector<int> starts(seqNum + 1, 0); size_t seqIdx = 0;
for (size_t i = 0; i < seqNum; ++i) { for (size_t i = 0; i < finalPaths_.size(); ++i) {
size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size() for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
: finalPaths_[0][i].ids.size(); std::vector<int>& machineIdVec = finalPaths_[i][j].machineIdVec;
starts[i + 1] = starts[i] + seqLen; if (isSeq) {
for (size_t i = 0; i < machineIdVec.size(); ++i) {
size_t rowId = machineIdVec[i];
int* seqPos =
outArgs[i].sequenceStartPositions->getMutableData(false);
batchMachineIdVec_.push_back(seqPos[rowId]);
}
} else {
batchMachineIdVec_.insert(
batchMachineIdVec_.end(), machineIdVec.begin(), machineIdVec.end());
}
seqIdx++;
}
}
}
void RecurrentGradientMachine::createDataOutlinkCopySizeInfo(
bool isSeq, std::vector<Argument>& outArgs, std::vector<int>& copySize) {
size_t totalSeqNum = std::accumulate(
finalPaths_.begin(),
finalPaths_.end(),
0UL,
[](size_t a, const std::vector<Path>& b) { return a + b.size(); });
copySize.resize(totalSeqNum, 1);
batchMachineStartPos_.resize(totalSeqNum + 1, 0);
if (isSeq) {
ICpuGpuVectorPtr inputSeqStartPos = outArgs[0].sequenceStartPositions;
CHECK_EQ(static_cast<size_t>(inputSeqStartPos->getSize() - 1),
getBeamSize() > 1 ? finalPaths_.size() : finalPaths_[0].size());
int* starts = inputSeqStartPos->getMutableData(false);
int seqId = 0;
for (int i = 0; i < finalPaths_.size(); ++i) {
for (int j = 0; j < finalPaths_[i].size(); ++j) {
copySize[seqId] = getBeamSize() > 1 ? starts[i + 1] - starts[i]
: starts[j + 1] - starts[j];
batchMachineStartPos_[seqId + 1] =
batchMachineStartPos_[seqId] + finalPaths_[i][j].ids.size();
seqId++;
}
}
} else {
for (size_t i = 0; i < finalPaths_[0].size(); ++i)
batchMachineStartPos_[i + 1] =
batchMachineStartPos_[i] + finalPaths_[0][i].ids.size();
} }
}
void RecurrentGradientMachine::createDataOutlink() {
for (size_t i = 0; i < dataArgsSize_; i++) { for (size_t i = 0; i < dataArgsSize_; i++) {
bool isSeq = dataArgsFrame_[i][0].hasSeq();
std::vector<int> copySize;
createDataOutlinkCopySizeInfo(isSeq, dataArgsFrame_[i], copySize);
createDataOutlinkSelRowsInfo(isSeq, dataArgsFrame_[i]);
dataArgs_[i].concat(dataArgsFrame_[i], dataArgs_[i].concat(dataArgsFrame_[i],
machineIdVec, batchMachineIdVec_,
starts, batchMachineStartPos_,
copySize,
useGpu_, useGpu_,
HPPL_STREAM_1, HPPL_STREAM_1,
PASS_TEST); PASS_TEST);
auto dataAgent = auto dataAgent =
dynamic_cast<DataLayer*>(outFrameLines_[i + 1].agentLayer.get()); dynamic_cast<DataLayer*>(outFrameLines_[i + 1].agentLayer.get());
CHECK_NOTNULL(dataAgent); CHECK_NOTNULL(dataAgent);
......
...@@ -190,7 +190,7 @@ public: ...@@ -190,7 +190,7 @@ public:
std::vector<int> ids; std::vector<int> ids;
/** /**
* @brief idsProb, log probability of each generated words. * @brief idsProb, log probability of each generated word.
*/ */
std::vector<real> idsProb; std::vector<real> idsProb;
...@@ -472,15 +472,43 @@ private: ...@@ -472,15 +472,43 @@ private:
void copyDataOutlinkFrame(size_t machineCur); void copyDataOutlinkFrame(size_t machineCur);
/* /*
* @brief In generation, if the layer group has more than 1 outlink, outlinks * @brief In generation, if the layer group has more than 1 outlink, outlink
* except the first one are data outlinks. This function creates the data * except the first one is a data outlink. In RecurrentLayerGroup, each time
* outlinks. * step is a separate Network, outputs of a layer inside the
* @note In beam search, only one generated sequence with the hightest log * RecurrentLayerGroup are stored in separate Arguments. If one layer is
* probabilites are retained. * specified as an outlink of RecurrentLayerGroup. This function will
* @param machineIdVec : select a row of output matrix in each frame * collect outputs in each time step of each generated sequence which are
* that the generation process expanded. * dispersed in separate Arguments to form a new single Argument as output of
* RecurrentLayerGroup.
*/ */
void createDataOutlink(std::vector<int>& machineIdVec); void createDataOutlink();
/*
* @brief decide to select how many rows from the Matrix stored the forward
* pass results from a start position.
*
* @param isSeq: a flag indicating whether the layer to be output of the
* RecurrentGradientMachine is a sequence or not
* @param outArgs: all of the returned Arguments of the forward pass
* during the generation process.
* @param copySize: the returned result, number of rows to select from the
* Matrix stored the forward pass results from a start position.
*/
void createDataOutlinkCopySizeInfo(bool isSeq,
std::vector<Argument>& outArgs,
std::vector<int>& copySize);
/*
* @brief decide index of the start row for each time step of a generated
* sequence in Matrix stored the entire beam search batch's forward pass
* results.
*
* @param isSeq: a flag indicating whether the layer to be output of the
* RecurrentGradientMachine is a sequence or not
* @param outArgs: all of the returned Arguments of the forward pass
* during the generation process.
*/
void createDataOutlinkSelRowsInfo(bool isSeq, std::vector<Argument>& outArgs);
/* /*
* @brief used in beam search, connect previous frame to form recurrent link * @brief used in beam search, connect previous frame to form recurrent link
...@@ -543,6 +571,7 @@ private: ...@@ -543,6 +571,7 @@ private:
std::vector<int> topIds_; std::vector<int> topIds_;
std::vector<int> seqIds_; std::vector<int> seqIds_;
std::vector<int> batchMachineIdVec_; std::vector<int> batchMachineIdVec_;
std::vector<int> batchMachineStartPos_;
std::vector<std::vector<Path>> finalPaths_; std::vector<std::vector<Path>> finalPaths_;
std::vector<real> minFinalPathLogProb_; std::vector<real> minFinalPathLogProb_;
BeamSearchControlCallbacks* beamSearchCtrlCallbacks_; BeamSearchControlCallbacks* beamSearchCtrlCallbacks_;
......
...@@ -44,6 +44,7 @@ endfunction() ...@@ -44,6 +44,7 @@ endfunction()
add_subdirectory(math) add_subdirectory(math)
cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_test(gather_test SRCS gather_test.cc DEPS tensor)
op_library(gather_op SRCS gather_op.cc gather_op.cu)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#include <cstring> #include <cstring>
#include "paddle/framework/ddim.h" #include "paddle/framework/ddim.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/tensor.h" #include "paddle/framework/tensor.h"
#include "paddle/platform/place.h" #include "paddle/platform/place.h"
...@@ -25,13 +26,13 @@ namespace operators { ...@@ -25,13 +26,13 @@ namespace operators {
// Implementation of CPU copy // Implementation of CPU copy
template <typename T> template <typename T>
void CPUGather(const T* params, const int* indices, const int slice_size, void CPUGather(const T* src, const int* indices, const int slice_size,
const int index_size, T* output) { const int index_size, T* output) {
const size_t slice_bytes = slice_size * sizeof(T); const size_t slice_bytes = slice_size * sizeof(T);
for (int i = 0; i < index_size; ++i) { for (int i = 0; i < index_size; ++i) {
int index_ = indices[i]; int index_ = indices[i];
memcpy(output + i * slice_size, params + index_ * slice_size, slice_bytes); memcpy(output + i * slice_size, src + index_ * slice_size, slice_bytes);
} }
} }
...@@ -55,7 +56,7 @@ void Gather(const platform::Place& place, const paddle::framework::Tensor* src, ...@@ -55,7 +56,7 @@ void Gather(const platform::Place& place, const paddle::framework::Tensor* src,
int index_size = index->dims()[0]; int index_size = index->dims()[0];
auto src_dims = src->dims(); auto src_dims = src->dims();
paddle::framework::DDim output_dims(src_dims); framework::DDim output_dims(src_dims);
output_dims[0] = index_size; output_dims[0] = index_size;
// slice size // slice size
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/gather_op.h"
#include "paddle/framework/ddim.h"
namespace paddle {
namespace operators {
// Forward "gather" operator: Out = X[Index], selecting along the first axis.
class GatherOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // Infers the shape of "Out": the same dimensions as "X", except that the
  // first dimension is replaced by the first dimension of "Index".
  void InferShape(const framework::InferShapeContext &ctx) const override {
    int batch_size = ctx.Input<Tensor>("Index")->dims()[0];
    // The check accepts zero, so the message states ">= 0" to match the
    // condition that is actually enforced.
    PADDLE_ENFORCE_GE(batch_size, 0, "Batch size must be >= 0");
    framework::DDim output_dims(ctx.Input<Tensor>("X")->dims());
    output_dims[0] = batch_size;
    ctx.Output<Tensor>("Out")->Resize(output_dims);
  }
};
// Backward "gather" operator: only responsible for shaping the gradient of X.
class GatherGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // Resizes the gradient variable of "X" to the dimensions of "X" itself.
  void InferShape(const framework::InferShapeContext &ctx) const override {
    auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
    x_grad->Resize(ctx.Input<Tensor>("X")->dims());
  }
};
// Declares the inputs, output and description of the "gather" operator.
class GatherOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  GatherOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The source input of gather op");
    AddInput("Index", "The index input of gather op");
    // Previously described as the output of "add op" (copy-paste slip).
    AddOutput("Out", "The output of gather op");
    AddComment(R"DOC(
Gather Operator by selecting from the first axis,
Out = X[Index]
)DOC");
  }
};
} // namespace operators
} // namespace paddle
// Short alias used by the registration macros below.
namespace ops = paddle::operators;
// Registers the "gather" operator (GatherOp + GatherOpMaker) together with
// its gradient operator "gather_grad" (GatherGradOp).
REGISTER_OP(gather, ops::GatherOp, ops::GatherOpMaker, gather_grad,
ops::GatherGradOp);
// CPU kernels: only the float instantiations are registered here.
REGISTER_OP_CPU_KERNEL(gather,
ops::GatherOpKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
gather_grad,
ops::GatherGradientOpKernel<paddle::platform::CPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/gather_op.h"
// Short alias used by the registration macro below.
namespace ops = paddle::operators;
// GPU kernel for the forward "gather" operator, float only.
// NOTE(review): no GPU kernel is registered for "gather_grad" in this file —
// confirm whether that is intentional.
REGISTER_OP_GPU_KERNEL(gather,
ops::GatherOpKernel<paddle::platform::GPUPlace, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "gather.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "scatter.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
// Kernel of the forward "gather" operator.
//
// Reads the "X" and "Index" inputs, allocates the "Out" tensor on the
// execution place, and delegates the row selection to Gather<T>.
template <typename Place, typename T>
class GatherOpKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *source = ctx.Input<Tensor>("X");
    auto *row_index = ctx.Input<Tensor>("Index");
    auto *result = ctx.Output<Tensor>("Out");

    // Allocate the output buffer before Gather writes into it.
    result->mutable_data<T>(ctx.GetPlace());
    Gather<T>(ctx.GetPlace(), source, row_index, result);
  }
};
// Kernel of the backward "gather" operator.
//
// Reads "Index" and the gradient of "Out", allocates the gradient of "X" on
// the execution place, and delegates to ScatterUpdate<T>.
//
// NOTE(review): the gradient of "X" is allocated here but not explicitly
// zero-filled before ScatterUpdate; confirm that rows not referenced by
// "Index" are initialized as intended.
template <typename Place, typename T>
class GatherGradientOpKernel : public framework::OpKernel {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *row_index = ctx.Input<Tensor>("Index");
    auto *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
    auto *out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));

    // Allocate the destination buffer before ScatterUpdate writes into it.
    x_grad->mutable_data<T>(ctx.GetPlace());
    ScatterUpdate<T>(ctx.GetPlace(), out_grad, row_index, x_grad);
  }
};
} // namespace operators
} // namespace paddle
...@@ -276,17 +276,21 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src, ...@@ -276,17 +276,21 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src,
void Argument::concat(const std::vector<Argument>& args, void Argument::concat(const std::vector<Argument>& args,
const std::vector<int>& selectRows, const std::vector<int>& selectRows,
const std::vector<int>& seqStartPos, const std::vector<int>& seqStartPos,
const std::vector<int>& copySize,
bool useGpu, bool useGpu,
hl_stream_t stream, hl_stream_t stream,
PassType passType) { PassType passType) {
CHECK(!subSequenceStartPositions) CHECK(!subSequenceStartPositions)
<< "undefined behavior for subsequence positions"; << "undefined behavior for subsequence positions";
size_t batchSize = selectRows.size(); size_t batchSize = 0;
for (size_t i = 0; i < copySize.size(); ++i)
batchSize += copySize[i] * (seqStartPos[i + 1] - seqStartPos[i]);
auto copyArg = [batchSize, stream](MatrixPtr& dst, auto copyArg = [batchSize, stream](MatrixPtr& dst,
MatrixPtr src, MatrixPtr src,
int startRow, int desStartRow,
int pos, int srcStartRow,
int size, int size,
bool useGpu) { bool useGpu) {
if (!src) { if (!src) {
...@@ -300,14 +304,14 @@ void Argument::concat(const std::vector<Argument>& args, ...@@ -300,14 +304,14 @@ void Argument::concat(const std::vector<Argument>& args,
dst->resize(batchSize, width); dst->resize(batchSize, width);
} }
MatrixPtr tmpMatrix = dst->subMatrix(startRow, size); MatrixPtr tmpMatrix = dst->subMatrix(desStartRow, size);
tmpMatrix->copyFrom(*src->subMatrix(pos, size), stream); tmpMatrix->copyFrom(*src->subMatrix(srcStartRow, size), stream);
}; };
auto copyIds = [batchSize, stream](IVectorPtr& dst, auto copyIds = [batchSize, stream](IVectorPtr& dst,
const IVectorPtr& src, const IVectorPtr& src,
int startRow, int desStartRow,
int pos, int srcStartRow,
int size, int size,
bool useGpu) { bool useGpu) {
if (!src) { if (!src) {
...@@ -315,13 +319,14 @@ void Argument::concat(const std::vector<Argument>& args, ...@@ -315,13 +319,14 @@ void Argument::concat(const std::vector<Argument>& args,
return; return;
} }
IVector::resizeOrCreate(dst, batchSize, useGpu); IVector::resizeOrCreate(dst, batchSize, useGpu);
dst->subVec(startRow, size)->copyFrom(*src->subVec(pos, size), stream); dst->subVec(desStartRow, size)
->copyFrom(*src->subVec(srcStartRow, size), stream);
}; };
auto copyStrs = [batchSize, stream](SVectorPtr& dst, auto copyStrs = [batchSize, stream](SVectorPtr& dst,
const SVectorPtr& src, const SVectorPtr& src,
int startRow, int desStartRow,
int pos, int srcStartRow,
int size, int size,
bool useGpu) { bool useGpu) {
if (!src) { if (!src) {
...@@ -333,30 +338,31 @@ void Argument::concat(const std::vector<Argument>& args, ...@@ -333,30 +338,31 @@ void Argument::concat(const std::vector<Argument>& args,
} else { } else {
dst->resize(batchSize); dst->resize(batchSize);
} }
std::copy( std::copy(src->begin() + srcStartRow,
src->begin() + pos, src->begin() + pos + size, dst->begin() + startRow); src->begin() + srcStartRow + size,
dst->begin() + desStartRow);
}; };
dataId = args[0].dataId; dataId = args[0].dataId;
CHECK_NE(seqStartPos.size(), 0UL); CHECK_NE(seqStartPos.size(), 0UL);
size_t sampleNum = seqStartPos.size() - 1; int desStartRow = 0;
for (size_t i = 0; i < sampleNum; ++i) { for (size_t i = 0; i < copySize.size(); ++i) {
int startPos = seqStartPos[i]; int startPos = seqStartPos[i];
int endPos = seqStartPos[i + 1]; int endPos = seqStartPos[i + 1];
CHECK_GE(args.size(), static_cast<size_t>(endPos - startPos)); CHECK_GE(args.size(), static_cast<size_t>(endPos - startPos));
for (int j = startPos; j < endPos; ++j) { for (int j = startPos; j < endPos; ++j) {
const Argument& arg = args[j - startPos]; const Argument& arg = args[j - startPos];
CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have" CHECK_EQ(arg.dataId, dataId) << "Arguments to concatenate should have "
<< " same dataId"; << "the same dataId.";
const int copySize = 1; const int srcStartRow = selectRows[j];
const int rowIdx = selectRows[j]; copyArg(in, arg.in, desStartRow, srcStartRow, copySize[i], useGpu);
copyArg(in, arg.in, j, rowIdx, copySize, useGpu); copyArg(value, arg.value, desStartRow, srcStartRow, copySize[i], useGpu);
copyArg(value, arg.value, j, rowIdx, copySize, useGpu);
if (passType != PASS_TEST) { if (passType != PASS_TEST) {
copyArg(grad, arg.grad, j, rowIdx, copySize, useGpu); copyArg(grad, arg.grad, desStartRow, srcStartRow, copySize[i], useGpu);
} }
copyIds(ids, arg.ids, j, rowIdx, copySize, useGpu); copyIds(ids, arg.ids, desStartRow, srcStartRow, copySize[i], useGpu);
copyStrs(strs, arg.strs, j, rowIdx, copySize, useGpu); copyStrs(strs, arg.strs, desStartRow, srcStartRow, copySize[i], useGpu);
desStartRow += copySize[i];
} }
} }
ICpuGpuVector::resizeOrCreate( ICpuGpuVector::resizeOrCreate(
......
...@@ -240,6 +240,7 @@ struct Argument { ...@@ -240,6 +240,7 @@ struct Argument {
void concat(const std::vector<Argument>& args, void concat(const std::vector<Argument>& args,
const std::vector<int>& selectRows, const std::vector<int>& selectRows,
const std::vector<int>& seqStartPos, const std::vector<int>& seqStartPos,
const std::vector<int>& copySize,
bool useGpu, bool useGpu,
hl_stream_t stream, hl_stream_t stream,
PassType passType); PassType passType);
......
...@@ -338,7 +338,8 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name, ...@@ -338,7 +338,8 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
in_links_count += 1 in_links_count += 1
layer_name = MakeLayerNameInParentSubmodel(name) layer_name = MakeLayerNameInParentSubmodel(name)
layer = g_layer_map[layer_name] layer = g_layer_map[layer_name]
ScatterAgentLayer(name=name, size=layer.size) ScatterAgentLayer(
name=name, size=layer.size, width=layer.width, height=layer.height)
pair = g_current_submodel.in_links.add() pair = g_current_submodel.in_links.add()
pair.layer_name = layer_name pair.layer_name = layer_name
...@@ -2197,8 +2198,8 @@ class MaxOutLayer(LayerBase): ...@@ -2197,8 +2198,8 @@ class MaxOutLayer(LayerBase):
maxout_conf = self.config.inputs[0].maxout_conf maxout_conf = self.config.inputs[0].maxout_conf
parse_maxout(self.inputs[0].maxout, input_layer.name, maxout_conf) parse_maxout(self.inputs[0].maxout, input_layer.name, maxout_conf)
out_channels = maxout_conf.image_conf.channels / maxout_conf.groups out_channels = maxout_conf.image_conf.channels / maxout_conf.groups
self.set_cnn_layer(name, g_layer_map[input_layer.name].height, self.set_cnn_layer(name, maxout_conf.image_conf.img_size_y,
g_layer_map[input_layer.name].width, out_channels) maxout_conf.image_conf.img_size, out_channels)
@config_layer('row_conv') @config_layer('row_conv')
...@@ -2405,9 +2406,11 @@ class GatherAgentLayer(LayerBase): ...@@ -2405,9 +2406,11 @@ class GatherAgentLayer(LayerBase):
@config_layer('scatter_agent') @config_layer('scatter_agent')
class ScatterAgentLayer(LayerBase): class ScatterAgentLayer(LayerBase):
def __init__(self, name, size, device=None): def __init__(self, name, size, width=None, height=None, device=None):
super(ScatterAgentLayer, self).__init__( super(ScatterAgentLayer, self).__init__(
name, 'scatter_agent', size, inputs=[], device=device) name, 'scatter_agent', size, inputs=[], device=device)
if height and width:
self.set_layer_height_width(height, width)
@config_layer('multiplex') @config_layer('multiplex')
......
...@@ -16,11 +16,13 @@ import functools ...@@ -16,11 +16,13 @@ import functools
import collections import collections
import inspect import inspect
import paddle.trainer.config_parser as cp
from paddle.trainer.config_parser import * from paddle.trainer.config_parser import *
from .activations import LinearActivation, SigmoidActivation, TanhActivation, \ from .activations import LinearActivation, SigmoidActivation, TanhActivation, \
ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation
from .evaluators import * from .evaluators import *
from .poolings import MaxPooling, AvgPooling, BasePoolingType from .poolings import MaxPooling, AvgPooling, BasePoolingType, \
CudnnAvgPooling, CudnnMaxPooling
from .attrs import * from .attrs import *
from .default_decorators import * from .default_decorators import *
...@@ -330,6 +332,14 @@ class LayerOutput(object): ...@@ -330,6 +332,14 @@ class LayerOutput(object):
self.outputs = outputs self.outputs = outputs
self.reverse = reverse self.reverse = reverse
@property
def width(self):
return cp.g_layer_map[self.full_name].width
@property
def height(self):
return cp.g_layer_map[self.full_name].height
def set_input(self, input): def set_input(self, input):
""" """
Set the input for a memory layer. Can only be used for memory layer Set the input for a memory layer. Can only be used for memory layer
...@@ -911,7 +921,13 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): ...@@ -911,7 +921,13 @@ def data_layer(name, size, height=None, width=None, layer_attr=None):
width=width, width=width,
**ExtraLayerAttribute.to_kwargs(layer_attr)) **ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(name, LayerType.DATA, size=size) num_filters = None
if height is not None and width is not None:
num_filters = size / (width * height)
assert num_filters * width * height == size, \
"size=%s width=%s height=%s" % (size, width, height)
return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters)
@wrap_name_default("embedding") @wrap_name_default("embedding")
...@@ -2571,6 +2587,10 @@ def img_pool_layer(input, ...@@ -2571,6 +2587,10 @@ def img_pool_layer(input,
assert input.num_filters is not None assert input.num_filters is not None
num_channels = input.num_filters num_channels = input.num_filters
assert type(pool_type) in [AvgPooling, MaxPooling, CudnnAvgPooling,
CudnnMaxPooling], \
"only (Cudnn)AvgPooling, (Cudnn)MaxPooling are supported"
if pool_type is None: if pool_type is None:
pool_type = MaxPooling() pool_type = MaxPooling()
elif isinstance(pool_type, AvgPooling): elif isinstance(pool_type, AvgPooling):
...@@ -2580,7 +2600,6 @@ def img_pool_layer(input, ...@@ -2580,7 +2600,6 @@ def img_pool_layer(input,
if ( if (
isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \ isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
else pool_type.name else pool_type.name
pool_size_y = pool_size if pool_size_y is None else pool_size_y pool_size_y = pool_size if pool_size_y is None else pool_size_y
stride_y = stride if stride_y is None else stride_y stride_y = stride if stride_y is None else stride_y
padding_y = padding if padding_y is None else padding_y padding_y = padding if padding_y is None else padding_y
...@@ -4204,8 +4223,7 @@ def conv_operator(img, ...@@ -4204,8 +4223,7 @@ def conv_operator(img,
num_channels = img.num_filters num_channels = img.num_filters
assert isinstance(filter, LayerOutput) assert isinstance(filter, LayerOutput)
if filter.size is not None: assert filter.size is not None
filter.size = filter_size * filter_size_y * num_filters * num_channels
opCls = ConvTransOperator if trans else ConvOperator opCls = ConvTransOperator if trans else ConvOperator
...@@ -4916,7 +4934,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None): ...@@ -4916,7 +4934,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
assert input.layer_type == LayerType.CONV_LAYER
assert isinstance(input.activation, LinearActivation) assert isinstance(input.activation, LinearActivation)
assert groups > 1 assert groups > 1
if num_channels is None: if num_channels is None:
...@@ -6219,11 +6236,11 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1): ...@@ -6219,11 +6236,11 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1):
@wrap_bias_attr_default() @wrap_bias_attr_default()
def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None): def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None):
""" """
A layer applies a linear transformation to each element in each row of A layer applies a linear transformation to each element in each row of
the input matrix. For each element, the layer first re-scale it and then the input matrix. For each element, the layer first re-scale it and then
adds a bias to it. adds a bias to it.
This layer is very like the SlopeInterceptLayer, except the scale and This layer is very like the SlopeInterceptLayer, except the scale and
bias are trainable. bias are trainable.
.. math:: .. math::
......
...@@ -13,6 +13,7 @@ py_test(test_add_two_op SRCS test_add_two_op.py) ...@@ -13,6 +13,7 @@ py_test(test_add_two_op SRCS test_add_two_op.py)
py_test(test_sigmoid_op SRCS test_sigmoid_op.py) py_test(test_sigmoid_op SRCS test_sigmoid_op.py)
py_test(test_softmax_op SRCS test_softmax_op.py) py_test(test_softmax_op SRCS test_softmax_op.py)
py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py) py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py)
py_test(test_gather_op SRCS test_gather_op.py)
py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py) py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py)
py_test(gradient_checker SRCS gradient_checker.py) py_test(gradient_checker SRCS gradient_checker.py)
......
import unittest
from op_test_util import OpTestMeta
from gradient_checker import GradientChecker, create_op
import numpy
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
class TestGatherOp(unittest.TestCase):
    # Forward test of the "gather" operator; the expected output is computed
    # with numpy fancy indexing on the first axis.
    __metaclass__ = OpTestMeta

    def setUp(self):
        self.type = "gather"
        source = numpy.random.random((10, 20)).astype("float32")
        row_ids = numpy.array([1, 3, 5]).astype("int32")
        self.inputs = {'X': source, 'Index': row_ids}
        self.outputs = {'Out': source[row_ids]}
class TestGatherGradOp(GradientChecker):
    # Gradient check of the "gather" operator with respect to its "X" input.
    def test_gather_grad(self):
        op = create_op("gather")
        xnp = numpy.random.random((10, 20)).astype("float32")
        inputs = {'X': xnp, 'Index': numpy.array([1, 3, 5]).astype("int32")}
        # Build the set from a list: set("X") iterates over the characters of
        # the string and is only correct for one-character input names.
        self.check_grad(op, inputs, set(["X"]), "Out")
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册