diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 325a6f7532ffebb5855fd73a12307d0b32f04272..21c9a42586703d0b23831c1f48c098ab36dc361f 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -45,6 +45,7 @@ cc_library(paddle_pybind SHARED
     SRCS pybind.cc
     DEPS pybind python backward
         sgd_op
+        gather_op
         add_op
         mul_op
         rowwise_add_op
diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc
index 265f06103eddab23c86aba43424b58554faf9cf9..4a35b58d763768766d44665b0e4afabf1581b221 100644
--- a/paddle/framework/pybind.cc
+++ b/paddle/framework/pybind.cc
@@ -43,6 +43,7 @@ USE_OP_ITSELF(recurrent_op);
 USE_OP(gaussian_random);
 USE_OP(uniform_random);
 USE_OP(lookup_table);
+USE_CPU_ONLY_OP(gather);
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
index 157b1ab45163a94a81d859dbcb7a52ae8edae439..1829f72a87054d9e4ead97962ca1f6738e585787 100644
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
@@ -1012,11 +1012,6 @@ void RecurrentGradientMachine::generateSequence() {
                            /* width */ resultNum,
                            false,
                            /* useGpu */ false);
-    Matrix::resizeOrCreate(generator_.outArg.value,
-                           /* height */ maxGenWordCount,
-                           /* width */ 1,
-                           false,
-                           /* useGpu */ false);
   }
   ICpuGpuVector::resizeOrCreate(generator_.outArg.sequenceStartPositions,
                                 numSequences + 1,
@@ -1026,7 +1021,7 @@ void RecurrentGradientMachine::generateSequence() {
   } else {
     oneWaySearch(numSequences);
   }
-  if (dataArgsSize_) createDataOutlink(batchMachineIdVec_);
+  if (dataArgsSize_) createDataOutlink();
 
   size_t size = generator_.ids.size();
   generator_.outArg.ids->resize(size);
@@ -1106,6 +1101,7 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) {
   }
 
   batchMachineIdVec_.clear();
+  batchMachineStartPos_.clear();
   int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
   starts[0] = 0;
   generator_.ids.clear();
@@ -1312,13 +1308,20 @@ void RecurrentGradientMachine::fillGenOutputs() {
     finalPaths_[i].resize(minFinalPathsSize);
   }
 
-  batchMachineIdVec_.clear();
   generator_.ids.clear();
   int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false);
   starts[0] = 0;
   if (numResults > 1) {
-    real* probs = generator_.outArg.in->getData();
+    int idsProbSaveSize = 0;
+    for (auto inSeq : finalPaths_) {
+      for (auto path : inSeq) idsProbSaveSize += path.ids.size();
+      idsProbSaveSize += inSeq.size();
+    }
+    Matrix::resizeOrCreate(
+        generator_.outArg.value, idsProbSaveSize, 1, false, false);
     real* idsProb = generator_.outArg.value->getData();
+
+    real* probs = generator_.outArg.in->getData();
     size_t curPos = 0;
     for (size_t i = 0; i < finalPaths_.size(); ++i) {
       for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
@@ -1333,24 +1336,16 @@
         curPos += genLen;
         idsProb[curPos++] = -1.0;
         probs[i * numResults + j] = path.logProb;
-
-        if (!j && dataArgsSize_) {
-          // in beam search, here only reserved the top 1 generated result
-          // for out_links that are not the generated word indices.
-          batchMachineIdVec_.insert(batchMachineIdVec_.end(),
-                                    path.machineIdVec.begin(),
-                                    path.machineIdVec.end());
-        }
       }
       starts[i + 1] = generator_.ids.size();
     }
   } else {
     for (size_t i = 0; i < finalPaths_.size(); ++i) {
       CHECK(!finalPaths_[i].empty());
-      generator_.ids.insert(generator_.ids.begin(),
-                            finalPaths_[i][0].ids.begin(),
-                            finalPaths_[i][0].ids.end());
-      starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size();
+      Path& path = finalPaths_[i][0];
+      generator_.ids.insert(
+          generator_.ids.begin(), path.ids.begin(), path.ids.end());
+      starts[i + 1] = starts[i] + path.ids.size();
     }
   }
 }
@@ -1364,25 +1359,76 @@ void RecurrentGradientMachine::copyDataOutlinkFrame(size_t machineCur) {
   }
 }
 
-void RecurrentGradientMachine::createDataOutlink(
-    std::vector<int>& machineIdVec) {
-  size_t seqNum =
-      getBeamSize() > 1UL ? finalPaths_.size() : finalPaths_[0].size();
-  std::vector<int> starts(seqNum + 1, 0);
-  for (size_t i = 0; i < seqNum; ++i) {
-    size_t seqLen = getBeamSize() > 1UL ? finalPaths_[i][0].ids.size() : finalPaths_[0][i].ids.size();
-    starts[i + 1] = starts[i] + seqLen;
+void RecurrentGradientMachine::createDataOutlinkSelRowsInfo(
+    bool isSeq, std::vector<Argument>& outArgs) {
+  batchMachineIdVec_.clear();
+
+  size_t seqIdx = 0;
+  for (size_t i = 0; i < finalPaths_.size(); ++i) {
+    for (size_t j = 0; j < finalPaths_[i].size(); ++j) {
+      std::vector<int>& machineIdVec = finalPaths_[i][j].machineIdVec;
+      if (isSeq) {
+        for (size_t i = 0; i < machineIdVec.size(); ++i) {
+          size_t rowId = machineIdVec[i];
+          int* seqPos =
+              outArgs[i].sequenceStartPositions->getMutableData(false);
+          batchMachineIdVec_.push_back(seqPos[rowId]);
+        }
+      } else {
+        batchMachineIdVec_.insert(
+            batchMachineIdVec_.end(), machineIdVec.begin(), machineIdVec.end());
+      }
+      seqIdx++;
+    }
+  }
+}
+
+void RecurrentGradientMachine::createDataOutlinkCopySizeInfo(
+    bool isSeq, std::vector<Argument>& outArgs, std::vector<int>& copySize) {
+  size_t totalSeqNum = std::accumulate(
+      finalPaths_.begin(),
+      finalPaths_.end(),
+      0UL,
+      [](size_t a, const std::vector<Path>& b) { return a + b.size(); });
+  copySize.resize(totalSeqNum, 1);
+
+  batchMachineStartPos_.resize(totalSeqNum + 1, 0);
+  if (isSeq) {
+    ICpuGpuVectorPtr inputSeqStartPos = outArgs[0].sequenceStartPositions;
+    CHECK_EQ(static_cast<size_t>(inputSeqStartPos->getSize() - 1),
+             getBeamSize() > 1 ? finalPaths_.size() : finalPaths_[0].size());
+    int* starts = inputSeqStartPos->getMutableData(false);
+    int seqId = 0;
+    for (int i = 0; i < finalPaths_.size(); ++i) {
+      for (int j = 0; j < finalPaths_[i].size(); ++j) {
+        copySize[seqId] = getBeamSize() > 1 ? starts[i + 1] - starts[i]
+                                            : starts[j + 1] - starts[j];
+        batchMachineStartPos_[seqId + 1] =
+            batchMachineStartPos_[seqId] + finalPaths_[i][j].ids.size();
+        seqId++;
+      }
+    }
+  } else {
+    for (size_t i = 0; i < finalPaths_[0].size(); ++i)
+      batchMachineStartPos_[i + 1] =
+          batchMachineStartPos_[i] + finalPaths_[0][i].ids.size();
   }
+}
 
+void RecurrentGradientMachine::createDataOutlink() {
   for (size_t i = 0; i < dataArgsSize_; i++) {
+    bool isSeq = dataArgsFrame_[i][0].hasSeq();
+    std::vector<int> copySize;
+    createDataOutlinkCopySizeInfo(isSeq, dataArgsFrame_[i], copySize);
+    createDataOutlinkSelRowsInfo(isSeq, dataArgsFrame_[i]);
+
     dataArgs_[i].concat(dataArgsFrame_[i],
-                        machineIdVec,
-                        starts,
+                        batchMachineIdVec_,
+                        batchMachineStartPos_,
+                        copySize,
                         useGpu_,
                         HPPL_STREAM_1,
                         PASS_TEST);
-
     auto dataAgent = dynamic_cast(outFrameLines_[i + 1].agentLayer.get());
     CHECK_NOTNULL(dataAgent);
diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
index fb3fc5877ac96323e891f800db80af83b6809831..c16fae6d1770e616fdcfabd440624c9be9753c91 100644
--- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
+++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h
@@ -190,7 +190,7 @@ public:
   std::vector<int> ids;
 
   /**
-   * @brief idsProb, log probability of each generated words.
+   * @brief idsProb, log probability of each generated word.
    */
   std::vector<real> idsProb;
@@ -472,15 +472,43 @@ private:
   void copyDataOutlinkFrame(size_t machineCur);
 
   /*
-   * @brief In generation, if the layer group has more than 1 outlink, outlinks
-   * except the first one are data outlinks. This function creates the data
-   * outlinks.
-   * @note In beam search, only one generated sequence with the hightest log
-   * probabilites are retained.
-   * @param machineIdVec : select a row of output matrix in each frame
-   * that the generation process expanded.
+   * @brief In generation, if the layer group has more than one outlink, all
+   * outlinks except the first one are data outlinks. In RecurrentLayerGroup,
+   * each time step is a separate Network, so the outputs of a layer inside
+   * the RecurrentLayerGroup are stored in separate Arguments. If such a layer
+   * is specified as an outlink of the RecurrentLayerGroup, this function
+   * collects its outputs from every time step of every generated sequence,
+   * which are dispersed in separate Arguments, into one single Argument that
+   * becomes the output of the RecurrentLayerGroup.
    */
-  void createDataOutlink(std::vector<int>& machineIdVec);
+  void createDataOutlink();
+
+  /*
+   * @brief decide how many rows to select, starting from a given row, from
+   * the Matrix that stores the forward-pass results.
+   *
+   * @param isSeq: a flag indicating whether the layer to be output of the
+   * RecurrentGradientMachine is a sequence or not
+   * @param outArgs: all of the returned Arguments of the forward pass
+   * during the generation process.
+   * @param copySize: the returned result, the number of rows to select from
+   * the Matrix that stores the forward-pass results, starting from a given
+   * row.
+   */
+  void createDataOutlinkCopySizeInfo(bool isSeq,
+                                     std::vector<Argument>& outArgs,
+                                     std::vector<int>& copySize);
+
+  /*
+   * @brief decide the index of the start row, for each time step of a
+   * generated sequence, in the Matrix that stores the forward-pass results
+   * of the entire beam-search batch.
+   *
+   * @param isSeq: a flag indicating whether the layer to be output of the
+   * RecurrentGradientMachine is a sequence or not
+   * @param outArgs: all of the returned Arguments of the forward pass
+   * during the generation process.
+   */
+  void createDataOutlinkSelRowsInfo(bool isSeq, std::vector<Argument>& outArgs);
 
   /*
    * @brief used in beam search, connect previous frame to form recurrent link
@@ -543,6 +571,7 @@ private:
   std::vector<int> topIds_;
   std::vector<int> seqIds_;
   std::vector<int> batchMachineIdVec_;
+  std::vector<int> batchMachineStartPos_;
   std::vector<std::vector<Path>> finalPaths_;
   std::vector<real> minFinalPathLogProb_;
   BeamSearchControlCallbacks* beamSearchCtrlCallbacks_;
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index 8d2d8a1141188ca86c7f904947422eb1bb23f72f..ba896b80c55637978858975b1755318012dc3c75 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -44,6 +44,7 @@ endfunction()
 add_subdirectory(math)
 
 cc_test(gather_test SRCS gather_test.cc DEPS tensor)
+op_library(gather_op SRCS gather_op.cc gather_op.cu)
 
 cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h
index d6e6990394e46ba06c4bacfe33ca522f3ff1413a..92fb51ec17709bc6f8abb2f516a9240fb5dc3a77 100644
--- a/paddle/operators/gather.h
+++ b/paddle/operators/gather.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <cstring>
 
 #include "paddle/framework/ddim.h"
+#include "paddle/framework/eigen.h"
 #include "paddle/framework/tensor.h"
 #include "paddle/platform/place.h"
@@ -25,13 +26,13 @@ namespace operators {
 
 // Implementation of CPU copy
 template <typename T>
-void CPUGather(const T* params, const int* indices, const int slice_size,
+void CPUGather(const T* src, const int* indices, const int slice_size,
                const int index_size, T* output) {
   const size_t slice_bytes = slice_size * sizeof(T);
 
   for (int i = 0; i < index_size; ++i) {
     int index_ = indices[i];
-    memcpy(output + i * slice_size, params + index_ * slice_size, slice_bytes);
+    memcpy(output + i * slice_size, src + index_ * slice_size, slice_bytes);
   }
 }
@@ -55,7 +56,7 @@ void Gather(const platform::Place& place, const paddle::framework::Tensor* src,
   int index_size = index->dims()[0];
 
   auto src_dims = src->dims();
-  paddle::framework::DDim output_dims(src_dims);
+  framework::DDim output_dims(src_dims);
   output_dims[0] = index_size;
 
   // slice size
diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..123bed296c462c30bddd3bfbd530098fdbfe4856
--- /dev/null
+++ b/paddle/operators/gather_op.cc
@@ -0,0 +1,72 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/gather_op.h"
+#include "paddle/framework/ddim.h"
+
+namespace paddle {
+namespace operators {
+
+class GatherOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    int batch_size = ctx.Input<Tensor>("Index")->dims()[0];
+    PADDLE_ENFORCE_GE(batch_size, 0, "Batch size must be >= 0");
+    framework::DDim output_dims(ctx.Input<Tensor>("X")->dims());
+    output_dims[0] = batch_size;
+    ctx.Output<Tensor>("Out")->Resize(output_dims);
+  }
+};
+
+class GatherGradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    auto X_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto X = ctx.Input<Tensor>("X");
+
+    X_grad->Resize(X->dims());
+  }
+};
+
+class GatherOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  GatherOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "The source input of gather op");
+    AddInput("Index", "The index input of gather op");
+    AddOutput("Out", "The output of gather op");
+    AddComment(R"DOC(
+Gather Operator: selects rows from the first axis of X according to Index,
+
+Out = X[Index]
+)DOC");
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP(gather, ops::GatherOp, ops::GatherOpMaker, gather_grad,
+            ops::GatherGradOp);
+REGISTER_OP_CPU_KERNEL(gather,
+                       ops::GatherOpKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(
+    gather_grad,
+    ops::GatherGradientOpKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/gather_op.cu b/paddle/operators/gather_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..3f04a7b3f8142106917975cd1e0413fa1633a298
--- /dev/null
+++ b/paddle/operators/gather_op.cu
@@ -0,0 +1,20 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#define EIGEN_USE_GPU
+#include "paddle/operators/gather_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(gather,
+                       ops::GatherOpKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/gather_op.h b/paddle/operators/gather_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..381854f301870beadb72d9e9b4eb17ff199960fb
--- /dev/null
+++ b/paddle/operators/gather_op.h
@@ -0,0 +1,53 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "gather.h"
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+#include "scatter.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename Place, typename T>
+class GatherOpKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext &ctx) const override {
+    auto *X = ctx.Input<Tensor>("X");
+    auto *Index = ctx.Input<Tensor>("Index");
+    auto *Y = ctx.Output<Tensor>("Out");
+
+    Y->mutable_data<T>(ctx.GetPlace());
+    Gather<T>(ctx.GetPlace(), X, Index, Y);
+  }
+};
+
+template <typename Place, typename T>
+class GatherGradientOpKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext &ctx) const override {
+    auto *Index = ctx.Input<Tensor>("Index");
+    auto *dX = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto *dO = ctx.Input<Tensor>(framework::GradVarName("Out"));
+
+    dX->mutable_data<T>(ctx.GetPlace());
+    ScatterUpdate<T>(ctx.GetPlace(), dO, Index, dX);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp
index 0547ac93cd183afbcede41d280c6b4b16ed7dab1..79d2158334269be2c644c74b202724fabc21a07b 100644
--- a/paddle/parameter/Argument.cpp
+++ b/paddle/parameter/Argument.cpp
@@ -276,17 +276,21 @@ int32_t Argument::resizeAndCopyFrom(const Argument& src,
 void Argument::concat(const std::vector<Argument>& args,
                       const std::vector<int>& selectRows,
                       const std::vector<int>& seqStartPos,
+                      const std::vector<int>& copySize,
                       bool useGpu,
                       hl_stream_t stream,
                       PassType passType) {
   CHECK(!subSequenceStartPositions)
       << "undefined behavior for subsequence positions";
-  size_t batchSize = selectRows.size();
+
+  size_t batchSize = 0;
+  for (size_t i = 0; i < copySize.size(); ++i)
+    batchSize += copySize[i] * (seqStartPos[i + 1] - seqStartPos[i]);
+
   auto copyArg = [batchSize, stream](MatrixPtr& dst,
                                      MatrixPtr src,
-                                     int startRow,
-                                     int pos,
+                                     int desStartRow,
+                                     int srcStartRow,
                                      int size,
                                      bool useGpu) {
     if (!src) {
@@ -300,14 +304,14 @@ void Argument::concat(const std::vector<Argument>& args,
       dst->resize(batchSize, width);
     }
 
-    MatrixPtr tmpMatrix = dst->subMatrix(startRow, size);
-    tmpMatrix->copyFrom(*src->subMatrix(pos, size), stream);
+    MatrixPtr tmpMatrix = dst->subMatrix(desStartRow, size);
+    tmpMatrix->copyFrom(*src->subMatrix(srcStartRow, size), stream);
   };
 
   auto copyIds = [batchSize, stream](IVectorPtr& dst,
                                      const IVectorPtr& src,
-                                     int startRow,
-                                     int pos,
+                                     int desStartRow,
+                                     int srcStartRow,
                                      int size,
                                      bool useGpu) {
     if (!src) {
@@ -315,13 +319,14 @@ void Argument::concat(const std::vector<Argument>& args,
       return;
     }
     IVector::resizeOrCreate(dst, batchSize, useGpu);
-    dst->subVec(startRow, size)->copyFrom(*src->subVec(pos, size), stream);
+    dst->subVec(desStartRow, size)
+        ->copyFrom(*src->subVec(srcStartRow, size), stream);
   };
 
   auto copyStrs = [batchSize, stream](SVectorPtr& dst,
                                       const SVectorPtr& src,
-                                      int startRow,
-                                      int pos,
+                                      int desStartRow,
+                                      int srcStartRow,
                                       int size,
                                       bool useGpu) {
     if (!src) {
@@ -333,30 +338,31 @@ void Argument::concat(const std::vector<Argument>& args,
     } else {
       dst->resize(batchSize);
     }
-    std::copy(
-        src->begin() + pos, src->begin() + pos + size, dst->begin() + startRow);
+    std::copy(src->begin() + srcStartRow,
+              src->begin() + srcStartRow + size,
+              dst->begin() + desStartRow);
   };
 
   dataId = args[0].dataId;
   CHECK_NE(seqStartPos.size(), 0UL);
-  size_t sampleNum = seqStartPos.size() - 1;
-  for (size_t i = 0; i < sampleNum; ++i) {
+  int desStartRow = 0;
+  for (size_t i = 0; i < copySize.size(); ++i) {
     int startPos = seqStartPos[i];
     int endPos = seqStartPos[i + 1];
     CHECK_GE(args.size(), static_cast<size_t>(endPos - startPos));
     for (int j = startPos; j < endPos; ++j) {
       const Argument& arg = args[j - startPos];
-      CHECK_EQ(arg.dataId, dataId) << "Arguments in concat should have"
-                                   << " same dataId";
-      const int copySize = 1;
-      const int rowIdx = selectRows[j];
-      copyArg(in, arg.in, j, rowIdx, copySize, useGpu);
-      copyArg(value, arg.value, j, rowIdx, copySize, useGpu);
+      CHECK_EQ(arg.dataId, dataId) << "Arguments to concatenate should have "
+                                   << "the same dataId.";
+      const int srcStartRow = selectRows[j];
+      copyArg(in, arg.in, desStartRow, srcStartRow, copySize[i], useGpu);
+      copyArg(value, arg.value, desStartRow, srcStartRow, copySize[i], useGpu);
       if (passType != PASS_TEST) {
-        copyArg(grad, arg.grad, j, rowIdx, copySize, useGpu);
+        copyArg(grad, arg.grad, desStartRow, srcStartRow, copySize[i], useGpu);
       }
-      copyIds(ids, arg.ids, j, rowIdx, copySize, useGpu);
-      copyStrs(strs, arg.strs, j, rowIdx, copySize, useGpu);
+      copyIds(ids, arg.ids, desStartRow, srcStartRow, copySize[i], useGpu);
+      copyStrs(strs, arg.strs, desStartRow, srcStartRow, copySize[i], useGpu);
+      desStartRow += copySize[i];
     }
   }
   ICpuGpuVector::resizeOrCreate(
diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h
index d8d7a4398f99a2794c5d25528a7d582f5ed629ba..38797a76f55c311070192bd307103143d67cabca 100644
--- a/paddle/parameter/Argument.h
+++ b/paddle/parameter/Argument.h
@@ -240,6 +240,7 @@ struct Argument {
   void concat(const std::vector<Argument>& args,
               const std::vector<int>& selectRows,
               const std::vector<int>& seqStartPos,
+              const std::vector<int>& copySize,
               bool useGpu,
               hl_stream_t stream,
               PassType passType);
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 8d71629faa332fc1e099001950c25e879cbedff3..b3d5ef95cc816e5d8493ee88097d9e4b6d45daf8 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -338,7 +338,8 @@ def RecurrentLayerGroupWithoutOutLinksBegin(name,
             in_links_count += 1
         layer_name = MakeLayerNameInParentSubmodel(name)
         layer = g_layer_map[layer_name]
-        ScatterAgentLayer(name=name, size=layer.size)
+        ScatterAgentLayer(
+            name=name, size=layer.size, width=layer.width, height=layer.height)
 
         pair = g_current_submodel.in_links.add()
         pair.layer_name = layer_name
@@ -2197,8 +2198,8 @@ class MaxOutLayer(LayerBase):
         maxout_conf = self.config.inputs[0].maxout_conf
         parse_maxout(self.inputs[0].maxout, input_layer.name, maxout_conf)
         out_channels = maxout_conf.image_conf.channels / maxout_conf.groups
-        self.set_cnn_layer(name, g_layer_map[input_layer.name].height,
-                           g_layer_map[input_layer.name].width, out_channels)
+        self.set_cnn_layer(name, maxout_conf.image_conf.img_size_y,
+                           maxout_conf.image_conf.img_size, out_channels)
 
 
 @config_layer('row_conv')
@@ -2405,9 +2406,11 @@ class GatherAgentLayer(LayerBase):
 
 @config_layer('scatter_agent')
 class ScatterAgentLayer(LayerBase):
-    def __init__(self, name, size, device=None):
+    def __init__(self, name, size, width=None, height=None, device=None):
         super(ScatterAgentLayer, self).__init__(
             name, 'scatter_agent', size, inputs=[], device=device)
+        if height and width:
+            self.set_layer_height_width(height, width)
 
 
 @config_layer('multiplex')
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index c9e3ded65cb8fc8d00d3decac6af2aff2b67a37d..be854c38f76af4cd6d5f628785b3e4dbfc6ebacb 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -16,11 +16,13 @@
 import functools
 import collections
 import inspect
 
+import paddle.trainer.config_parser as cp
 from paddle.trainer.config_parser import *
 from .activations import LinearActivation, SigmoidActivation, TanhActivation, \
     ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation
 from .evaluators import *
-from .poolings import MaxPooling, AvgPooling, BasePoolingType
+from .poolings import MaxPooling, AvgPooling, BasePoolingType, \
+    CudnnAvgPooling, CudnnMaxPooling
 from .attrs import *
 from .default_decorators import *
@@ -330,6 +332,14 @@ class LayerOutput(object):
         self.outputs = outputs
         self.reverse = reverse
 
+    @property
+    def width(self):
+        return cp.g_layer_map[self.full_name].width
+
+    @property
+    def height(self):
+        return cp.g_layer_map[self.full_name].height
+
     def set_input(self, input):
         """
         Set the input for a memory layer. Can only be used for memory layer
@@ -911,7 +921,13 @@ def data_layer(name, size, height=None, width=None, layer_attr=None):
         width=width,
         **ExtraLayerAttribute.to_kwargs(layer_attr))
 
-    return LayerOutput(name, LayerType.DATA, size=size)
+    num_filters = None
+    if height is not None and width is not None:
+        num_filters = size / (width * height)
+        assert num_filters * width * height == size, \
+            "size=%s width=%s height=%s" % (size, width, height)
+
+    return LayerOutput(name, LayerType.DATA, size=size, num_filters=num_filters)
 
 
 @wrap_name_default("embedding")
@@ -2571,6 +2587,10 @@ def img_pool_layer(input,
     assert input.num_filters is not None
     num_channels = input.num_filters
 
+    assert pool_type is None or type(pool_type) in [
+        AvgPooling, MaxPooling, CudnnAvgPooling, CudnnMaxPooling], \
+        "only (Cudnn)AvgPooling, (Cudnn)MaxPooling are supported"
+
     if pool_type is None:
         pool_type = MaxPooling()
     elif isinstance(pool_type, AvgPooling):
@@ -2580,7 +2600,6 @@
         if (
         isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
         else pool_type.name
-
     pool_size_y = pool_size if pool_size_y is None else pool_size_y
     stride_y = stride if stride_y is None else stride_y
     padding_y = padding if padding_y is None else padding_y
@@ -4204,8 +4223,7 @@ def conv_operator(img,
         num_channels = img.num_filters
 
     assert isinstance(filter, LayerOutput)
-    if filter.size is not None:
-        filter.size = filter_size * filter_size_y * num_filters * num_channels
+    assert filter.size is not None
 
     opCls = ConvTransOperator if trans else ConvOperator
 
@@ -4916,7 +4934,6 @@ def maxout_layer(input, groups, num_channels=None, name=None, layer_attr=None):
     :return: LayerOutput object.
    :rtype: LayerOutput
     """
-    assert input.layer_type == LayerType.CONV_LAYER
     assert isinstance(input.activation, LinearActivation)
     assert groups > 1
     if num_channels is None:
@@ -6219,11 +6236,11 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1):
 @wrap_bias_attr_default()
 def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None):
     """
-    A layer applies a linear transformation to each element in each row of 
-    the input matrix. For each element, the layer first re-scale it and then 
+    A layer applies a linear transformation to each element in each row of
+    the input matrix. For each element, the layer first re-scales it and then
     adds a bias to it.
 
-    This layer is very like the SlopeInterceptLayer, except the scale and 
+    This layer is similar to the SlopeInterceptLayer, except that the scale and
     bias are trainable.
 
     .. math::
diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt
index 3c4d239e4517433bab30903a27e7fe373bcf3c84..96a89281c8fa74fcde7c896425a77f81f296e552 100644
--- a/python/paddle/v2/framework/tests/CMakeLists.txt
+++ b/python/paddle/v2/framework/tests/CMakeLists.txt
@@ -13,6 +13,7 @@ py_test(test_add_two_op SRCS test_add_two_op.py)
 py_test(test_sigmoid_op SRCS test_sigmoid_op.py)
 py_test(test_softmax_op SRCS test_softmax_op.py)
 py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py)
+py_test(test_gather_op SRCS test_gather_op.py)
 py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py)
 
 py_test(gradient_checker SRCS gradient_checker.py)
diff --git a/python/paddle/v2/framework/tests/test_gather_op.py b/python/paddle/v2/framework/tests/test_gather_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..e86898304252d08be718e40fed46c5e921596af7
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_gather_op.py
@@ -0,0 +1,34 @@
+import unittest
+from op_test_util import OpTestMeta
+from gradient_checker import GradientChecker, create_op
+import numpy
+import paddle.v2.framework.core as core
+from paddle.v2.framework.op import Operator
+
+
+class TestGatherOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "gather"
+        xnp = numpy.random.random((10, 20)).astype("float32")
+        self.inputs = {
+            'X': xnp,
+            'Index': numpy.array([1, 3, 5]).astype("int32")
+        }
+        self.outputs = {'Out': self.inputs['X'][self.inputs['Index']]}
+
+
+class TestGatherGradOp(GradientChecker):
+    def test_gather_grad(self):
+        print 'creating op'
+        op = create_op("gather")
+        print 'creating op done'
+        xnp = numpy.random.random((10, 20)).astype("float32")
+        inputs = {'X': xnp, 'Index': numpy.array([1, 3, 5]).astype("int32")}
+        print 'correct before check gradient'
+        self.check_grad(op, inputs, set("X"), "Out")
+
+
+if __name__ == "__main__":
+    unittest.main()
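For reference, a minimal NumPy sketch of the semantics exercised by test_gather_op.py above. It is illustrative only and not part of the patch; the names x, index, d_out are hypothetical, and the scatter-add form of the backward pass is an assumption that matches ScatterUpdate when Index holds unique entries, as in the test.

import numpy as np

# Forward: Out = X[Index] -- rows of X selected along the first axis.
# GatherOp::InferShape likewise sets Out's first dimension to len(Index).
x = np.random.random((10, 20)).astype("float32")
index = np.array([1, 3, 5], dtype="int32")
out = x[index]  # shape (3, 20)

# Backward: the gradient kernel routes dOut back to the gathered rows of X;
# with unique indices this is equivalent to the scatter-add below.
d_out = np.ones_like(out)
d_x = np.zeros_like(x)
np.add.at(d_x, index, d_out)  # only rows 1, 3 and 5 of d_x receive gradient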