diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index cb330ea5e1b914587a725c9b90a33053f3fbbc3d..a4a843c610feb2b378c22c4b4097cd238ccd61ab 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -362,6 +362,11 @@ trans .. autoclass:: paddle.v2.layer.trans :noindex: +scale_shift +----------- +.. autoclass:: paddle.v2.layer.scale_shift + :noindex: + Sampling Layers =============== diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 9d30887224fe0020ff5665f362e7403bf5c724ee..bfda18724cc8ed23a40e0626ff07a290d26aa9d2 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -110,7 +110,7 @@ static std::unique_ptr BackwardRecursive( dup_output_ops[out].emplace_back(local_op_id); return false; }); - net->AddOp(std::move(bwd)); + net->AppendOp(std::move(bwd)); } // Get unique ID for this method. auto uid = uniq_id++; @@ -163,8 +163,9 @@ static std::unique_ptr BackwardRecursive( // If part of input gradient of that operator is not calculated, fill // zero variables to that input gradient. - net->AddOp(OpRegistry::CreateOp("fill_zeros_like", {{"Src", {prefix}}}, - {{"Dst", {grad_input}}}, {})); + net->AppendOp(OpRegistry::CreateOp("fill_zeros_like", + {{"Src", {prefix}}}, + {{"Dst", {grad_input}}}, {})); } return false; }); @@ -195,7 +196,7 @@ static std::unique_ptr BackwardRecursive( if (net->ops_.empty()) { // Current no aux op is added to network return grad_op; } - net->AddOp(std::move(grad_op)); + net->AppendOp(std::move(grad_op)); } net->SetType("@GENERATED_BACKWARD@"); net->CompleteAddOp(); diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 2c5ec76dfeb8b8485951e4d94896b6758e0cb930..b93ab66f2f5b9cffa6d51b6e36afe552125970e4 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -75,13 +75,13 @@ class FcOp : public operators::NetOp { FcOp(const std::string &type, const VarNameMap &inputs, const VarNameMap &outputs, const AttributeMap &attrs) : NetOp(type, inputs, outputs, attrs) { - AddOp(OpRegistry::CreateOp("mul", - {{"X", {Input("X")}}, {"Y", {Input("W")}}}, - {{"Out", {Output("mul_result")}}}, {})); + AppendOp(OpRegistry::CreateOp("mul", + {{"X", {Input("X")}}, {"Y", {Input("W")}}}, + {{"Out", {Output("mul_result")}}}, {})); auto input_b = Inputs("b"); std::string before_act = "mul_result"; if (input_b.size() != 0) { - AddOp(OpRegistry::CreateOp( + AppendOp(OpRegistry::CreateOp( "rowwise_add", {{"X", {Output("mul_result")}}, {"b", {input_b[0]}}}, {{"Out", {Output("add_result")}}}, {})); before_act = "add_result"; @@ -92,8 +92,8 @@ class FcOp : public operators::NetOp { } } - AddOp(OpRegistry::CreateOp("sigmoid", {{"X", {Output(before_act)}}}, - {{"Out", {Output("Out")}}}, {})); + AppendOp(OpRegistry::CreateOp("sigmoid", {{"X", {Output(before_act)}}}, + {{"Out", {Output("Out")}}}, {})); CompleteAddOp(false); } }; @@ -234,13 +234,13 @@ TEST(Backward, net_fc_backward_not_have_b) { TEST(Backward, net_input_of_network_not_need_grad) { ops::NetOp net; - net.AddOp(f::OpRegistry::CreateOp( + net.AppendOp(f::OpRegistry::CreateOp( "fc", {{"X", {"x"}}, {"W", {"W1"}}, {"b", {"b1"}}}, {{"mul_result", {"mul_tmp_0"}}, {"add_result", {"add_tmp_0"}}, {"Out", {"hidden0"}}}, {})); - net.AddOp(f::OpRegistry::CreateOp( + net.AppendOp(f::OpRegistry::CreateOp( "fc", {{"X", {"hidden0"}}, {"W", {"W2"}}, {"b", {"b2"}}}, {{"mul_result", {"mul_tmp_1"}}, {"add_result", {"add_tmp_1"}}, @@ -273,10 +273,10 @@ TEST(Backward, net_input_of_network_not_need_grad) { 
TEST(Backward, net_shared_weight) { ops::NetOp net; - net.AddOp(f::OpRegistry::CreateOp("mul", {{"X", {"x"}}, {"Y", {"w"}}}, - {{"Out", {"out"}}}, {})); - net.AddOp(f::OpRegistry::CreateOp("mul", {{"X", {"out"}}, {"Y", {"w"}}}, - {{"Out", {"FinalOut"}}}, {})); + net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"x"}}, {"Y", {"w"}}}, + {{"Out", {"out"}}}, {})); + net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"out"}}, {"Y", {"w"}}}, + {{"Out", {"FinalOut"}}}, {})); net.CompleteAddOp(); auto bwd = f::Backward(net, {}); @@ -357,19 +357,19 @@ TEST(Backward, op_part_of_input_are_not_need) { TEST(Backward, linear_net_intermediate_variable_has_no_grad) { ops::NetOp net; - net.AddOp(f::OpRegistry::CreateOp( + net.AppendOp(f::OpRegistry::CreateOp( "fc", {{"X", {"x1"}}, {"W", {"w1"}}, {"b", {"b1"}}}, {{"mul_result", {"mul_out1"}}, {"add_result", {"add_out1"}}, {"Out", {"out1"}}}, {})); - net.AddOp(f::OpRegistry::CreateOp( + net.AppendOp(f::OpRegistry::CreateOp( "fc", {{"X", {"out1"}}, {"W", {"w2"}}, {"b", {"b2"}}}, {{"mul_result", {"mul_out2"}}, {"add_result", {"tmp_out2"}}, {"Out", {"out2"}}}, {})); - net.AddOp(f::OpRegistry::CreateOp( + net.AppendOp(f::OpRegistry::CreateOp( "fc", {{"X", {"out2"}}, {"W", {"w3"}}, {"b", {"b3"}}}, {{"mul_result", {"mul_out3"}}, {"add_result", {"tmp_out3"}}, diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 68c5526bbb9ce39c2bb1cdbaac51eaa75d09e1d1..83649d9562a9f825600b7529ee8240e1cd0ac92b 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -223,8 +223,8 @@ All parameter, weight, gradient are variables in Paddle. retv->SetType("plain_net"); return retv; }) - .def("add_op", [](operators::NetOp &self, - const OperatorBase &op) { self.AddOp(op); }) + .def("append_op", [](operators::NetOp &self, + const OperatorBase &op) { self.AppendOp(op); }) .def("complete_add_op", &operators::NetOp::CompleteAddOp) .def("complete_add_op", [](std::shared_ptr &self) { self->CompleteAddOp(); diff --git a/paddle/function/GemmFunctor.cpp b/paddle/function/GemmFunctor.cpp index dc83278d8e3b70101962521b76b902046629c2dd..9e25ee58a12490a1454436b3fe4a89176478d5c0 100644 --- a/paddle/function/GemmFunctor.cpp +++ b/paddle/function/GemmFunctor.cpp @@ -84,7 +84,7 @@ struct BlasGemm { } }; -template class BlasGemm; -template class BlasGemm; +template struct BlasGemm; +template struct BlasGemm; } // namespace paddle diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index cfa80a89365af5111746eec9599d16e37532a9f7..26cff3e67710b2f38d93572c5d58849aa94a5135 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -202,7 +202,7 @@ void NeuralNetwork::prefetch(const std::vector& inArgs) { auto mat = dynamic_cast( para->getMat(PARAMETER_VALUE).get()); para->clearGradient(); - mat->clearIndices(); + if (mat) mat->clearIndices(); } } } diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index f98bf95064fa539b990309dfe0bff10c1e99d096..157b1ab45163a94a81d859dbcb7a52ae8edae439 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -184,7 +184,7 @@ public: } void backward(const UpdateCallback& callback) override { - if (biases_) { + if (biases_ && biases_->getWGrad()) { backwardActivation(); biases_->getWGrad()->collectBias(*getOutputGrad(), 1); 
      biases_->getParameterPtr()->incUpdate(callback);
diff --git a/paddle/gserver/layers/ScaleShiftLayer.cpp b/paddle/gserver/layers/ScaleShiftLayer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..35fd038ab43a8a8b08bc328b3d1b08a7bbedd0a1
--- /dev/null
+++ b/paddle/gserver/layers/ScaleShiftLayer.cpp
@@ -0,0 +1,107 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Layer.h"
+
+namespace paddle {
+
+/**
+ * A layer that applies a linear transformation to each element in each row
+ * of the input matrix. For each element, the layer first re-scales it and
+ * then adds a bias to it.
+ *
+ * \f[
+ *    y = wx + b
+ * \f]
+ *
+ * Here, w is the scale and b is the bias. Both w and b are trainable scalars.
+ *
+ */
+
+class ScaleShiftLayer : public Layer {
+protected:
+  std::unique_ptr<Weight> scale_;
+  std::unique_ptr<Weight> offset_;
+
+public:
+  explicit ScaleShiftLayer(const LayerConfig& config) : Layer(config) {}
+
+  bool init(const LayerMap& layerMap,
+            const ParameterMap& parameterMap) override;
+
+  void forward(PassType passType) override;
+  void backward(const UpdateCallback& callback = nullptr) override;
+};
+
+REGISTER_LAYER(scale_shift, ScaleShiftLayer);
+
+bool ScaleShiftLayer::init(const LayerMap& layerMap,
+                           const ParameterMap& parameterMap) {
+  Layer::init(layerMap, parameterMap);
+  CHECK_EQ(inputLayers_.size(), 1U);
+  scale_.reset(new Weight(1, 1, parameters_[0]));
+  if (biasParameter_.get() != NULL) {
+    offset_ = std::unique_ptr<Weight>(new Weight(1, 1, biasParameter_));
+  }
+  return true;
+}
+
+void ScaleShiftLayer::forward(PassType passType) {
+  Layer::forward(passType);
+
+  MatrixPtr inV = getInputValue(0);
+  resetOutput(inV->getHeight(), inV->getWidth());
+  MatrixPtr outV = getOutputValue();
+  real scaleValue = scale_->getW()->getElement(0, 0);
+  outV->mulScalar(*inV, scaleValue);
+  if (offset_) {
+    real offsetValue = offset_->getW()->getElement(0, 0);
+    outV->add(offsetValue);
+  }
+}
+
+void ScaleShiftLayer::backward(const UpdateCallback& callback) {
+  MatrixPtr inV = getInputValue(0);
+  MatrixPtr inG = getInputGrad(0);
+  MatrixPtr outV = getOutputValue();
+  MatrixPtr outG = getOutputGrad();
+
+  /* Calculate the parameter gradient for the current layer */
+  if (scale_->getWGrad()) {
+    MatrixPtr rowSumMtx;
+    Matrix::resizeOrCreate(rowSumMtx, outG->getHeight(), 1, false, useGpu_);
+    // this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij} * c_{ij}
+    rowSumMtx->sumOfProducts(
+        /* b= */ *inV, /* c= */ *outG, /* scaleSum= */ 1, /* scaleDest= */ 0.);
+    // this_i = scaleDest * this_i + scaleSum * \sum_j b_{ji}
+    scale_->getWGrad()->sumCols(
+        /* b= */ *rowSumMtx, /* scaleSum= */ 1., /* scaleDest= */ 1.);
+    scale_->getParameterPtr()->incUpdate(callback);
+  }
+  if (offset_ && offset_->getWGrad()) {
+    MatrixPtr rowSumMtx;
+    Matrix::resizeOrCreate(rowSumMtx, outG->getHeight(), 1, false, useGpu_);
+    rowSumMtx->sumRows(*outG, 1., 0.);
+    offset_->getWGrad()->sumCols(*rowSumMtx, 1., 1.);
+    offset_->getParameterPtr()->incUpdate(callback);
+  }
+
+
/* Calculate the input layers error */ + if (inG) { + real scaleValue = scale_->getW()->getElement(0, 0); + inG->add(*outG, scaleValue); + } +} + +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0f312b6ca50bc1e6317251ba785f1c61a224b54e..dd2c955e6a4660a1811f205ec5c5861798291912 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -2007,6 +2007,21 @@ TEST(Layer, RowL2NormLayer) { } } +TEST(Layer, ScaleShiftLayer) { + const size_t batchSize = 16; + const size_t size = 32; + TestConfig config; + config.layerConfig.set_type("scale_shift"); + config.layerConfig.set_size(size); + config.biasSize = 1; + config.inputDefs.push_back( + {INPUT_DATA, "input", /* dim= */ size, /* paraSize= */ 1}); + config.layerConfig.add_inputs(); + for (auto useGpu : {false, true}) { + testLayerGrad(config, "scale_shift", batchSize, false, useGpu, false); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/paddle/gserver/tests/test_NetworkCompare.cpp b/paddle/gserver/tests/test_NetworkCompare.cpp index f930c72fde3f5e0a6a45cb6bfd3507a4f48028fc..d36f72360f8ebd2033fb3e8c0e1b30911abba362 100644 --- a/paddle/gserver/tests/test_NetworkCompare.cpp +++ b/paddle/gserver/tests/test_NetworkCompare.cpp @@ -269,7 +269,8 @@ TEST(Compare, img_conv2) { bool useGpu = FLAGS_use_gpu; double eps = FLAGS_checkgrad_eps; FLAGS_use_gpu = true; - FLAGS_checkgrad_eps = 1e-2; + // Sometimes, this unit test will fail with 1e-2 + FLAGS_checkgrad_eps = 4e-2; compareNetwork(config_file_a, config_file_b); FLAGS_use_gpu = useGpu; FLAGS_checkgrad_eps = eps; diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index f30bbce9586d61063b4b61d98695bb568ef73c8d..dcd2237459d3bbabf449d30e4279b637c569555b 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,25 +16,25 @@ namespace paddle { namespace operators { template -class GaussianRandomKernel : public framework::OpKernel { +class CPUGaussianRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { float mean = context.op_.GetAttr("mean"); float std = context.op_.GetAttr("std"); - auto* tensor = context.Output(0); + auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); - // TODO(dzh): attribute does not support unsigned int. - // And we need a global random seed configuration. 
- int seed = context.op_.GetAttr("seed"); + unsigned int seed = + static_cast(context.op_.GetAttr("seed")); + std::minstd_rand engine; if (seed == 0) { seed = std::random_device()(); } - std::mt19937 g(seed); - std::normal_distribution distribution(mean, std); + engine.seed(seed); + std::normal_distribution dist(mean, std); ssize_t size = framework::product(tensor->dims()); - for (int i = 0; i < size; ++i) { - data[i] = distribution(g); + for (ssize_t i = 0; i < size; ++i) { + data[i] = dist(engine); } } }; @@ -48,7 +45,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& context) const override { - auto* tensor = context.Output(0); + auto* tensor = context.Output("Out"); auto dims = GetAttr>("dims"); PADDLE_ENFORCE(dims.size() > 0UL, "dims can be one int or array. dims must be set."); @@ -68,8 +65,8 @@ Use to initialize tensor with gaussian random generator. )DOC"); AddAttr>("dims", "The dimension of random tensor."); - AddAttr("mean", "mean value of random.").SetDefault(.0f); - AddAttr("std", "minimum value of random value.").SetDefault(1.0f); + AddAttr("mean", "mean of random tensor.").SetDefault(.0f); + AddAttr("std", "std of random tensor.").SetDefault(1.0f); AddAttr("seed", "Random seed of generator." "0 means use system wide seed") @@ -83,4 +80,4 @@ Use to initialize tensor with gaussian random generator. namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(gaussian_random, ops::GaussianRandomOp, ops::GaussianRandomOpMaker); -REGISTER_OP_CPU_KERNEL(gaussian_random, ops::GaussianRandomKernel); +REGISTER_OP_CPU_KERNEL(gaussian_random, ops::CPUGaussianRandomKernel); \ No newline at end of file diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index 1340b1e1e9f19fd96ced9e57fab75fe9d33bc84e..018a4bfcb26b9008c054000c91edf01e371fd82b 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -1,53 +1,65 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include -#include -#include "paddle/platform/dynload/curand.h" -#include "paddle/platform/gpu_info.h" - +#include +#include +#include +#include #include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" namespace paddle { namespace operators { template -class GaussianRandomKernel : public framework::OpKernel { +struct GaussianGenerator { + T mean_, std_; + unsigned int seed_; + + __host__ __device__ GaussianGenerator(T mean, T std, int seed) + : mean_(mean), std_(std), seed_(seed) {} + + __host__ __device__ T operator()(const unsigned int n) const { + thrust::minstd_rand rng; + rng.seed(seed_); + thrust::normal_distribution dist(mean_, std_); + rng.discard(n); + return dist(rng); + } +}; + +template +class GPUGaussianRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - float mean = context.op_.GetAttr("mean"); - float std = context.op_.GetAttr("std"); - auto* tensor = context.Output(0); + auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); - - int seed = context.op_.GetAttr("seed"); + unsigned int seed = + static_cast(context.op_.GetAttr("seed")); if (seed == 0) { std::random_device rd; seed = rd(); } - curandGenerator_t g; - PADDLE_ENFORCE(platform::dynload::curandCreateGenerator( - &g, CURAND_RNG_PSEUDO_DEFAULT)); - PADDLE_ENFORCE( - platform::dynload::curandSetPseudoRandomGeneratorSeed(g, seed)); - platform::dynload::curandGenerateNormal( - g, data, framework::product(tensor->dims()), mean, std); + T mean = static_cast(context.op_.GetAttr("mean")); + T std = static_cast(context.op_.GetAttr("std")); + thrust::counting_iterator index_sequence_begin(0); + ssize_t N = framework::product(tensor->dims()); + thrust::transform(index_sequence_begin, index_sequence_begin + N, + thrust::device_ptr(data), + GaussianGenerator(mean, std, seed)); } }; } // namespace operators } // namespace paddle -namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(gaussian_random, ops::GaussianRandomKernel); +REGISTER_OP_GPU_KERNEL(gaussian_random, + paddle::operators::GPUGaussianRandomKernel); diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index 460e458ca4f7f40746f0dbf7e258a165faa88e1a..173cc3850ca9d97200e272ec59d1bd3fe09b5053 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -13,7 +13,6 @@ limitations under the License. 
*/ #include "paddle/operators/mul_op.h" -#include "paddle/operators/math/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index 885ac6eeca65998dea62c1db40b9261cceb97805..3d3f996ef52b6c1136425ca9de0f60e7e155458f 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -84,13 +84,14 @@ class NetOp : public framework::OperatorBase { return true; } - void AddOp(const framework::OperatorBase& op) { AddOp(op.Clone()); } + void AppendOp(const framework::OperatorBase& op) { AppendOp(op.Clone()); } /** * @brief Add an operator by ptr */ - void AddOp(std::unique_ptr op) { - PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed"); + void AppendOp(std::unique_ptr op) { + PADDLE_ENFORCE(!add_op_done_, + "Cannot AppendOp when this network is sealed"); PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); ops_.push_back(std::move(op)); } diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc index e9598610c0a74e08a613a397109ad65994821498..99019754a965e5e7aeb74c6bfc10c9646289651b 100644 --- a/paddle/operators/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -38,10 +38,10 @@ TEST(OpKernel, all) { auto net = std::make_shared(); ASSERT_NE(net, nullptr); - net->AddOp(std::unique_ptr( + net->AppendOp(std::unique_ptr( new TestOp("test", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, {{"Out", {"y"}}}, {}))); - net->AddOp(std::unique_ptr( + net->AppendOp(std::unique_ptr( new TestOp("test", {{"X", {"y"}}, {"W", {"w2"}}, {"b", {"b2"}}}, {{"Out", {"z"}}}, {}))); @@ -61,7 +61,7 @@ TEST(NetOp, insert_op) { auto op1 = std::unique_ptr( new framework::NOP("empty", {{"X", {"x"}}, {"W", {"w1"}}, {"b", {"b1"}}}, {{"Out", {"y"}}}, {})); - net.AddOp(*op1); + net.AppendOp(*op1); net.InsertOp(0, *op1); ASSERT_EQ(2UL, net.ops_.size()); net.InsertOp(2, std::move(op1)); @@ -70,9 +70,9 @@ TEST(NetOp, insert_op) { TEST(NetOp, Clone) { NetOp net; - net.AddOp( + net.AppendOp( std::unique_ptr(new framework::NOP{"empty", {}, {}, {}})); - net.AddOp(std::unique_ptr( + net.AppendOp(std::unique_ptr( new framework::NOP{"empty2", {}, {}, {}})); net.CompleteAddOp(true); auto new_net_op = net.Clone(); diff --git a/paddle/operators/rowwise_add_op.h b/paddle/operators/rowwise_add_op.h index 232135c38de68d4002e044972b282b43a1374c72..1cbd8bb31ad90a32d8a4e3bb59617d0b5384e470 100644 --- a/paddle/operators/rowwise_add_op.h +++ b/paddle/operators/rowwise_add_op.h @@ -1,16 +1,16 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ #pragma once #include "paddle/framework/eigen.h" @@ -63,7 +63,7 @@ class RowwiseAddGradKernel : public framework::OpKernel { // https://eigen.tuxfamily.org/dox/unsupported/TensorBase_8h_source.html // colwise add - Eigen::array dims{{1}}; /* dimension to reduce */ + Eigen::array dims{{0}}; /* dimension to reduce */ EigenVector::Flatten(*db).device(place) = OutGrad.sum(dims); } }; diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index a0a0d4d914b37fca4250e5218a953f573611a086..876b3ef5575325daed8bebc0c964bd80064a7bb3 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -39,7 +36,8 @@ class CPUUniformRandomKernel : public framework::OpKernel { std::uniform_real_distribution dist( static_cast(context.op_.GetAttr("min")), static_cast(context.op_.GetAttr("max"))); - for (ssize_t i = 0; i < framework::product(tensor->dims()); ++i) { + ssize_t size = framework::product(tensor->dims()); + for (ssize_t i = 0; i < size; ++i) { data[i] = dist(engine); } } @@ -66,7 +64,6 @@ class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker { : framework::OpProtoAndCheckerMaker(proto, op_checker) { AddOutput("Out", "The output tensor of uniform random op"); AddComment(R"DOC(Uniform random operator. - Used to initialize tensor with uniform random generator. )DOC"); AddAttr>("dims", "the dimension of random tensor"); @@ -84,4 +81,4 @@ Used to initialize tensor with uniform random generator. REGISTER_OP_WITHOUT_GRADIENT(uniform_random, paddle::operators::UniformRandomOp, paddle::operators::UniformRandomOpMaker); REGISTER_OP_CPU_KERNEL(uniform_random, - paddle::operators::CPUUniformRandomKernel); + paddle::operators::CPUUniformRandomKernel); \ No newline at end of file diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index 7a243555b6385af690e9632dfa81bf96d70f925d..6716b7c7f20371f2970efaebe94cb1381983783c 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -68,4 +65,4 @@ class GPUUniformRandomKernel : public framework::OpKernel { } // namespace paddle REGISTER_OP_GPU_KERNEL(uniform_random, - paddle::operators::GPUUniformRandomKernel); + paddle::operators::GPUUniformRandomKernel); \ No newline at end of file diff --git a/paddle/parameter/Parameter.h b/paddle/parameter/Parameter.h index e31cbc3dee6c57851c241e117dbbd9b701db9d2c..321f4275d8e68d7d3fbbc19acf0afacf689474e5 100644 --- a/paddle/parameter/Parameter.h +++ b/paddle/parameter/Parameter.h @@ -65,7 +65,10 @@ public: size_t getSize() const { return config_.size(); } bool isFullSize() const { - return this->getSize() == bufs_[PARAMETER_VALUE]->getSize(); + if (bufs_[PARAMETER_VALUE]) { + return this->getSize() == bufs_[PARAMETER_VALUE]->getSize(); + } + return false; } inline bool useGpu() const { return useGpu_; } diff --git a/paddle/platform/device_context.cc b/paddle/platform/device_context.cc index f92c15ae450e94de44d27e77763e791e6bae4426..ad212c5b2c47312743362db4926c80bf056e100d 100644 --- a/paddle/platform/device_context.cc +++ b/paddle/platform/device_context.cc @@ -114,9 +114,6 @@ CUDADeviceContext::~CUDADeviceContext() { PADDLE_ENFORCE(dynload::cudnnDestroy(cudnn_handle_)); } - if (curand_generator_) { - PADDLE_ENFORCE(dynload::curandDestroyGenerator(curand_generator_)); - } eigen_stream_.reset(); eigen_device_.reset(); PADDLE_ENFORCE(cudaStreamDestroy(stream_)); @@ -152,19 +149,6 @@ cudnnHandle_t CUDADeviceContext::cudnn_handle() { cudaStream_t CUDADeviceContext::stream() { return stream_; } -curandGenerator_t CUDADeviceContext::curand_generator() { - if (!curand_generator_) { - SetDeviceId(place_.device); - PADDLE_ENFORCE(dynload::curandCreateGenerator(&curand_generator_, - CURAND_RNG_PSEUDO_DEFAULT)); - PADDLE_ENFORCE( - dynload::curandSetPseudoRandomGeneratorSeed(curand_generator_, seed_)); - - PADDLE_ENFORCE(dynload::curandSetStream(curand_generator_, stream_)); - } - return curand_generator_; -} - #endif // PADDLE_ONLY_CPU } // namespace platform diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index c5042ae33e47e04521e59e0d91ddd8d4efffe50a..11528e1194e4516891034fa8febdac3ba6eed204 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -17,7 +17,6 @@ limitations under the License. */ #ifndef PADDLE_ONLY_CPU #include "paddle/platform/dynload/cublas.h" #include "paddle/platform/dynload/cudnn.h" -#include "paddle/platform/dynload/curand.h" #include "paddle/platform/gpu_info.h" #define EIGEN_USE_GPU #endif @@ -40,7 +39,7 @@ class DeviceContext { class CPUDeviceContext : public DeviceContext { public: CPUDeviceContext(); - explicit CPUDeviceContext(CPUPlace); + explicit CPUDeviceContext(CPUPlace place); virtual ~CPUDeviceContext() {} Eigen::DefaultDevice* eigen_device() const; @@ -56,7 +55,7 @@ class EigenCudaStreamDevice; class CUDADeviceContext : public DeviceContext { public: - explicit CUDADeviceContext(GPUPlace); + explicit CUDADeviceContext(GPUPlace place); virtual ~CUDADeviceContext(); /*! \brief Wait for all operations completion in the stream. */ @@ -75,9 +74,6 @@ class CUDADeviceContext : public DeviceContext { /*! \brief Return cudnn handle in the device context. */ cudnnHandle_t cudnn_handle(); - /*! \brief Return curand handle in the device context. */ - curandGenerator_t curand_generator(); - /*! \brief Return cuda stream in the device context. 
*/ cudaStream_t stream(); // clang-format on @@ -85,18 +81,13 @@ class CUDADeviceContext : public DeviceContext { private: GPUPlace place_; - private: std::unique_ptr eigen_device_; std::unique_ptr eigen_stream_; - private: - uint64_t seed_; - // clang-format off cudaStream_t stream_{nullptr}; cudnnHandle_t cudnn_handle_{nullptr}; cublasHandle_t cublas_handle_{nullptr}; - curandGenerator_t curand_generator_{nullptr}; // clang-format on }; diff --git a/paddle/platform/device_context_test.cc b/paddle/platform/device_context_test.cc index 8b764bdcd9d92e6b2203e45160acee35ec110538..5883a55272f0f24c94d48bc43c62ddb7bef15465 100644 --- a/paddle/platform/device_context_test.cc +++ b/paddle/platform/device_context_test.cc @@ -43,8 +43,6 @@ TEST(Device, CUDADeviceContext) { ASSERT_NE(nullptr, cudnn_handle); cublasHandle_t cublas_handle = device_context->cublas_handle(); ASSERT_NE(nullptr, cublas_handle); - curandGenerator_t curand_handle = device_context->curand_generator(); - ASSERT_NE(nullptr, curand_handle); ASSERT_NE(nullptr, device_context->stream()); delete device_context; } diff --git a/paddle/pserver/ParameterClient2.cpp b/paddle/pserver/ParameterClient2.cpp index f7e391f76324a09c203dfbbb449feb050caa8fb4..54063a809a4f9e558f8d364f5c437f2b6d98925b 100644 --- a/paddle/pserver/ParameterClient2.cpp +++ b/paddle/pserver/ParameterClient2.cpp @@ -65,7 +65,6 @@ void ParameterClient2::initThreads() { LOG(INFO) << "parallel_thread_num dosent need to set"; } syncThreadPool_.reset(new SyncThreadPool(threadNum_)); - startThreads(); } @@ -224,6 +223,14 @@ void ParameterClient2::prepareSendData( request.set_cost(cost); request.set_batch_status(batchStatus); CHECK_EQ(request.blocks_size(), 0); + VLOG(10) << "request: trainer_id: " << request.trainer_id() + << " update_mode" << request.update_mode() + << " send_back_parameter: " << request.send_back_parameter() + << " send_back_parameter_type: " + << request.send_back_parameter_type() + << " num_samples: " << request.num_samples() + << " cost: " << request.cost() + << " batch_status: " << request.batch_status(); } for (const auto& segments : parameterSegments) { const auto it = parameterMap_.find(segments.id); @@ -251,11 +258,17 @@ void ParameterClient2::prepareSendData( CHECK(sendMat != nullptr) << "sendMat is nullptr"; syncThreadPool_->exec([&](int tid, size_t numThreads) { + std::lock_guard guard(sparseAutoGrowthMutex_); const auto& localIndices = prefetchMat->getLocalIndices(); /// num of sparse rows size_t nLocalBlocks = localIndices.size(); uint64_t beginDim = 0; uint64_t endDim = 0; + + // FIXME(typhoonzero): let it resize first + prefetchMat->getLocalRow(nLocalBlocks + 1); + sendMat->getLocalRow(nLocalBlocks + 1); + for (size_t row = 0; row < nLocalBlocks; ++row) { int64_t blockId = localIndices[row]; // local row -> sparse row int serverId = std::abs((blockId + nameHash) % serviceNum_); @@ -275,7 +288,6 @@ void ParameterClient2::prepareSendData( block->set_begin_pos(row * blockSize); /// block len block->set_block_size(endDim - beginDim); - if (sendingPara) { sendJob->parallelInputIovs[serverId].push_back( {sendMat->getLocalRow(row), sizeof(real) * (size_t)blockSize}); diff --git a/paddle/pserver/ParameterClient2.h b/paddle/pserver/ParameterClient2.h index 89b3ddd502151e537b81bdbb09f171dd6e13ba26..29b9eeacddf2945dd22b7b17fc87c7c74b868896 100644 --- a/paddle/pserver/ParameterClient2.h +++ b/paddle/pserver/ParameterClient2.h @@ -583,6 +583,7 @@ protected: #ifndef PADDLE_DISABLE_TIMER uint64_t forwardbackwordTime_; #endif + std::mutex 
sparseAutoGrowthMutex_;
   /// map id to parameter used for decoding protobuf data
   std::unordered_map<size_t, ParameterPtr> parameterMap_;
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index da99e5bd53458aa0cb201a3525e28c66ab63c52d..8d71629faa332fc1e099001950c25e879cbedff3 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2232,6 +2232,20 @@ class ClipLayer(LayerBase):
         self.config.inputs[0].clip_conf.max = max
 
 
+@config_layer('scale_shift')
+class ScaleShiftLayer(LayerBase):
+    def __init__(self, name, inputs, bias=True, **xargs):
+        super(ScaleShiftLayer, self).__init__(
+            name, 'scale_shift', 0, inputs=inputs, **xargs)
+        config_assert(
+            len(self.inputs) == 1,
+            'ScaleShiftLayer must have one and only one input.')
+        input_layer = self.get_input_layer(0)
+        self.set_layer_size(input_layer.size)
+        self.create_input_parameter(0, 1, [1, 1])
+        self.create_bias_parameter(bias, 1)
+
+
 # key: cost type
 # value: cost class
 g_cost_map = {}
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 1bc55c869601551aff5fc0311458f906385522d2..c9e3ded65cb8fc8d00d3decac6af2aff2b67a37d 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -133,6 +133,7 @@ __all__ = [
     'clip_layer',
     'slice_projection',
     'kmax_sequence_score_layer',
+    'scale_shift_layer',
 ]
 
 
@@ -230,6 +231,7 @@ class LayerType(object):
     CLIP_LAYER = 'clip'
 
     KMAX_SEQ_SCORE = 'kmax_seq_score'
+    SCALE_SHIFT_LAYER = 'scale_shift'
 
     @staticmethod
     def is_layer_type(type_name):
@@ -6210,3 +6212,43 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1):
 
     return LayerOutput(
         name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size)
+
+
+@wrap_name_default("scale_shift")
+@wrap_param_attr_default()
+@wrap_bias_attr_default()
+def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None):
+    """
+    A layer that applies a linear transformation to each element in each row
+    of the input matrix. For each element, the layer first re-scales it and
+    then adds a bias to it.
+
+    This layer is very similar to the SlopeInterceptLayer, except that the
+    scale and bias here are trainable.
+
+    .. math::
+
+        y = w * x + b
+
+    .. code-block:: python
+
+        scale_shift = scale_shift_layer(input=input_layer, bias_attr=False)
+
+    :param name: The name of this layer.
+    :type name: basestring
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param param_attr: The parameter attribute of scaling.
+    :type param_attr: ParameterAttribute
+    :param bias_attr: The parameter attribute of shifting.
+    :type bias_attr: ParameterAttribute
+    :return: LayerOutput object.
+ :rtype: LayerOutput + """ + Layer( + name=name, + type=LayerType.SCALE_SHIFT_LAYER, + inputs=Input(input.name, **param_attr.attr), + bias=ParamAttr.to_bias(bias_attr)) + return LayerOutput( + name, LayerType.SCALE_SHIFT_LAYER, parents=[input], size=input.size) diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index a61beb871ad064c617fa141451afcb2a5ac64854..3860699f6fb76871ee8ea100fce1b3118bbc041d 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer -test_kmax_seq_socre_layer test_seq_select_layers) +test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_shift_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_shift_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..35ade126a2586a8e3eee6f0ac3c7e49523c8f5c5 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_scale_shift_layer.protostr @@ -0,0 +1,72 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 100 + active_type: "" +} +layers { + name: "__scale_shift_0__" + type: "scale_shift" + size: 100 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "___scale_shift_0__.w0" + } +} +layers { + name: "__scale_shift_1__" + type: "scale_shift" + size: 100 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "___scale_shift_1__.w0" + } + bias_parameter_name: "___scale_shift_1__.wbias" +} +parameters { + name: "___scale_shift_0__.w0" + size: 1 + initial_mean: 0.0 + initial_std: 1.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___scale_shift_1__.w0" + size: 1 + initial_mean: 0.0 + initial_std: 1.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___scale_shift_1__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "data" +output_layer_names: "__scale_shift_0__" +output_layer_names: "__scale_shift_1__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "__scale_shift_0__" + layer_names: "__scale_shift_1__" + input_layer_names: "data" + output_layer_names: "__scale_shift_0__" + output_layer_names: "__scale_shift_1__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..dd589116fa9932144ca066d3fa4c929d1433a7f1 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py @@ -0,0 +1,9 @@ +from paddle.trainer_config_helpers import * + +data = data_layer(name='data', size=100) + +scale = scale_shift_layer(input=data, bias_attr=False) + +scale_shift = scale_shift_layer(input=data) + +outputs(scale, 
scale_shift) diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 65c02f2cfb05a1674ecac5e26da1f34bca93e7d5..3c4d239e4517433bab30903a27e7fe373bcf3c84 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -22,7 +22,7 @@ py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) py_test(test_operator SRCS test_operator.py) -# py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py) +py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py) py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_recurrent_op SRCS test_recurrent_op.py) py_test(test_sgd_op SRCS test_sgd_op.py) diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py index b42cadd11ab75abbc35763c8d12e8c27e995f0dc..9339cf28dabc95b46b958777200fb1db9dcf284f 100644 --- a/python/paddle/v2/framework/tests/test_net.py +++ b/python/paddle/v2/framework/tests/test_net.py @@ -6,8 +6,8 @@ import unittest def fc(X, W, Y): ret_v = core.Net.create() - ret_v.add_op(Operator("mul", X="X", Y="W", Out="pre_activation")) - ret_v.add_op(Operator("sigmoid", X="pre_activation", Y=Y)) + ret_v.append_op(Operator("mul", X="X", Y="W", Out="pre_activation")) + ret_v.append_op(Operator("sigmoid", X="pre_activation", Y=Y)) ret_v.complete_add_op(True) return ret_v @@ -16,12 +16,12 @@ class TestNet(unittest.TestCase): def test_net_all(self): net = core.Net.create() op1 = Operator("add_two", X="X", Y="Y", Out="Out") - net.add_op(op1) + net.append_op(op1) net2 = core.Net.create() - net2.add_op(fc(X="X", W="w", Y="fc.out")) + net2.append_op(fc(X="X", W="w", Y="fc.out")) net2.complete_add_op(True) - net.add_op(net2) + net.append_op(net2) net.complete_add_op(True) expected = ''' diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py index 3d4a34d8d713ff1beeeba8ac48ad95176f7a29f2..d6000ab9f9d5b969f96128b183f48d49000c8a5e 100644 --- a/python/paddle/v2/framework/tests/test_recurrent_op.py +++ b/python/paddle/v2/framework/tests/test_recurrent_op.py @@ -150,7 +150,7 @@ class TestRecurrentOp(unittest.TestCase): sig_op = Operator("sigmoid", X="sum", Y="h@alias") for op in [x_fc_op, h_fc_op, sum_op, sig_op]: - stepnet.add_op(op) + stepnet.append_op(op) stepnet.complete_add_op(True) self.rnnop.set_stepnet(stepnet) diff --git a/python/paddle/v2/framework/tests/test_rowwise_add_op.py b/python/paddle/v2/framework/tests/test_rowwise_add_op.py index 29d72e850099734a9828ccceed47cc0a57fc3d6b..45d569da29d13cf8e2a3cb9d67c2d01e8b365453 100644 --- a/python/paddle/v2/framework/tests/test_rowwise_add_op.py +++ b/python/paddle/v2/framework/tests/test_rowwise_add_op.py @@ -20,7 +20,7 @@ class RowwiseAddGradOpTest(GradientChecker): def test_rowwise_add(self): op = create_op("rowwise_add") inputs = { - "X": np.random.uniform(0.1, 1, [10, 10]).astype("float32"), + "X": np.random.uniform(0.1, 1, [5, 10]).astype("float32"), "b": np.random.uniform(0.1, 1, [10]).astype("float32") } self.check_grad(op, inputs, set(["X", "b"]), "Out")
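
For readers skimming this patch, the two user-visible pieces are the new scale_shift_layer config helper and the add_op -> append_op rename on core.Net. The short sketch below (not part of the patch itself) mirrors the usage patterns already present in the added test_scale_shift_layer.py and the updated test_net.py; the variable names and the sigmoid output name are illustrative only.

# Trainer-config side: scale_shift learns a scalar w (and optionally a scalar b)
# and computes y = w * x + b element-wise over its input.
from paddle.trainer_config_helpers import *

data = data_layer(name='data', size=100)
scale = scale_shift_layer(input=data, bias_attr=False)  # y = w * x
scale_shift = scale_shift_layer(input=data)             # y = w * x + b
outputs(scale, scale_shift)

# v2 framework side: NetOp now exposes append_op() instead of add_op().
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator

net = core.Net.create()
net.append_op(Operator("mul", X="X", Y="W", Out="pre_activation"))
net.append_op(Operator("sigmoid", X="pre_activation", Y="fc.out"))
net.complete_add_op(True)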