diff --git a/doc/fluid/api/initializer.rst b/doc/fluid/api/initializer.rst
index dc0b52b14fd242dfaded1cb9a8e0ab9eb66b0607..96682c8f9fbb8683ad690a7c5809865e37a1920e 100644
--- a/doc/fluid/api/initializer.rst
+++ b/doc/fluid/api/initializer.rst
@@ -32,6 +32,15 @@ Normal
     :members:
     :noindex:
 
+.. _api_fluid_initializer_TruncatedNormal:
+
+TruncatedNormal
+---------------
+
+.. autoclass:: paddle.fluid.initializer.TruncatedNormal
+    :members:
+    :noindex:
+
 .. _api_fluid_initializer_Xavier:
 
 Xavier
diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index bbd9429e30f4ae0dd56c5a89f71ad6e5391a7edf..534acffa7ffdc07bfcbfb328b540221c2b988de5 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -79,6 +79,7 @@ paddle.fluid.io.get_inference_program ArgSpec(args=['target_vars', 'main_program
 paddle.fluid.initializer.ConstantInitializer.__init__ ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False))
 paddle.fluid.initializer.UniformInitializer.__init__ ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0))
 paddle.fluid.initializer.NormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
+paddle.fluid.initializer.TruncatedNormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
 paddle.fluid.initializer.XavierInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'fan_out', 'seed'], varargs=None, keywords=None, defaults=(True, None, None, 0))
 paddle.fluid.initializer.BilinearInitializer.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.initializer.MSRAInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0))
@@ -124,7 +125,7 @@ paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name
 paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None))
 paddle.fluid.layers.l2_normalize ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None))
-paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'name'], varargs=None, keywords=None, defaults=(False, False, None))
+paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None))
 paddle.fluid.layers.topk ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_times'], varargs=None, keywords=None, defaults=(0, False))
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
@@ -168,6 +169,7 @@ paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=Non
 paddle.fluid.layers.pad2d ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None))
 paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
+paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
 paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
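For context, the two user-visible changes recorded in API.spec above (the `TruncatedNormal` initializer and the new `alpha` argument of `layers.matmul`) can be exercised like this. This is a minimal usage sketch, not code from this patch; the shapes are invented and the `create_parameter` call assumes the existing fluid API:

```python
import paddle.fluid as fluid

# TruncatedNormal draws from N(loc, scale) and keeps every sample within
# two standard deviations (see the new truncated_gaussian_random op below).
w = fluid.layers.create_parameter(
    shape=[128, 64],
    dtype='float32',
    default_initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.02))

x = fluid.layers.data(name='x', shape=[128], dtype='float32')
# New `alpha` argument: out = alpha * (x . w), here folding in a 1/sqrt(d) scale.
out = fluid.layers.matmul(x, w, alpha=128 ** -0.5)
```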
diff --git a/paddle/fluid/framework/data_device_transform.cc b/paddle/fluid/framework/data_device_transform.cc
index 6bcfc6cd55f02f0d4f0f6e3170e7cc19ce666a28..fee6ba40047053ed5662fe044eceb0c687bd4db9 100644
--- a/paddle/fluid/framework/data_device_transform.cc
+++ b/paddle/fluid/framework/data_device_transform.cc
@@ -25,6 +25,10 @@ void TransDataDevice(const Tensor &in, const platform::Place &dst_place,
       in.place().which(), dst_place.which(),
       "Currently, model parallelism is only supported between CPU and CUDA");
 
+  // NOTE(yy): TransDataDevice should wait for computation of input.
+  platform::DeviceContextPool::Instance().Get(in.place())->Wait();
+  platform::DeviceContextPool::Instance().Get(dst_place)->Wait();
+
   // FIXME(zcd): TransDataDevice is used to transform data from GPU to CPU and
   // the enforced checkings have been done in GetDeviceContext, so the
   // `dev_ctx->Wait()` is necessary. But `dev_ctx->Wait()` will make the program
diff --git a/paddle/fluid/framework/grad_op_desc_maker.h b/paddle/fluid/framework/grad_op_desc_maker.h
index b4d3fa25c35fbf25b3d2fdd9fa1045dda0f773ec..9bccb1a32bf63b30351ef4428594691b0eef0b6a 100644
--- a/paddle/fluid/framework/grad_op_desc_maker.h
+++ b/paddle/fluid/framework/grad_op_desc_maker.h
@@ -129,6 +129,9 @@ class GradOpDescMakerBase {
 
   std::string ForwardOpType() const { return this->fwd_op_.Type(); }
 
+ protected:
+  const OpDesc& ForwardOp() const { return fwd_op_; }
+
  private:
   const OpDesc& fwd_op_;
   const std::unordered_set<std::string>& no_grad_set_;
diff --git a/paddle/fluid/framework/prune.cc b/paddle/fluid/framework/prune.cc
index 57c1b822d8d4f095f33cba2bfd5210f7ee19dd9f..0afcd85fe7c2e6806eb67797300e2731977573fb 100644
--- a/paddle/fluid/framework/prune.cc
+++ b/paddle/fluid/framework/prune.cc
@@ -183,28 +183,5 @@ void Prune(const proto::ProgramDesc& input, proto::ProgramDesc* output) {
   output->clear_blocks();
   prune_impl(input, output, 0, -1, &dependent_vars);
 }
-
-void inference_optimize_impl(proto::ProgramDesc* input, int block_id) {
-  auto* op_field = input->mutable_blocks(block_id)->mutable_ops();
-  for (auto& op_desc : *op_field) {
-    for (auto& attr : *op_desc.mutable_attrs()) {
-      if (attr.name() == "is_test") {
-        attr.set_b(true);
-        break;
-      }
-    }
-  }
-}
-
-void InferenceOptimize(const proto::ProgramDesc& input,
-                       proto::ProgramDesc* output) {
-  *output = input;
-  int num_blocks = output->blocks_size();
-  PADDLE_ENFORCE_GT(num_blocks, 0, "ProgramDesc must have at least one block");
-  for (int i = 0; i < num_blocks; ++i) {
-    inference_optimize_impl(output, i);
-  }
-}
-
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/prune.h b/paddle/fluid/framework/prune.h
index 4c5a1dedd9950553e93dc681a40af179bdb8a30d..1be7cd25d099a18e6b7e2afaf34b1632f881b823 100644
--- a/paddle/fluid/framework/prune.h
+++ b/paddle/fluid/framework/prune.h
@@ -22,8 +22,5 @@ namespace framework {
 
 void Prune(const proto::ProgramDesc& input, proto::ProgramDesc* output);
 
-void InferenceOptimize(const proto::ProgramDesc& input,
-                       proto::ProgramDesc* output);
-
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/operators/array_to_lod_tensor_op.cc b/paddle/fluid/operators/array_to_lod_tensor_op.cc
index bc725e53574e813db94ad690c3bdc90454f68e1e..b8b8b2290a0f002fd379032e28590b84a1da38e9 100644
--- a/paddle/fluid/operators/array_to_lod_tensor_op.cc
+++ b/paddle/fluid/operators/array_to_lod_tensor_op.cc
@@ -25,7 +25,7 @@ namespace operators {
 
 using LoD = framework::LoD;
 
-class ArrayToLoDFunctor;
+struct ArrayToLoDFunctor;
 template <typename DeviceContext>
 struct ArrayToLoDFunctorImpl {
   const ArrayToLoDFunctor *prev_functor_;
diff --git a/paddle/fluid/operators/elementwise_mul_op.cc b/paddle/fluid/operators/elementwise_mul_op.cc
index 7cd67e74de6b9c4fbc718f60b4f671ccab2f9956..86a8459a79135d1fbcba6886172acc5a2abdb88b 100644
--- a/paddle/fluid/operators/elementwise_mul_op.cc
+++ b/paddle/fluid/operators/elementwise_mul_op.cc
@@ -13,9 +13,45 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise_mul_op.h"
+#include <string>
 #include "paddle/fluid/operators/elementwise_op.h"
+
+namespace paddle {
+namespace operators {
+
+class ElementwiseMulOpGradDescMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("elementwise_mul_grad");
+    op->SetInput("X", Input("X"));
+    op->SetInput("Y", Input("Y"));
+    op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    op->SetAttrMap(Attrs());
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    return op;
+  }
+};
+
+class ElementwiseMulOpMaker : public ElementwiseOpMaker {
+ protected:
+  virtual std::string GetName() const { return "Mul"; }
+  virtual std::string GetEquation() const { return "Out = X \\\\odot Y"; }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
 namespace ops = paddle::operators;
-REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\\\odot Y");
+REGISTER_OPERATOR(elementwise_mul, ops::ElementwiseOp,
+                  ops::ElementwiseMulOpMaker, ops::ElementwiseOpInferVarType,
+                  ops::ElementwiseMulOpGradDescMaker);
+REGISTER_OPERATOR(elementwise_mul_grad, ops::ElementwiseOpGrad);
+
 REGISTER_OP_CPU_KERNEL(
     elementwise_mul,
     ops::ElementwiseMulKernel<paddle::platform::CPUDeviceContext, float>,
diff --git a/paddle/fluid/operators/elementwise_mul_op.h b/paddle/fluid/operators/elementwise_mul_op.h
index 4437da4d95f97b5cbbca1650badf9710c26b4380..b870d08a1a28fd3e678aeb7211f7e3ec8b2c4c65 100644
--- a/paddle/fluid/operators/elementwise_mul_op.h
+++ b/paddle/fluid/operators/elementwise_mul_op.h
@@ -93,8 +93,8 @@ class ElementwiseMulGradKernel : public ElemwiseGradKernel<T> {
     auto* x = ctx.Input<Tensor>("X");
     auto* y = ctx.Input<Tensor>("Y");
-    auto* out = ctx.Input<Tensor>("Out");
     auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto* out = dout;  // out is not necessary
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
     int axis = ctx.Attr<int>("axis");
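The kernel change above works because multiplication's backward never reads the forward output: for Out = X ⊙ Y, dX = dOut ⊙ Y and dY = dOut ⊙ X. A NumPy sketch of what `elementwise_mul_grad` computes (same-shape case only, ignoring the `axis` broadcast):

```python
import numpy as np

def elementwise_mul_grad(x, y, dout):
    """Backward of out = x * y; note that `out` itself never appears."""
    dx = dout * y  # d(out)/d(x) = y
    dy = dout * x  # d(out)/d(y) = x
    return dx, dy

x = np.random.rand(3, 4).astype('float32')
y = np.random.rand(3, 4).astype('float32')
dout = np.ones_like(x)  # upstream gradient
dx, dy = elementwise_mul_grad(x, y, dout)
assert np.allclose(dx, y) and np.allclose(dy, x)
```

Because the custom GradDescMaker no longer lists `Out` as an input of the grad op, the runtime is free to release the forward output's buffer before the backward pass runs.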
diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc
index 7182149164854038bb67a9f06cdbec8a4a0f1fb2..242a1b9ae92ade0caf1b0f1fcb5458b8b7070d84 100644
--- a/paddle/fluid/operators/matmul_op.cc
+++ b/paddle/fluid/operators/matmul_op.cc
@@ -59,7 +59,8 @@ class MatMulKernel : public framework::OpKernel<T> {
         RowMatrixFromVector(x.dims()), 0, context.Attr<bool>("transpose_X"));
     auto mat_dim_b = math::CreateMatrixDescriptor(
         ColumnMatrixFromVector(y.dims()), 0, context.Attr<bool>("transpose_Y"));
-    blas.MatMul(x, mat_dim_a, y, mat_dim_b, T(1), out, T(0));
+    auto scale = static_cast<T>(context.Attr<float>("alpha"));
+    blas.MatMul(x, mat_dim_a, y, mat_dim_b, scale, out, T(0));
   }
 };
 
@@ -185,7 +186,8 @@ class MatMulGradKernel : public framework::OpKernel<T> {
     auto blas = math::GetBlas<DeviceContext, T>(context);
     auto mat_dim_a = math::CreateMatrixDescriptor(a.dims(), 0, trans_a);
     auto mat_dim_b = math::CreateMatrixDescriptor(b.dims(), 0, trans_b);
-    blas.MatMul(a, mat_dim_a, b, mat_dim_b, T(1), out, T(0));
+    blas.MatMul(a, mat_dim_a, b, mat_dim_b,
+                static_cast<T>(context.Attr<float>("alpha")), out, T(0));
   }
 
   void CalcInputGrad(const framework::ExecutionContext &context,
@@ -334,6 +336,7 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
              R"DOC(If true, use the transpose of `Y`.
         )DOC")
         .SetDefault(false);
+    AddAttr<float>("alpha", "The scale of Out").SetDefault(1.0f);
     AddComment(R"DOC(
 MatMul Operator.
diff --git a/paddle/fluid/operators/mul_op.cc b/paddle/fluid/operators/mul_op.cc
index 2a8e4af516ce9341772d4668dc993215b4aae24d..363abfb0e0c96e8a4d82124dff168f28e339a9ae 100644
--- a/paddle/fluid/operators/mul_op.cc
+++ b/paddle/fluid/operators/mul_op.cc
@@ -156,12 +156,29 @@ class MulGradOp : public framework::OperatorWithKernel {
   }
 };
 
+class MulOpGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    std::unique_ptr<framework::OpDesc> retv(new framework::OpDesc());
+    retv->SetType("mul_grad");
+    retv->SetInput("X", Input("X"));
+    retv->SetInput("Y", Input("Y"));
+    retv->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    retv->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    retv->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
+    retv->SetAttrMap(Attrs());
+    return retv;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker, ops::MulOpGradMaker);
 REGISTER_OPERATOR(mul_grad, ops::MulGradOp);
 REGISTER_OP_CPU_KERNEL(
     mul, ops::MulKernel<paddle::platform::CPUDeviceContext, float>,
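`MulOpGradMaker` mirrors the elementwise change: `mul_grad` consumes only X, Y, and the output gradient, so registering it instead of `DefaultGradOpDescMaker` (which forwards every forward input *and* output to the grad op) lets `Out` be freed early. For `matmul`, the new `alpha` scales the product, and since d(αXY) = α·d(XY), the grad kernel reads the same attribute. A NumPy sketch, illustrative and 2-D only:

```python
import numpy as np

def matmul_with_alpha(x, y, alpha=1.0):
    return alpha * (x @ y)            # what MatMulKernel now computes

def matmul_grad_with_alpha(x, y, dout, alpha=1.0):
    dx = alpha * (dout @ y.T)         # alpha folds into both gradients,
    dy = alpha * (x.T @ dout)         # mirroring MatMulGradKernel
    return dx, dy

x = np.random.rand(4, 8).astype('float32')
y = np.random.rand(8, 5).astype('float32')
out = matmul_with_alpha(x, y, alpha=0.5)
dx, dy = matmul_grad_with_alpha(x, y, np.ones_like(out), alpha=0.5)
```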
diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc
index c614de2eac143b3a545c60226aefa93dd72dea4f..13be6c65be58314a75124106eb09b1300305baf0 100644
--- a/paddle/fluid/operators/scale_op.cc
+++ b/paddle/fluid/operators/scale_op.cc
@@ -52,6 +52,12 @@ $$Out = scale*X$$
 )DOC");
     AddAttr<float>("scale", "The scaling factor of the scale operator.")
         .SetDefault(1.0);
+    AddAttr<float>("bias", "The bias of the scale operator.").SetDefault(0.0);
+    AddAttr<bool>(
+        "bias_after_scale",
+        "Apply bias addition after or before scaling. It is useful for "
+        "numeric stability in some circumstances.")
+        .SetDefault(true);
   }
 };
 
@@ -80,6 +86,8 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker {
     grad_op->SetInput("X", OutputGrad("Out"));
     grad_op->SetOutput("Out", InputGrad("X"));
     grad_op->SetAttr("scale", GetAttr("scale"));
+    grad_op->SetAttr("bias", 0.0f);
+    grad_op->SetAttr("bias_after_scale", true);
     return std::unique_ptr<framework::OpDesc>(grad_op);
   }
 };
diff --git a/paddle/fluid/operators/scale_op.h b/paddle/fluid/operators/scale_op.h
index fe035aba81dd74d21539974beed255275be3013b..d8a199bc2b860515645b4954b49d8eb59fbd02dc 100644
--- a/paddle/fluid/operators/scale_op.h
+++ b/paddle/fluid/operators/scale_op.h
@@ -34,6 +34,8 @@ class ScaleKernel : public framework::OpKernel<T> {
                    "in and out should have the same dim");
 
     auto scale = static_cast<T>(ctx.Attr<float>("scale"));
+    auto bias = static_cast<T>(ctx.Attr<float>("bias"));
+    auto bias_after_scale = ctx.Attr<bool>("bias_after_scale");
 
     if (in_var->IsType<framework::SelectedRows>() && in_var != out_var) {
       auto& in_slr = in_var->Get<framework::SelectedRows>();
@@ -45,7 +47,11 @@
     auto eigen_out = framework::EigenVector<T>::Flatten(*out);
     auto eigen_in = framework::EigenVector<T>::Flatten(*in);
     auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
-    eigen_out.device(dev) = scale * eigen_in;
+    if (bias_after_scale) {
+      eigen_out.device(dev) = scale * eigen_in + bias;
+    } else {
+      eigen_out.device(dev) = scale * (eigen_in + bias);
+    }
   }
 };
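The two kernel branches differ only in where `bias` enters; a small numeric illustration with made-up values:

```python
import numpy as np

x, scale, bias = np.array([1.0, 2.0]), 3.0, 0.5

out_after = scale * x + bias     # bias_after_scale=True  -> [3.5, 6.5]
out_before = scale * (x + bias)  # bias_after_scale=False -> [4.5, 7.5]

# The grad maker pins bias=0 and bias_after_scale=True for the backward op,
# consistent with d(scale * x + bias) / dx = scale regardless of bias.
dout = np.ones_like(x)
dx = scale * dout
```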
diff --git a/paddle/fluid/operators/truncated_gaussian_random_op.cc b/paddle/fluid/operators/truncated_gaussian_random_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d854e2803975543b51c50ea2bc173322d3c3ca5e
--- /dev/null
+++ b/paddle/fluid/operators/truncated_gaussian_random_op.cc
@@ -0,0 +1,255 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <cmath>
+#include <limits>
+#include <random>
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+// reference: https://gist.github.com/lakshayg/d80172fe5ae3c5d2c2aedb53c250320e
+template <typename T>
+T Erfinv(T x) {
+  if (x < -1 || x > 1) {
+    return std::numeric_limits<T>::quiet_NaN();
+  } else if (x == 1.0) {
+    return std::numeric_limits<T>::infinity();
+  } else if (x == -1.0) {
+    return -std::numeric_limits<T>::infinity();
+  }
+
+  const T LN2 = 6.931471805599453094172321214581e-1;
+
+  const T A0 = 1.1975323115670912564578e0;
+  const T A1 = 4.7072688112383978012285e1;
+  const T A2 = 6.9706266534389598238465e2;
+  const T A3 = 4.8548868893843886794648e3;
+  const T A4 = 1.6235862515167575384252e4;
+  const T A5 = 2.3782041382114385731252e4;
+  const T A6 = 1.1819493347062294404278e4;
+  const T A7 = 8.8709406962545514830200e2;
+
+  const T B0 = 1.0000000000000000000e0;
+  const T B1 = 4.2313330701600911252e1;
+  const T B2 = 6.8718700749205790830e2;
+  const T B3 = 5.3941960214247511077e3;
+  const T B4 = 2.1213794301586595867e4;
+  const T B5 = 3.9307895800092710610e4;
+  const T B6 = 2.8729085735721942674e4;
+  const T B7 = 5.2264952788528545610e3;
+
+  const T C0 = 1.42343711074968357734e0;
+  const T C1 = 4.63033784615654529590e0;
+  const T C2 = 5.76949722146069140550e0;
+  const T C3 = 3.64784832476320460504e0;
+  const T C4 = 1.27045825245236838258e0;
+  const T C5 = 2.41780725177450611770e-1;
+  const T C6 = 2.27238449892691845833e-2;
+  const T C7 = 7.74545014278341407640e-4;
+
+  const T D0 = 1.4142135623730950488016887e0;
+  const T D1 = 2.9036514445419946173133295e0;
+  const T D2 = 2.3707661626024532365971225e0;
+  const T D3 = 9.7547832001787427186894837e-1;
+  const T D4 = 2.0945065210512749128288442e-1;
+  const T D5 = 2.1494160384252876777097297e-2;
+  const T D6 = 7.7441459065157709165577218e-4;
+  const T D7 = 1.4859850019840355905497876e-9;
+
+  const T E0 = 6.65790464350110377720e0;
+  const T E1 = 5.46378491116411436990e0;
+  const T E2 = 1.78482653991729133580e0;
+  const T E3 = 2.96560571828504891230e-1;
+  const T E4 = 2.65321895265761230930e-2;
+  const T E5 = 1.24266094738807843860e-3;
+  const T E6 = 2.71155556874348757815e-5;
+  const T E7 = 2.01033439929228813265e-7;
+
+  const T F0 = 1.414213562373095048801689e0;
+  const T F1 = 8.482908416595164588112026e-1;
+  const T F2 = 1.936480946950659106176712e-1;
+  const T F3 = 2.103693768272068968719679e-2;
+  const T F4 = 1.112800997078859844711555e-3;
+  const T F5 = 2.611088405080593625138020e-5;
+  const T F6 = 2.010321207683943062279931e-7;
+  const T F7 = 2.891024605872965461538222e-15;
+
+  T abs_x = std::abs(x);
+
+  if (abs_x <= 0.85) {
+    T r = 0.180625 - 0.25 * x * x;
+    T num = (((((((A7 * r + A6) * r + A5) * r + A4) * r + A3) * r + A2) * r +
+              A1) * r + A0);
+    T den = (((((((B7 * r + B6) * r + B5) * r + B4) * r + B3) * r + B2) * r +
+              B1) * r + B0);
+    return x * num / den;
+  }
+
+  T r = std::sqrt(LN2 - std::log(1.0 - abs_x));
+
+  T num, den;
+  if (r <= 5.0) {
+    r = r - 1.6;
+    num = (((((((C7 * r + C6) * r + C5) * r + C4) * r + C3) * r + C2) * r +
+            C1) * r + C0);
+    den = (((((((D7 * r + D6) * r + D5) * r + D4) * r + D3) * r + D2) * r +
+            D1) * r + D0);
+  } else {
+    r = r - 5.0;
+    num = (((((((E7 * r + E6) * r + E5) * r + E4) * r + E3) * r + E2) * r +
+            E1) * r + E0);
+    den = (((((((F7 * r + F6) * r + F5) * r + F4) * r + F3) * r + F2) * r +
+            F1) * r + F0);
+  }
+
+  if (x < 0) {
+    return -num / den;
+  } else {
+    return num / den;
+  }
+}
+
+template <typename T>
+struct TruncatedNormal {
+  T mean, std;
+  T a_normal_cdf;
+  T b_normal_cdf;
+  TruncatedNormal(T mean, T std) : mean(mean), std(std) {
+    auto normal_cdf = [](T x) {
+      return (1.0 + std::erf(x / std::sqrt(2.0))) / 2.0;
+    };
+    a_normal_cdf = normal_cdf(-2.0);
+    b_normal_cdf = normal_cdf(2.0);
+  }
+
+  T operator()(T value) const {
+    auto p = a_normal_cdf + (b_normal_cdf - a_normal_cdf) * value;
+    // x = mean + std * Phi^{-1}(p), with Phi^{-1}(p) = sqrt(2) * erfinv(2p - 1)
+    return std::sqrt(2.0) * std * Erfinv(2 * p - 1) + mean;
+  }
+};
+
+template <typename T>
+class CPUTruncatedGaussianRandomKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    float mean = context.Attr<float>("mean");
+    float std = context.Attr<float>("std");
+    auto* tensor = context.Output<framework::Tensor>("Out");
+    T* data = tensor->mutable_data<T>(context.GetPlace());
+
+    unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
+    std::minstd_rand engine;
+    if (seed == 0) {
+      seed = std::random_device()();
+    }
+    engine.seed(seed);
+    std::uniform_real_distribution<T> dist(std::numeric_limits<T>::min(),
+                                           1.0);
+    TruncatedNormal<T> truncated_normal(mean, std);
+    int64_t size = tensor->numel();
+    for (int64_t i = 0; i < size; ++i) {
+      data[i] = truncated_normal(dist(engine));
+    }
+  }
+};
+
+class TruncatedGaussianRandomOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(
+        ctx->HasOutput("Out"),
+        "Output(Out) of TruncatedGaussianRandomOp should not be null.");
+    auto shape = ctx->Attrs().Get<std::vector<int>>("shape");
+    std::vector<int64_t> out_dim;
+    out_dim.reserve(shape.size());
+    for (auto dim : shape) {
+      out_dim.push_back(static_cast<int64_t>(dim));
+    }
+    PADDLE_ENFORCE(shape.size() > 0UL,
+                   "shape can be one int or array. shape must be set.");
+    ctx->SetOutputDim("Out", framework::make_ddim(out_dim));
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    framework::LibraryType library{framework::LibraryType::kPlain};
+    framework::DataLayout layout{framework::DataLayout::kAnyLayout};
+    return framework::OpKernelType(
+        static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype")),
+        ctx.device_context(), layout, library);
+  }
+};
+
+class TruncatedGaussianRandomOpMaker
+    : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddOutput("Out", "Output tensor of truncated gaussian random op.");
+
+    AddAttr<std::vector<int>>("shape",
+                              "(vector<int>) "
+                              "The dimension of random tensor.");
+    AddAttr<float>("mean",
+                   "(float, default 0.0) "
+                   "mean of random tensor.")
+        .SetDefault(.0f);
+    AddAttr<float>("std",
+                   "(float, default 1.0) "
+                   "std of random tensor.")
+        .SetDefault(1.0f);
+    AddAttr<int>("seed",
+                 "(int, default 0) "
+                 "Random seed of generator. "
+                 "0 means to use the system-wide seed. "
+                 "Note that if seed is not 0, this operator will always "
+                 "generate the same random numbers every time.")
+        .SetDefault(0);
+    AddAttr<int>("dtype",
+                 "(int, default 5(FP32)) "
+                 "Output data type.")
+        .SetDefault(framework::proto::VarType::FP32);
+    AddComment(R"DOC(
+TruncatedGaussianRandom Operator.
+
+Used to initialize tensors with a truncated gaussian random generator.
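How the sampler works: draw u ~ U(0, 1), squeeze it into [Φ(−2), Φ(2)] (the functor's `a_normal_cdf`/`b_normal_cdf`) so that inverting the normal CDF via Φ⁻¹(p) = √2·erfinv(2p − 1) can only land within two standard deviations, then shift and scale by `mean` and `std`. A SciPy sketch of the same transform, illustrative only (`scipy` is not a Paddle dependency):

```python
import numpy as np
from math import erf, sqrt
from scipy.special import erfinv

def truncated_normal(mean, std, size):
    # Standard-normal CDF at the truncation points -2 and +2.
    a_cdf = (1 + erf(-2 / sqrt(2))) / 2
    b_cdf = (1 + erf(+2 / sqrt(2))) / 2
    # Lower bound mirrors std::numeric_limits<T>::min() in the CPU kernel.
    u = np.random.uniform(low=np.finfo(np.float32).tiny, high=1.0, size=size)
    p = a_cdf + (b_cdf - a_cdf) * u          # uniform over [Phi(-2), Phi(2)]
    return mean + std * sqrt(2) * erfinv(2 * p - 1)  # invert the CDF

samples = truncated_normal(0.0, 1.0, 10000)
assert samples.min() >= -2.0 and samples.max() <= 2.0  # within two sigmas
```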
+ +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(truncated_gaussian_random, + ops::TruncatedGaussianRandomOp, + ops::TruncatedGaussianRandomOpMaker); +REGISTER_OP_CPU_KERNEL(truncated_gaussian_random, + ops::CPUTruncatedGaussianRandomKernel); diff --git a/paddle/fluid/operators/truncated_gaussian_random_op.cu b/paddle/fluid/operators/truncated_gaussian_random_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..ad2a9021bfe344d838dff2040b3fb9371274e218 --- /dev/null +++ b/paddle/fluid/operators/truncated_gaussian_random_op.cu @@ -0,0 +1,76 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" + +namespace paddle { +namespace operators { + +template +struct TruncatedNormal { + T mean, std; + T a_normal_cdf; + T b_normal_cdf; + unsigned int seed; + T numeric_min; + + __host__ __device__ TruncatedNormal(T mean, T std, T numeric_min, int seed) + : mean(mean), std(std), seed(seed), numeric_min(numeric_min) { + a_normal_cdf = (1.0 + erff(-2.0 / sqrtf(2.0))) / 2.0; + b_normal_cdf = (1.0 + erff(2.0 / sqrtf(2.0))) / 2.0; + } + + __host__ __device__ T operator()(const unsigned int n) const { + thrust::minstd_rand rng; + rng.seed(seed); + thrust::uniform_real_distribution dist(numeric_min, 1); + rng.discard(n); + T value = dist(rng); + auto p = a_normal_cdf + (b_normal_cdf - a_normal_cdf) * value; + return (std::sqrt(2.0) * erfinvf(2 * p - 1) + mean) * std; + } +}; + +template +class GPUTruncatedGaussianRandomKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* tensor = context.Output("Out"); + T* data = tensor->mutable_data(context.GetPlace()); + unsigned int seed = static_cast(context.Attr("seed")); + if (seed == 0) { + std::random_device rd; + seed = rd(); + } + T mean = static_cast(context.Attr("mean")); + T std = static_cast(context.Attr("std")); + thrust::counting_iterator index_sequence_begin(0); + int64_t size = tensor->numel(); + thrust::transform( + index_sequence_begin, index_sequence_begin + size, + thrust::device_ptr(data), + TruncatedNormal(mean, std, std::numeric_limits::min(), seed)); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_CUDA_KERNEL( + truncated_gaussian_random, + paddle::operators::GPUTruncatedGaussianRandomKernel); diff --git a/paddle/fluid/operators/while_op.cc b/paddle/fluid/operators/while_op.cc index 791138a8c0eb3c477942a8b723206a8f8a3eac77..16eac1ec2406c147fa765bc014038ae03a1416b2 100644 --- a/paddle/fluid/operators/while_op.cc +++ b/paddle/fluid/operators/while_op.cc @@ -1,16 +1,16 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include <vector>
 #include "paddle/fluid/framework/executor.h"
@@ -138,6 +138,10 @@ class WhileGradOp : public framework::OperatorBase {
         auto inside_og_name = inside_og_names[i];
         VLOG(8) << "Linking outside " << outside_og_name << " --> inside "
                 << inside_og_name;
+        if (scope.FindVar(outside_og_name) == nullptr) {
+          continue;
+        }
+
         auto &og_outside =
             detail::Ref(scope.FindVar(outside_og_name),
                         "Cannot find Outside Gradient %s", outside_og_name);
@@ -167,20 +171,46 @@ class WhileGradOp : public framework::OperatorBase {
             PADDLE_ENFORCE_EQ(inside_array[j].numel(), 0);
           }
         }
+      } else {
+        PADDLE_THROW("Currently only support LoDTensor and LoDTensorArray.");
       }
     }
     executor.RunPreparedContext(ctx.get(), *cur_scope_iter, false, true,
                                 true);
 
-    auto &pg_names = Outputs(kXGRAD);
+    // Outputs(kXGRAD) contains the names of the gradients of both
+    // parameters and inputs.
+    auto &pg_ig_names = Outputs(kXGRAD);
     auto &p_names = Inputs(kX);
-    PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size());
-    for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) {
-      if (pg_names[param_id] == framework::kEmptyVarName) {
+    PADDLE_ENFORCE_EQ(pg_ig_names.size(), p_names.size());
+    for (size_t param_id = 0; param_id < pg_ig_names.size(); ++param_id) {
+      if (pg_ig_names[param_id] == framework::kEmptyVarName) {
         continue;  // parameter doesn't have gradient
       }
      auto inside_grad_name = framework::GradVarName(p_names[param_id]);
 
+      // Some grad ops have inputs without gradients; for example, in
+      // lookup_table_grad_op the input (Idx) has no gradient.
+      auto pg_ig_var = cur_scope.FindVar(inside_grad_name);
+      PADDLE_ENFORCE(pg_ig_var != nullptr);
+      if (pg_ig_var->IsType<framework::LoDTensorArray>()) {
+        auto pg_ig_lod_t_arr =
+            pg_ig_var->GetMutable<framework::LoDTensorArray>();
+        bool empty = true;
+        for (auto &each : *pg_ig_lod_t_arr) {
+          if (each.numel() != 0) {
+            empty = false;
+            break;
+          }
+        }
+        if (empty) {
+          LOG(WARNING) << pg_ig_names[param_id]
+                       << " is not found in cur_scope.";
+          continue;
+        }
+      }
+
       // // TODO(tonyyang-svail): Not sure we need the following
       // // If does not compute gradient of that variable inside rnn,
       // just
@@ -194,6 +224,11 @@
       if (cur_scope_iter == step_scopes->rbegin()) {
         auto *var = (*cur_scope_iter)->FindVar(inside_grad_name);
         PADDLE_ENFORCE_NOT_NULL(var, "Can not find var %s", inside_grad_name);
+        PADDLE_ENFORCE(var->IsType<framework::LoDTensorArray>() ||
+                           var->IsType<LoDTensor>(),
+                       "Currently the type of var only can be LoDTensorArray "
+                       "or LoDTensor.");
+
         if (var->IsType<LoDTensor>()) {
           auto &inside_tensor = var->Get<framework::LoDTensor>();
           framework::AttributeMap attrs;
@@ -201,7 +236,7 @@
           attrs["shape"] = framework::vectorize2int(inside_tensor.dims());
           attrs["value"] = 0.0f;
 
-          auto var_name = pg_names[param_id];
+          auto var_name = pg_ig_names[param_id];
           auto zero_op = framework::OpRegistry::CreateOp(
               "fill_constant", framework::VariableNameMap{},
               {{"Out", {var_name}}}, attrs);
@@ -213,8 +248,8 @@
       }
       auto new_inside_name = cur_scope.Rename(inside_grad_name);
       auto sum_op = framework::OpRegistry::CreateOp(
-          "sum", {{"X", {pg_names[param_id], new_inside_name}}},
-          {{"Out", {pg_names[param_id]}}},
+          "sum", {{"X", {pg_ig_names[param_id], new_inside_name}}},
+          {{"Out", {pg_ig_names[param_id]}}},
          framework::AttributeMap{{"use_mkldnn", {false}}});
       sum_op->Run(cur_scope, dev_place);
       cur_scope.Rename(new_inside_name, inside_grad_name);
@@ -281,6 +316,7 @@
            parent_block->FindVarRecursive(input_name) != nullptr)) {
         continue;
       }
+      output_grads.insert(input_name);
     }
 
     for (auto &output_name : op->OutputArgumentNames()) {
@@ -309,13 +345,13 @@ class WhileGradOpVarTypeInference : public framework::VarTypeInference {
   void operator()(const framework::OpDesc &op_desc,
                   framework::BlockDesc *block) const override {
     auto p_names = op_desc.Input(kX);
-    auto pg_names = op_desc.Output(framework::GradVarName(kX));
+    auto pg_ig_names = op_desc.Output(framework::GradVarName(kX));
 
     for (size_t i = 0; i < p_names.size(); ++i) {
       auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i]));
-      auto *g_var = block->FindVarRecursive(pg_names[i]);
+      auto *g_var = block->FindVarRecursive(pg_ig_names[i]);
       if (g_var != nullptr) {  // Gradient could be @EMPTY@
-        VLOG(5) << "Setting " << pg_names[i] << " following " << p_names[i]
+        VLOG(5) << "Setting " << pg_ig_names[i] << " following " << p_names[i]
                 << " type: " << p_var.GetType();
         g_var->SetType(p_var.GetType());
         g_var->SetDataType(p_var.GetDataType());
@@ -333,21 +369,21 @@
     ctx->HasInputs(framework::GradVarName(kOutputs));
 
     auto p_names = ctx->Inputs(kX);
-    auto pg_names = ctx->Outputs(kXGRAD);
+    auto pg_ig_names = ctx->Outputs(kXGRAD);
     auto var_types = ctx->GetInputsVarType(kX);
     std::vector<std::string> names_to_set;
     std::vector<framework::DDim> dims_to_set;
     for (size_t i = 0; i < p_names.size(); ++i) {
-      if (pg_names[i] == framework::kEmptyVarName) {
+      if (pg_ig_names[i] == framework::kEmptyVarName) {
         continue;
       }
       auto dims = ctx->GetInputsElementDim(kX, i);
       if (var_types[i] == framework::proto::VarType::LOD_TENSOR) {
-        names_to_set.push_back(pg_names[i]);
+        names_to_set.push_back(pg_ig_names[i]);
         dims_to_set.push_back(dims);
       } else if (var_types[i] == framework::proto::VarType::LOD_TENSOR_ARRAY) {
         // not sure how to set the dim of LOD_TENSOR_ARRAY
-        names_to_set.push_back(pg_names[i]);
+        names_to_set.push_back(pg_ig_names[i]);
         dims_to_set.push_back(dims);
       }
     }
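In the while_op hunks above, a parameter's gradient can be legitimately missing for a step: the outside gradient variable may not exist at all (the new `FindVar == nullptr` check), or it may exist only as an all-empty `LoDTensorArray` (the new `empty` scan). Both cases are now skipped instead of failing, and the existing `fill_constant`/`sum` pair accumulates whatever per-step gradients remain. A rough NumPy model of that accumulation (the scope-renaming machinery is elided):

```python
import numpy as np

def accumulate_while_grad(step_grads, shape):
    """Mimics WhileGradOp's per-parameter accumulation across time steps.

    step_grads: one entry per executed step; None stands for a step whose
    sub-block produced no gradient for this parameter (and is skipped).
    """
    total = None
    for g in step_grads:
        if g is None:                   # like the empty-LoDTensorArray check
            continue
        if total is None:               # first step: fill_constant(0.0)
            total = np.zeros(shape, dtype='float32')
        total = total + g               # the inserted sum_op
    return total

grads = [np.ones((2, 2), 'float32'), None, 2 * np.ones((2, 2), 'float32')]
print(accumulate_while_grad(grads, (2, 2)))  # [[3. 3.] [3. 3.]]
```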
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 8bc30fc123163983f4bddc19af489920db93e0c0..1d081f89c491ae33e34ce282d7966ad6fe1cd51f 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -396,11 +396,6 @@ All parameter, weight, gradient are variables in Paddle.
         Prune(*prog_with_targets.Proto(), &pruned_desc);
         return new ProgramDesc(pruned_desc);
       });
-  m.def("inference_optimize", [](ProgramDesc &origin) {
-    proto::ProgramDesc pruned_desc;
-    InferenceOptimize(*(origin.Proto()), &pruned_desc);
-    return new ProgramDesc(pruned_desc);
-  });
   m.def("empty_var_name",
         []() { return std::string(framework::kEmptyVarName); });
   m.def("grad_var_suffix",
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index 77b9b36e68c88eab35bcc1a88ce08a7b5940d55f..f50a68c54114d5cce15418ad22f38c83163ba866 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -108,7 +108,15 @@ function cmake_gen() {
             fi
         fi
     fi
-
+
+    if [ "$SYSTEM" == "Darwin" ]; then
+        WITH_DISTRIBUTE=${WITH_DISTRIBUTE:-ON}
+        WITH_AVX=${WITH_AVX:-ON}
+        INFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR:-~/.cache/inference_demo}
+    else
+        INFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR:-/root/.cache/inference_demo}
+    fi
+
     cat <<EOF
diff --git a/python/requirements.txt b/python/requirements.txt
--- a/python/requirements.txt
+++ b/python/requirements.txt
 numpy>=1.12,<=1.14  # TODO: change to ">=1.12" when numpy fixes the bug in 1.15 and higher versions
 protobuf==3.1
 recordio>=0.1.0
-matplotlib
+matplotlib==2.2.3  # TODO: let the python3 paddlepaddle package use the latest matplotlib
 rarfile
 scipy>=0.19.0
 Pillow