未验证 提交 2c7a8b92 编写于 作者: J Jiabin Yang 提交者: GitHub

Merge branch 'develop' into mac/fix_unittest_279_395

......@@ -32,6 +32,15 @@ Normal
:members:
:noindex:
.. _api_fluid_initializer_TruncatedNormal:
TruncatedNormal
---------------
.. autoclass:: paddle.fluid.initializer.TruncatedNormal
:members:
:noindex:
.. _api_fluid_initializer_Xavier:
Xavier
......
......@@ -79,6 +79,7 @@ paddle.fluid.io.get_inference_program ArgSpec(args=['target_vars', 'main_program
paddle.fluid.initializer.ConstantInitializer.__init__ ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False))
paddle.fluid.initializer.UniformInitializer.__init__ ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0))
paddle.fluid.initializer.NormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
paddle.fluid.initializer.TruncatedNormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
paddle.fluid.initializer.XavierInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'fan_out', 'seed'], varargs=None, keywords=None, defaults=(True, None, None, 0))
paddle.fluid.initializer.BilinearInitializer.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.initializer.MSRAInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0))
......@@ -124,7 +125,7 @@ paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name
paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None))
paddle.fluid.layers.l2_normalize ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None))
paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'name'], varargs=None, keywords=None, defaults=(False, False, None))
paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None))
paddle.fluid.layers.topk ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_times'], varargs=None, keywords=None, defaults=(0, False))
paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
......@@ -168,6 +169,7 @@ paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=Non
paddle.fluid.layers.pad2d ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None))
paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
......
......@@ -25,6 +25,10 @@ void TransDataDevice(const Tensor &in, const platform::Place &dst_place,
in.place().which(), dst_place.which(),
"Currently, model parallelism is only supported between CPU and CUDA");
// NOTE(yy): TransDataDevice should wait for computation of input.
platform::DeviceContextPool::Instance().Get(in.place())->Wait();
platform::DeviceContextPool::Instance().Get(dst_place)->Wait();
// FIXME(zcd): TransDataDevice is used to transform data from GPU to CPU and
// the enforced checkings have been done in GetDeviceContext, so the
// `dev_ctx->Wait()` is necessary. But `dev_ctx->Wait()` will make the program
......
......@@ -129,6 +129,9 @@ class GradOpDescMakerBase {
std::string ForwardOpType() const { return this->fwd_op_.Type(); }
protected:
const OpDesc& ForwardOp() const { return fwd_op_; }
private:
const OpDesc& fwd_op_;
const std::unordered_set<std::string>& no_grad_set_;
......
......@@ -183,28 +183,5 @@ void Prune(const proto::ProgramDesc& input, proto::ProgramDesc* output) {
output->clear_blocks();
prune_impl(input, output, 0, -1, &dependent_vars);
}
// For every op in block `block_id` of `input`, sets the bool value of the
// first attribute named "is_test" to true. Ops that carry no such attribute
// are left untouched.
void inference_optimize_impl(proto::ProgramDesc* input, int block_id) {
  auto* block_ops = input->mutable_blocks(block_id)->mutable_ops();
  for (auto& current_op : *block_ops) {
    auto* attr_list = current_op.mutable_attrs();
    for (auto& candidate : *attr_list) {
      if (candidate.name() != "is_test") {
        continue;
      }
      // Only the first matching attribute of an op is updated.
      candidate.set_b(true);
      break;
    }
  }
}
// Copies `input` into `output`, then applies inference_optimize_impl to each
// block of the copy. Enforces that the program has at least one block.
void InferenceOptimize(const proto::ProgramDesc& input,
                       proto::ProgramDesc* output) {
  *output = input;
  int num_blocks = output->blocks_size();
  PADDLE_ENFORCE_GT(num_blocks, 0, "ProgramDesc must have at least one block");
  int block_idx = 0;
  while (block_idx < num_blocks) {
    inference_optimize_impl(output, block_idx);
    ++block_idx;
  }
}
} // namespace framework
} // namespace paddle
......@@ -22,8 +22,5 @@ namespace framework {
// Writes a pruned version of `input` into `output`.
// NOTE(review): the pruning criteria live in the implementation, which is
// only partially visible here -- confirm there before relying on details.
void Prune(const proto::ProgramDesc& input, proto::ProgramDesc* output);
// Copies `input` into `output` and, for every op in every block of the copy,
// sets its "is_test" attribute (when present) to true.
void InferenceOptimize(const proto::ProgramDesc& input,
proto::ProgramDesc* output);
} // namespace framework
} // namespace paddle
......@@ -25,7 +25,7 @@ namespace operators {
using LoD = framework::LoD;
class ArrayToLoDFunctor;
struct ArrayToLoDFunctor;
template <typename DeviceContext>
struct ArrayToLoDFunctorImpl {
const ArrayToLoDFunctor *prev_functor_;
......
......@@ -13,9 +13,45 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise_mul_op.h"
#include <string>
#include "paddle/fluid/operators/elementwise_op.h"
namespace paddle {
namespace operators {
// Builds the description of the gradient op for elementwise_mul.
class ElementwiseMulOpGradDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  // Returns an "elementwise_mul_grad" OpDesc whose inputs are the forward
  // X and Y plus the gradient of Out, whose outputs are the gradients of X
  // and Y, and whose attributes are copied from the forward op.
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> grad_desc(new framework::OpDesc());
    grad_desc->SetType("elementwise_mul_grad");

    // Inputs of the backward op.
    grad_desc->SetInput("X", Input("X"));
    grad_desc->SetInput("Y", Input("Y"));
    grad_desc->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));

    grad_desc->SetAttrMap(Attrs());

    // Outputs of the backward op.
    grad_desc->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_desc->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
    return grad_desc;
  }
};
// Proto maker for elementwise_mul: supplies the operator's short name and the
// equation text to the ElementwiseOpMaker base.
class ElementwiseMulOpMaker : public ElementwiseOpMaker {
 protected:
  // Short name of the operation.
  virtual std::string GetName() const {
    return "Mul";
  }

  // Equation text describing the operation.
  virtual std::string GetEquation() const {
    return "Out = X \\\\odot Y";
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\\\odot Y");
REGISTER_OPERATOR(elementwise_mul, ops::ElementwiseOp,
ops::ElementwiseMulOpMaker, ops::ElementwiseOpInferVarType,
ops::ElementwiseMulOpGradDescMaker);
REGISTER_OPERATOR(elementwise_mul_grad, ops::ElementwiseOpGrad);
REGISTER_OP_CPU_KERNEL(
elementwise_mul,
ops::ElementwiseMulKernel<paddle::platform::CPUDeviceContext, float>,
......
......@@ -93,8 +93,8 @@ class ElementwiseMulGradKernel : public ElemwiseGradKernel<T> {
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* out = ctx.Input<Tensor>("Out");
auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* out = dout; // out is not necessary
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
int axis = ctx.Attr<int>("axis");
......
......@@ -59,7 +59,8 @@ class MatMulKernel : public framework::OpKernel<T> {
RowMatrixFromVector(x.dims()), 0, context.Attr<bool>("transpose_X"));
auto mat_dim_b = math::CreateMatrixDescriptor(
ColumnMatrixFromVector(y.dims()), 0, context.Attr<bool>("transpose_Y"));
blas.MatMul(x, mat_dim_a, y, mat_dim_b, T(1), out, T(0));
auto scale = static_cast<T>(context.Attr<float>("alpha"));
blas.MatMul(x, mat_dim_a, y, mat_dim_b, scale, out, T(0));
}
};
......@@ -185,7 +186,8 @@ class MatMulGradKernel : public framework::OpKernel<T> {
auto blas = math::GetBlas<DeviceContext, T>(context);
auto mat_dim_a = math::CreateMatrixDescriptor(a.dims(), 0, trans_a);
auto mat_dim_b = math::CreateMatrixDescriptor(b.dims(), 0, trans_b);
blas.MatMul(a, mat_dim_a, b, mat_dim_b, T(1), out, T(0));
blas.MatMul(a, mat_dim_a, b, mat_dim_b,
static_cast<T>(context.Attr<float>("alpha")), out, T(0));
}
void CalcInputGrad(const framework::ExecutionContext &context,
......@@ -334,6 +336,7 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
R"DOC(If true, use the transpose of `Y`.
)DOC")
.SetDefault(false);
AddAttr<float>("alpha", "The scale of Out").SetDefault(1.0f);
AddComment(R"DOC(
MatMul Operator.
......
......@@ -156,12 +156,29 @@ class MulGradOp : public framework::OperatorWithKernel {
}
};
// Builds the description of the gradient op for mul.
class MulOpGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  // Returns a "mul_grad" OpDesc whose inputs are the forward X and Y plus the
  // gradient of Out, whose outputs are the gradients of X and Y, and whose
  // attributes are copied from the forward op.
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> grad_desc(new framework::OpDesc());
    grad_desc->SetType("mul_grad");

    // Inputs of the backward op.
    grad_desc->SetInput("X", Input("X"));
    grad_desc->SetInput("Y", Input("Y"));
    grad_desc->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));

    // Outputs of the backward op.
    grad_desc->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_desc->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));

    grad_desc->SetAttrMap(Attrs());
    return grad_desc;
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(mul, ops::MulOp, ops::MulOpMaker, ops::MulOpGradMaker);
REGISTER_OPERATOR(mul_grad, ops::MulGradOp);
REGISTER_OP_CPU_KERNEL(
mul, ops::MulKernel<paddle::platform::CPUDeviceContext, float>,
......
......@@ -52,6 +52,12 @@ $$Out = scale*X$$
)DOC");
AddAttr<float>("scale", "The scaling factor of the scale operator.")
.SetDefault(1.0);
AddAttr<float>("bias", "The bias of the scale operator.").SetDefault(0.0);
AddAttr<bool>(
"bias_after_scale",
"Apply bias addition after or before scaling. It is useful for "
"numeric stability in some circumstances.")
.SetDefault(true);
}
};
......@@ -80,6 +86,8 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker {
grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttr("scale", GetAttr("scale"));
grad_op->SetAttr("bias", 0.0f);
grad_op->SetAttr("bias_after_scale", true);
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
......
......@@ -34,6 +34,8 @@ class ScaleKernel : public framework::OpKernel<T> {
"in and out should have the same dim");
auto scale = static_cast<T>(ctx.Attr<float>("scale"));
auto bias = static_cast<T>(ctx.Attr<float>("bias"));
auto bias_after_scale = ctx.Attr<bool>("bias_after_scale");
if (in_var->IsType<framework::SelectedRows>() && in_var != out_var) {
auto& in_slr = in_var->Get<framework::SelectedRows>();
......@@ -45,7 +47,11 @@ class ScaleKernel : public framework::OpKernel<T> {
auto eigen_out = framework::EigenVector<T>::Flatten(*out);
auto eigen_in = framework::EigenVector<T>::Flatten(*in);
auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
eigen_out.device(dev) = scale * eigen_in;
if (bias_after_scale) {
eigen_out.device(dev) = scale * eigen_in + bias;
} else {
eigen_out.device(dev) = scale * (eigen_in + bias);
}
}
};
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cmath>
#include <limits>
#include <random>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
// reference: https://gist.github.com/lakshayg/d80172fe5ae3c5d2c2aedb53c250320e
// Inverse error function: returns y such that erf(y) == x.
//
// Returns NaN for x outside [-1, 1], +infinity for x == 1 and -infinity for
// x == -1. Otherwise a piecewise rational approximation is evaluated:
//   * central region, |x| <= 0.85, in terms of r = 0.180625 - 0.25 * x^2;
//   * two tail regions in terms of r = sqrt(ln 2 - ln(1 - |x|)), split at
//     r == 5.
template <typename T>
T Erfinv(T x) {
  if (x < -1 || x > 1) {
    return std::numeric_limits<T>::quiet_NaN();
  } else if (x == 1.0) {
    return std::numeric_limits<T>::infinity();
  } else if (x == -1.0) {
    return -std::numeric_limits<T>::infinity();
  }

  const T LN2 = 6.931471805599453094172321214581e-1;

  // Central region: numerator (A) and denominator (B) coefficients.
  const T A0 = 1.1975323115670912564578e0;
  const T A1 = 4.7072688112383978012285e1;
  const T A2 = 6.9706266534389598238465e2;
  const T A3 = 4.8548868893843886794648e3;
  const T A4 = 1.6235862515167575384252e4;
  const T A5 = 2.3782041382114385731252e4;
  const T A6 = 1.1819493347062294404278e4;
  const T A7 = 8.8709406962545514830200e2;

  const T B0 = 1.0000000000000000000e0;
  const T B1 = 4.2313330701600911252e1;
  const T B2 = 6.8718700749205790830e2;
  const T B3 = 5.3941960214247511077e3;
  const T B4 = 2.1213794301586595867e4;
  const T B5 = 3.9307895800092710610e4;
  const T B6 = 2.8729085735721942674e4;
  const T B7 = 5.2264952788528545610e3;

  // Near tail (r <= 5): numerator (C) and denominator (D) coefficients.
  const T C0 = 1.42343711074968357734e0;
  const T C1 = 4.63033784615654529590e0;
  const T C2 = 5.76949722146069140550e0;
  const T C3 = 3.64784832476320460504e0;
  const T C4 = 1.27045825245236838258e0;
  const T C5 = 2.41780725177450611770e-1;
  const T C6 = 2.27238449892691845833e-2;
  const T C7 = 7.74545014278341407640e-4;

  const T D0 = 1.4142135623730950488016887e0;
  const T D1 = 2.9036514445419946173133295e0;
  const T D2 = 2.3707661626024532365971225e0;
  const T D3 = 9.7547832001787427186894837e-1;
  const T D4 = 2.0945065210512749128288442e-1;
  const T D5 = 2.1494160384252876777097297e-2;
  const T D6 = 7.7441459065157709165577218e-4;
  const T D7 = 1.4859850019840355905497876e-9;

  // Far tail (r > 5): numerator (E) and denominator (F) coefficients.
  const T E0 = 6.65790464350110377720e0;
  const T E1 = 5.46378491116411436990e0;
  const T E2 = 1.78482653991729133580e0;
  const T E3 = 2.96560571828504891230e-1;
  const T E4 = 2.65321895265761230930e-2;
  const T E5 = 1.24266094738807843860e-3;
  const T E6 = 2.71155556874348757815e-5;
  const T E7 = 2.01033439929228813265e-7;

  const T F0 = 1.414213562373095048801689e0;
  const T F1 = 8.482908416595164588112026e-1;
  const T F2 = 1.936480946950659106176712e-1;
  const T F3 = 2.103693768272068968719679e-2;
  const T F4 = 1.112800997078859844711555e-3;
  const T F5 = 2.611088405080593625138020e-5;
  const T F6 = 2.010321207683943062279931e-7;
  const T F7 = 2.891024605872965461538222e-15;

  // std::abs is spelled out on purpose: an unqualified abs() can resolve to
  // the C integer overload ::abs(int), which truncates |x| < 1 to 0 and makes
  // every input take the central-region branch below.
  T abs_x = std::abs(x);

  if (abs_x <= 0.85) {
    T r = 0.180625 - 0.25 * x * x;
    T num =
        (((((((A7 * r + A6) * r + A5) * r + A4) * r + A3) * r + A2) * r + A1) *
             r +
         A0);
    T den =
        (((((((B7 * r + B6) * r + B5) * r + B4) * r + B3) * r + B2) * r + B1) *
             r +
         B0);
    return x * num / den;
  }

  T r = std::sqrt(LN2 - std::log(1.0 - abs_x));

  T num, den;
  if (r <= 5.0) {
    r = r - 1.6;
    num =
        (((((((C7 * r + C6) * r + C5) * r + C4) * r + C3) * r + C2) * r + C1) *
             r +
         C0);
    den =
        (((((((D7 * r + D6) * r + D5) * r + D4) * r + D3) * r + D2) * r + D1) *
             r +
         D0);
  } else {
    r = r - 5.0;
    num =
        (((((((E7 * r + E6) * r + E5) * r + E4) * r + E3) * r + E2) * r + E1) *
             r +
         E0);
    den =
        (((((((F7 * r + F6) * r + F5) * r + F4) * r + F3) * r + F2) * r + F1) *
             r +
         F0);
  }

  // The tail formulas are evaluated for |x|; restore the sign of x.
  if (x < 0) {
    return -num / den;
  } else {
    return num / den;
  }
}
// Functor that maps a uniform sample to a sample of a normal distribution
// with the given mean and standard deviation, truncated to two standard
// deviations around the mean, using inverse-CDF sampling.
//
// a_normal_cdf / b_normal_cdf cache the standard normal CDF at -2 and +2.
template <typename T>
struct TruncatedNormal {
  T mean, std;
  T a_normal_cdf;
  T b_normal_cdf;

  TruncatedNormal(T mean, T std) : mean(mean), std(std) {
    auto normal_cdf = [](T x) {
      return (1.0 + std::erf(x / std::sqrt(2.0))) / 2.0;
    };
    a_normal_cdf = normal_cdf(-2.0);
    b_normal_cdf = normal_cdf(2.0);
  }

  // `value` is a uniform sample; for value in [0, 1] the result lies in
  // [mean - 2 * std, mean + 2 * std].
  T operator()(T value) const {
    // Map the uniform sample into the CDF range covered by [-2, 2].
    auto p = a_normal_cdf + (b_normal_cdf - a_normal_cdf) * value;
    // sqrt(2) * erfinv(2p - 1) is the standard normal quantile of p. Scale it
    // by std first and then shift by mean; the shift must not be multiplied
    // by std.
    return std::sqrt(2.0) * Erfinv(2 * p - 1) * this->std + mean;
  }
};
// CPU kernel of truncated_gaussian_random: fills the output tensor "Out" with
// values produced by TruncatedNormal from uniform samples.
template <typename T>
class CPUTruncatedGaussianRandomKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    const float mean_attr = context.Attr<float>("mean");
    const float std_attr = context.Attr<float>("std");
    auto* out = context.Output<framework::Tensor>("Out");
    T* out_data = out->mutable_data<T>(context.GetPlace());

    // A seed attribute of 0 requests a seed taken from std::random_device.
    auto seed_value = static_cast<unsigned int>(context.Attr<int>("seed"));
    std::minstd_rand generator;
    if (seed_value == 0) {
      std::random_device entropy;
      seed_value = entropy();
    }
    generator.seed(seed_value);

    // Uniform samples start at the smallest positive normal float, not at 0.
    std::uniform_real_distribution<T> uniform(
        std::numeric_limits<float>::min(), 1.0);
    TruncatedNormal<T> sampler(mean_attr, std_attr);

    const int64_t total = out->numel();
    for (T* cursor = out_data; cursor != out_data + total; ++cursor) {
      *cursor = sampler(uniform(generator));
    }
  }
};
// Operator definition of truncated_gaussian_random: infers the output shape
// from the "shape" attribute and selects the kernel from the "dtype"
// attribute.
class TruncatedGaussianRandomOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Sets the dims of "Out" to the values of the "shape" attribute. Requires
  // that "Out" exists and that "shape" is non-empty.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(
        ctx->HasOutput("Out"),
        "Output(Out) of TruncatedGaussianRandomOp should not be null.");
    auto shape = ctx->Attrs().Get<std::vector<int>>("shape");
    PADDLE_ENFORCE(shape.size() > 0UL,
                   "shape can be one int or array. shape must be set.");
    // Widen every dimension from int to int64_t.
    std::vector<int64_t> out_dim(shape.begin(), shape.end());
    ctx->SetOutputDim("Out", framework::make_ddim(out_dim));
  }

 protected:
  // Kernel key: data type from the "dtype" attribute, the current device
  // context, any layout, plain library.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto data_type =
        static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype"));
    return framework::OpKernelType(data_type, ctx.device_context(),
                                   framework::DataLayout::kAnyLayout,
                                   framework::LibraryType::kPlain);
  }
};
// Declares the proto of truncated_gaussian_random: one output ("Out") and the
// attributes "shape", "mean", "std", "seed" and "dtype".
class TruncatedGaussianRandomOpMaker
    : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddOutput("Out", "Output tensor of truncated gaussian random op.");

    AddAttr<std::vector<int>>("shape",
                              "(vector<int>) "
                              "The dimension of random tensor.");
    AddAttr<float>("mean",
                   "(float, default 0.0) "
                   "mean of random tensor.")
        .SetDefault(.0f);
    AddAttr<float>("std",
                   "(float, default 1.0) "
                   "std of random tensor.")
        .SetDefault(1.0f);
    // Adjacent literals are concatenated verbatim, so each sentence carries
    // its own trailing space to keep the help text readable.
    AddAttr<int>("seed",
                 "(int, default 0) "
                 "Random seed of generator. "
                 "0 means use system wide seed. "
                 "Note that if seed is not 0, this operator will always "
                 "generate the same random numbers every time.")
        .SetDefault(0);
    AddAttr<int>("dtype",
                 "(int, default 5(FP32)) "
                 "Output data type.")
        .SetDefault(framework::proto::VarType::FP32);
    AddComment(R"DOC(
TruncatedGaussianRandom Operator.
Used to initialize tensors with truncated gaussian random generator.
)DOC");
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
// Register the operator (with its proto maker) under the name
// "truncated_gaussian_random"; no gradient op is registered for it.
REGISTER_OP_WITHOUT_GRADIENT(truncated_gaussian_random,
ops::TruncatedGaussianRandomOp,
ops::TruncatedGaussianRandomOpMaker);
// Register the CPU kernel; only the float instantiation is provided.
REGISTER_OP_CPU_KERNEL(truncated_gaussian_random,
ops::CPUTruncatedGaussianRandomKernel<float>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <thrust/random.h>
#include <thrust/transform.h>
#include <limits>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
namespace paddle {
namespace operators {
// Functor used with thrust::transform: for element index n it produces a
// sample of a normal distribution with the given mean and standard deviation,
// truncated to two standard deviations around the mean.
//
// a_normal_cdf / b_normal_cdf cache the standard normal CDF at -2 and +2;
// numeric_min is the lower bound of the uniform distribution sampled from.
template <typename T>
struct TruncatedNormal {
  T mean, std;
  T a_normal_cdf;
  T b_normal_cdf;
  unsigned int seed;
  T numeric_min;

  __host__ __device__ TruncatedNormal(T mean, T std, T numeric_min, int seed)
      : mean(mean), std(std), seed(seed), numeric_min(numeric_min) {
    a_normal_cdf = (1.0 + erff(-2.0 / sqrtf(2.0))) / 2.0;
    b_normal_cdf = (1.0 + erff(2.0 / sqrtf(2.0))) / 2.0;
  }

  __host__ __device__ T operator()(const unsigned int n) const {
    // Every element re-seeds its own engine and skips n draws, so element n
    // receives the n-th value of the stream defined by `seed`.
    thrust::minstd_rand rng;
    rng.seed(seed);
    thrust::uniform_real_distribution<T> dist(numeric_min, 1);
    rng.discard(n);
    T value = dist(rng);
    // Map the uniform sample into the CDF range covered by [-2, 2].
    auto p = a_normal_cdf + (b_normal_cdf - a_normal_cdf) * value;
    // sqrt(2) * erfinv(2p - 1) is the standard normal quantile of p. Scale it
    // by std first and then shift by mean; the shift must not be multiplied
    // by std.
    return std::sqrt(2.0) * erfinvf(2 * p - 1) * this->std + mean;
  }
};
// CUDA kernel of truncated_gaussian_random: fills the output tensor "Out" by
// applying TruncatedNormal to the element indices 0 .. numel - 1.
template <typename T>
class GPUTruncatedGaussianRandomKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* out = context.Output<framework::Tensor>("Out");
    T* out_data = out->mutable_data<T>(context.GetPlace());

    // A seed attribute of 0 requests a seed taken from std::random_device.
    auto seed_value = static_cast<unsigned int>(context.Attr<int>("seed"));
    if (seed_value == 0) {
      std::random_device entropy;
      seed_value = entropy();
    }

    const T mean_value = static_cast<T>(context.Attr<float>("mean"));
    const T std_value = static_cast<T>(context.Attr<float>("std"));

    thrust::counting_iterator<unsigned int> first_index(0);
    const int64_t total = out->numel();
    TruncatedNormal<T> sampler(mean_value, std_value,
                               std::numeric_limits<T>::min(), seed_value);
    thrust::transform(first_index, first_index + total,
                      thrust::device_ptr<T>(out_data), sampler);
  }
};
} // namespace operators
} // namespace paddle
// Register the CUDA kernel of truncated_gaussian_random; only the float
// instantiation is provided.
REGISTER_OP_CUDA_KERNEL(
truncated_gaussian_random,
paddle::operators::GPUTruncatedGaussianRandomKernel<float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "paddle/fluid/framework/executor.h"
......@@ -138,6 +138,10 @@ class WhileGradOp : public framework::OperatorBase {
auto inside_og_name = inside_og_names[i];
VLOG(8) << "Linking outside " << outside_og_name << " --> inside "
<< inside_og_name;
if (scope.FindVar(outside_og_name) == nullptr) {
continue;
}
auto &og_outside =
detail::Ref(scope.FindVar(outside_og_name),
"Cannot find Outside Gradient %s", outside_og_name);
......@@ -167,20 +171,46 @@ class WhileGradOp : public framework::OperatorBase {
PADDLE_ENFORCE_EQ(inside_array[j].numel(), 0);
}
}
} else {
PADDLE_THROW("Currently only support LoDTensor and LoDTensorArray.");
}
}
executor.RunPreparedContext(ctx.get(), *cur_scope_iter, false, true,
true);
auto &pg_names = Outputs(kXGRAD);
// The Outputs(kXGRAD) contains the names of the gradient of parameters
// and inputs.
auto &pg_ig_names = Outputs(kXGRAD);
auto &p_names = Inputs(kX);
PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size());
for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) {
if (pg_names[param_id] == framework::kEmptyVarName) {
PADDLE_ENFORCE_EQ(pg_ig_names.size(), p_names.size());
for (size_t param_id = 0; param_id < pg_ig_names.size(); ++param_id) {
if (pg_ig_names[param_id] == framework::kEmptyVarName) {
continue; // parameter doesn't have gradient
}
auto inside_grad_name = framework::GradVarName(p_names[param_id]);
// for some grad_op, their input doesn't have gradient,
// for example lookup_table_grad_op, the input(Idx) doesn't have
// gradient.
auto pg_ig_var = cur_scope.FindVar(inside_grad_name);
PADDLE_ENFORCE(pg_ig_var != nullptr);
if (pg_ig_var->IsType<framework::LoDTensorArray>()) {
auto pg_ig_lod_t_arr =
pg_ig_var->GetMutable<framework::LoDTensorArray>();
bool empty = true;
for (auto &each : *pg_ig_lod_t_arr) {
if (each.numel() != 0) {
empty = false;
break;
}
}
if (empty) {
LOG(WARNING) << pg_ig_names[param_id]
<< " is not found in cur_scope.";
continue;
}
}
// // TODO(tonyyang-svail): Not sure we need the following
// // If does not compute gradient of that variable inside rnn,
// just
......@@ -194,6 +224,11 @@ class WhileGradOp : public framework::OperatorBase {
if (cur_scope_iter == step_scopes->rbegin()) {
auto *var = (*cur_scope_iter)->FindVar(inside_grad_name);
PADDLE_ENFORCE_NOT_NULL(var, "Can not find var %s", inside_grad_name);
PADDLE_ENFORCE(var->IsType<framework::LoDTensorArray>() ||
var->IsType<LoDTensor>(),
"Currently the type of var only can be LoDTensorArray "
"or LoDTensor.");
if (var->IsType<LoDTensor>()) {
auto &inside_tensor = var->Get<framework::LoDTensor>();
framework::AttributeMap attrs;
......@@ -201,7 +236,7 @@ class WhileGradOp : public framework::OperatorBase {
attrs["shape"] = framework::vectorize2int(inside_tensor.dims());
attrs["value"] = 0.0f;
auto var_name = pg_names[param_id];
auto var_name = pg_ig_names[param_id];
auto zero_op = framework::OpRegistry::CreateOp(
"fill_constant", framework::VariableNameMap{},
{{"Out", {var_name}}}, attrs);
......@@ -213,8 +248,8 @@ class WhileGradOp : public framework::OperatorBase {
}
auto new_inside_name = cur_scope.Rename(inside_grad_name);
auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}},
"sum", {{"X", {pg_ig_names[param_id], new_inside_name}}},
{{"Out", {pg_ig_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, dev_place);
cur_scope.Rename(new_inside_name, inside_grad_name);
......@@ -281,6 +316,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
parent_block->FindVarRecursive(input_name) != nullptr)) {
continue;
}
output_grads.insert(input_name);
}
for (auto &output_name : op->OutputArgumentNames()) {
......@@ -309,13 +345,13 @@ class WhileGradOpVarTypeInference : public framework::VarTypeInference {
void operator()(const framework::OpDesc &op_desc,
framework::BlockDesc *block) const override {
auto p_names = op_desc.Input(kX);
auto pg_names = op_desc.Output(framework::GradVarName(kX));
auto pg_ig_names = op_desc.Output(framework::GradVarName(kX));
for (size_t i = 0; i < p_names.size(); ++i) {
auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i]));
auto *g_var = block->FindVarRecursive(pg_names[i]);
auto *g_var = block->FindVarRecursive(pg_ig_names[i]);
if (g_var != nullptr) { // Gradient could be @EMPTY@
VLOG(5) << "Setting " << pg_names[i] << " following " << p_names[i]
VLOG(5) << "Setting " << pg_ig_names[i] << " following " << p_names[i]
<< " type: " << p_var.GetType();
g_var->SetType(p_var.GetType());
g_var->SetDataType(p_var.GetDataType());
......@@ -333,21 +369,21 @@ class WhileGradOpShapeInference : public framework::InferShapeBase {
ctx->HasInputs(framework::GradVarName(kOutputs));
auto p_names = ctx->Inputs(kX);
auto pg_names = ctx->Outputs(kXGRAD);
auto pg_ig_names = ctx->Outputs(kXGRAD);
auto var_types = ctx->GetInputsVarType(kX);
std::vector<std::string> names_to_set;
std::vector<framework::DDim> dims_to_set;
for (size_t i = 0; i < p_names.size(); ++i) {
if (pg_names[i] == framework::kEmptyVarName) {
if (pg_ig_names[i] == framework::kEmptyVarName) {
continue;
}
auto dims = ctx->GetInputsElementDim(kX, i);
if (var_types[i] == framework::proto::VarType::LOD_TENSOR) {
names_to_set.push_back(pg_names[i]);
names_to_set.push_back(pg_ig_names[i]);
dims_to_set.push_back(dims);
} else if (var_types[i] == framework::proto::VarType::LOD_TENSOR_ARRAY) {
// not sure how to set the dim of LOD_TENSOR_ARRAY
names_to_set.push_back(pg_names[i]);
names_to_set.push_back(pg_ig_names[i]);
dims_to_set.push_back(dims);
}
}
......
......@@ -396,11 +396,6 @@ All parameter, weight, gradient are variables in Paddle.
Prune(*prog_with_targets.Proto(), &pruned_desc);
return new ProgramDesc(pruned_desc);
});
m.def("inference_optimize", [](ProgramDesc &origin) {
proto::ProgramDesc pruned_desc;
InferenceOptimize(*(origin.Proto()), &pruned_desc);
return new ProgramDesc(pruned_desc);
});
m.def("empty_var_name",
[]() { return std::string(framework::kEmptyVarName); });
m.def("grad_var_suffix",
......
......@@ -108,7 +108,15 @@ function cmake_gen() {
fi
fi
fi
if [ "$SYSTEM" == "Darwin" ]; then
WITH_DISTRIBUTE=${WITH_DISTRIBUTE:-ON}
WITH_AVX=${WITH_AVX:-ON}
INFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR:-~/.cache/inference_demo}
else
INFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR:-/root/.cache/inference_demo}
fi
cat <<EOF
========================================
Configuring cmake in /paddle/build ...
......@@ -136,7 +144,7 @@ function cmake_gen() {
-DWITH_CONTRIB=${WITH_CONTRIB:-ON}
-DWITH_INFERENCE=${WITH_INFERENCE:-ON}
-DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON}
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR:-/root/.cache/inference_demo}
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR}
-DWITH_ANAKIN=${WITH_ANAKIN:-OFF}
-DPY_VERSION=${PY_VERSION:-2.7}
========================================
......@@ -168,9 +176,10 @@ EOF
-DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
-DWITH_INFERENCE=${WITH_INFERENCE:-ON} \
-DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} \
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR:-/root/.cache/inference_demo} \
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \
-DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \
-DPY_VERSION=${PY_VERSION:-2.7}
}
function abort(){
......@@ -232,8 +241,8 @@ function build_mac() {
============================================
EOF
make clean
sudo make -j 8
sudo make install -j 8
make -j 8
make install -j 8
}
function build_android() {
......
......@@ -1738,8 +1738,6 @@ class Program(object):
Returns:
Program: The new program.
"""
# this is an alternative implement before
# core.inference_optimize being fixed.
res = Program()
res.desc = core.ProgramDesc(self.desc)
......
......@@ -20,10 +20,10 @@ import contextlib
from .core import VarDesc
__all__ = [
'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'MSRA',
'force_init_on_cpu', 'init_on_cpu', 'ConstantInitializer',
'UniformInitializer', 'NormalInitializer', 'XavierInitializer',
'BilinearInitializer', 'MSRAInitializer'
'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
'MSRA', 'force_init_on_cpu', 'init_on_cpu', 'ConstantInitializer',
'UniformInitializer', 'NormalInitializer', 'TruncatedNormalInitializer',
'XavierInitializer', 'BilinearInitializer', 'MSRAInitializer'
]
_force_init_on_cpu_ = False
......@@ -33,6 +33,8 @@ def force_init_on_cpu():
"""
The flag of whether force to init variables on CPU.
Returns::
Examples:
.. code-block:: python
......@@ -272,6 +274,60 @@ class NormalInitializer(Initializer):
return op
class TruncatedNormalInitializer(Initializer):
    """Implements the Random TruncatedNormal(Gaussian) distribution initializer

    Args:
        loc (float): mean of the normal distribution
        scale (float): standard deviation of the normal distribution
        seed (int): random seed

    Examples:
        .. code-block:: python

            fc = fluid.layers.fc(input=x, size=10,
                param_attr=fluid.initializer.TruncatedNormal(loc=0.0, scale=2.0))
    """

    def __init__(self, loc=0.0, scale=1.0, seed=0):
        assert loc is not None
        assert scale is not None
        assert seed is not None
        # super() must name this class: this class derives from Initializer,
        # not from NormalInitializer, so super(NormalInitializer, self) would
        # raise TypeError on construction.
        super(TruncatedNormalInitializer, self).__init__()
        self._mean = loc
        self._std_dev = scale
        self._seed = seed

    def __call__(self, var, block):
        """Add truncated normal distribution initialization ops for a variable

        Args:
            var: Variable that needs to be initialized
            block: The block in which initialization ops
                   should be added

        Returns:
            the initialization op
        """
        assert isinstance(var, framework.Variable)
        assert isinstance(block, framework.Block)
        # Initialization Ops should be prepended and not appended
        if self._seed == 0:
            # A seed of 0 falls back to the program-level random seed.
            self._seed = block.program.random_seed
        op = block._prepend_op(
            type="truncated_gaussian_random",
            outputs={"Out": var},
            attrs={
                "shape": var.shape,
                "dtype": int(var.dtype),
                "mean": self._mean,
                "std": self._std_dev,
                "seed": self._seed
            })
        var.op = op
        return op
class XavierInitializer(Initializer):
"""
This class implements the Xavier weight initializer from the paper
......@@ -583,6 +639,7 @@ class BilinearInitializer(Initializer):
# Short aliases: each name below is bound to the corresponding
# *Initializer class, so either spelling refers to the same class object.
Constant = ConstantInitializer
Uniform = UniformInitializer
Normal = NormalInitializer
TruncatedNormal = TruncatedNormalInitializer
Xavier = XavierInitializer
MSRA = MSRAInitializer
Bilinear = BilinearInitializer
......@@ -113,6 +113,7 @@ __all__ = [
'pad2d',
'unstack',
'sequence_enumerate',
'expand',
'sequence_concat',
]
......@@ -3498,7 +3499,7 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
return out
def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
"""
Applies matrix multiplication to two tensors.
......@@ -3532,6 +3533,7 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
y (Variable): The input variable which is a Tensor or LoDTensor.
transpose_x (bool): Whether to transpose :math:`x` before multiplication.
transpose_y (bool): Whether to transpose :math:`y` before multiplication.
alpha (float): The scale of output. Default 1.0.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
......@@ -3599,8 +3601,11 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
inputs={'X': x,
'Y': y},
outputs={'Out': out},
attrs={'transpose_X': transpose_x,
'transpose_Y': transpose_y})
attrs={
'transpose_X': transpose_x,
'transpose_Y': transpose_y,
'alpha': alpha,
})
return out
......@@ -6118,3 +6123,53 @@ def unstack(x, axis=0, num=None):
attrs={'axis': axis,
'num': num})
return outs
def expand(x, expand_times, name=None):
    """Tile the input tensor a given number of times along each dimension.

    ``expand_times`` supplies one repeat count per dimension of ``x``, so its
    size must be the same as the rank of ``x``. The rank of ``x`` should be in
    [1, 6]. A worked case:

    .. code-block:: text

        Input(X) is a 3-D tensor with shape [2, 3, 1]:

                [
                   [[1], [2], [3]],
                   [[4], [5], [6]]
                ]

        Attr(expand_times):  [1, 2, 2]

        Output(Out) is a 3-D tensor with shape [2, 6, 2]:

                [
                    [[1, 1], [2, 2], [3, 3], [1, 1], [2, 2], [3, 3]],
                    [[4, 4], [5, 5], [6, 6], [4, 4], [5, 5], [6, 6]]
                ]

    Args:
        x (Variable): A tensor with rank in [1, 6].
        expand_times (list|tuple): Expand times number for each dimension.
        name (str|None): Forwarded to the layer helper along with the other
            arguments. Defaults to None.

    Returns:
        Variable: The expanded variable which is a LoDTensor. After expanding,
        the size of each dimension of Output(Out) equals the size of the
        corresponding dimension of Input(X) multiplied by the corresponding
        value given by expand_times.

    Examples:
        .. code-block:: python

            x = fluid.layers.data(name='x', shape=[10], dtype='float32')
            out = fluid.layers.expand(x=x, expand_times=[1, 2])
    """
    # Keep this as the first statement: **locals() must contain exactly the
    # function arguments (x, expand_times, name) when it is evaluated.
    helper = LayerHelper('expand', input=x, **locals())
    # The output variable takes the dtype of the input named 'x'.
    out = helper.create_tmp_variable(
        helper.input_dtype(input_param_name='x'))
    op_inputs = {'X': x}
    op_outputs = {'Out': out}
    op_attrs = {'expand_times': expand_times}
    helper.append_op(
        type='expand', inputs=op_inputs, outputs=op_outputs, attrs=op_attrs)
    return out
......@@ -72,15 +72,14 @@ endforeach(TEST_OP)
py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR} SERIAL)
if(WITH_DISTRIBUTE)
py_test_modules(test_dist_train MODULES test_dist_train SERIAL)
if (NOT APPLE)
set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20)
set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20)
if(NOT APPLE)
set_tests_properties(test_dist_mnist PROPERTIES TIMEOUT 200)
set_tests_properties(test_dist_word2vec PROPERTIES TIMEOUT 200)
py_test_modules(test_dist_se_resnext MODULES test_dist_se_resnext SERIAL)
endif(NOT APPLE)
py_test_modules(test_dist_transpiler MODULES test_dist_transpiler)
py_test_modules(test_dist_transformer MODULES test_dist_transformer SERIAL)
endif()
py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL)
py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL)
......
......@@ -144,6 +144,142 @@ class TestDynRNN(unittest.TestCase):
# loss should be small after 100 mini-batch
self.assertLess(val[0], loss_0[0])
# Exercises a two-level nested DynamicRNN: an inner DynamicRNN is built inside
# the step block of an outer one. The test only checks that the program can be
# built and run; no assertion is made on the loss value.
def test_train_nested_dyn_rnn(self):
    word_dict = [i for i in range(30)]

    def fake_reader():
        # One fixed sample: for each length j in seq_len, a list of j random
        # ids drawn with numpy.random.randint(30), plus the label list.
        seq_len, label = [[2, 2]], [0, 1]
        data = []
        for ele in seq_len:
            for j in ele:
                data.append([numpy.random.randint(30) \
                             for _ in range(j)])

        # The same (data, label) pair is yielded on every iteration.
        while True:
            yield data, label

    train_data = paddle.batch(fake_reader, batch_size=2)

    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        sentence = fluid.layers.data(
            name='word', shape=[1], dtype='int64', lod_level=2)
        label = fluid.layers.data(
            name='label', shape=[1], dtype='float32', lod_level=1)

        # Outer RNN: steps over `sentence`.
        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            in_ = rnn.step_input(sentence)
            sent_emb = fluid.layers.embedding(
                input=in_, size=[len(word_dict), 32], dtype='float32')
            out_ = fluid.layers.fc(input=sent_emb, size=100, act='tanh')

            # Inner RNN: steps over the outer step's fc output.
            rnn1 = fluid.layers.DynamicRNN()
            with rnn1.block():
                in_1 = rnn1.step_input(out_)
                out_1 = fluid.layers.fc(input=[in_1], size=100, act='tanh')
                rnn1.output(out_1)

            # The last step of the inner RNN output becomes the outer output.
            last = fluid.layers.sequence_last_step(input=rnn1())
            rnn.output(last)

        last = rnn()
        logits = fluid.layers.fc(input=last, size=1, act=None)
        loss = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=logits, label=label)
        loss = fluid.layers.mean(loss)
        sgd = fluid.optimizer.SGD(1e-3)
        #sgd = fluid.optimizer.Adam(1e-3)
        sgd.minimize(loss=loss)

    cpu = fluid.CPUPlace()
    exe = fluid.Executor(cpu)
    exe.run(startup_program)
    feeder = fluid.DataFeeder(feed_list=[sentence, label], place=cpu)
    # A single batch is fetched once and fed on every run below.
    data = next(train_data())
    val = exe.run(main_program, feed=feeder.feed(data),
                  fetch_list=[loss])[0]

    for _ in range(100):
        val = exe.run(main_program,
                      feed=feeder.feed(data),
                      fetch_list=[loss])[0]
    # Only the loss fetched by the final run is printed.
    print(val)
# Second nested DynamicRNN test: the outer step block additionally runs an fc
# projection and a dynamic_lstm, and the inner DynamicRNN steps over the LSTM
# output. The test only checks that the program can be built and run; the
# fetched loss is neither asserted on nor printed.
def test_train_nested_dyn_rnn2(self):
    word_dict = [i for i in range(30)]

    def fake_reader():
        # One fixed sample: for each length j in seq_len, a list of j random
        # ids drawn with numpy.random.randint(30), plus the label list.
        seq_len, label = [[2, 2]], [0, 1]
        data = []
        for ele in seq_len:
            for j in ele:
                data.append([numpy.random.randint(30) \
                             for _ in range(j)])

        # The same (data, label) pair is yielded on every iteration.
        while True:
            yield data, label

    train_data = paddle.batch(fake_reader, batch_size=2)
    hidden_size = 32
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        sentence = fluid.layers.data(
            name='word', shape=[1], dtype='int64', lod_level=2)
        label = fluid.layers.data(
            name='label', shape=[1], dtype='float32', lod_level=1)

        # Outer RNN: steps over `sentence`.
        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            in_ = rnn.step_input(sentence)
            sent_emb = fluid.layers.embedding(
                input=in_,
                size=[len(word_dict), hidden_size],
                dtype='float32')
            # The projection and the LSTM are both sized hidden_size * 4.
            input_forward_proj = fluid.layers.fc(input=sent_emb,
                                                 size=hidden_size * 4,
                                                 act=None,
                                                 bias_attr=False)
            forward, _ = fluid.layers.dynamic_lstm(
                input=input_forward_proj,
                size=hidden_size * 4,
                use_peepholes=False)

            # Inner RNN: steps over the first value returned by dynamic_lstm.
            rnn1 = fluid.layers.DynamicRNN()
            with rnn1.block():
                in_1 = rnn1.step_input(forward)
                out_1 = fluid.layers.fc(input=[in_1], size=100, act='tanh')
                rnn1.output(out_1)

            # The last step of the inner RNN output becomes the outer output.
            last = fluid.layers.sequence_last_step(input=rnn1())
            rnn.output(last)

        last = rnn()
        logits = fluid.layers.fc(input=last, size=1, act=None)
        loss = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=logits, label=label)
        loss = fluid.layers.mean(loss)
        sgd = fluid.optimizer.SGD(1e-3)
        #sgd = fluid.optimizer.Adam(1e-3)
        sgd.minimize(loss=loss)

    cpu = fluid.CPUPlace()
    exe = fluid.Executor(cpu)
    exe.run(startup_program)
    feeder = fluid.DataFeeder(feed_list=[sentence, label], place=cpu)
    # A single batch is fetched once and fed on every run below.
    data = next(train_data())
    val = exe.run(main_program, feed=feeder.feed(data),
                  fetch_list=[loss])[0]

    for _ in range(100):
        val = exe.run(main_program,
                      feed=feeder.feed(data),
                      fetch_list=[loss])[0]
if __name__ == '__main__':
unittest.main()
......@@ -565,6 +565,13 @@ class TestBook(unittest.TestCase):
out = layers.cross_entropy(x, label, False, 4)
self.assertIsNotNone(out)
def test_expand(self):
    # Builds a program containing a single expand layer and checks that the
    # layer returns an output variable, then prints the program text.
    program = Program()
    with program_guard(program):
        x = layers.data(name="input", shape=[10], dtype='int32')
        out = layers.expand(x, [1, 2])
        # Without this check the test would assert nothing about the layer.
        self.assertIsNotNone(out)
    print(str(program))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.op import Operator
from paddle.fluid.executor import Executor
class TestTrunctedGaussianRandomOp(unittest.TestCase):
def setUp(self):
self.op_type = "truncated_gaussian_random"
self.inputs = {}
self.attrs = {
"shape": [10000],
"mean": .0,
"std": 1.,
"seed": 10,
}
self.outputs = ["Out"]
def test_cpu(self):
self.gaussian_random_test(place=fluid.CPUPlace())
def test_gpu(self):
if core.is_compiled_with_cuda():
self.gaussian_random_test(place=fluid.CUDAPlace(0))
def gaussian_random_test(self, place):
program = fluid.Program()
block = program.global_block()
vout = block.create_var(name="Out")
op = block.append_op(
type=self.op_type, outputs={"Out": vout}, attrs=self.attrs)
op.desc.infer_var_type(block.desc)
op.desc.infer_shape(block.desc)
fetch_list = []
for var_name in self.outputs:
fetch_list.append(block.var(var_name))
exe = Executor(place)
outs = exe.run(program, fetch_list=fetch_list)
tensor = outs[0]
self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1)
self.assertAlmostEqual(numpy.var(tensor), 0.773, delta=0.1)
if __name__ == "__main__":
unittest.main()
......@@ -2,7 +2,7 @@ requests==2.9.2
numpy>=1.12,<=1.14 #TODO:change to ">=1.12" when numpy fix bug in 1.15 and higher version
protobuf==3.1
recordio>=0.1.0
matplotlib
matplotlib==2.2.3 # TODO: let python3 paddlepaddle package use latest matplotlib
rarfile
scipy>=0.19.0
Pillow
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册