From 313bff6ba83843dde4a5a90f5af22e05ee2a523c Mon Sep 17 00:00:00 2001 From: Chang Xu Date: Thu, 17 Mar 2022 19:04:17 +0800 Subject: [PATCH] Revert "Fix truncated norm operator (#40287)" (#40614) This reverts commit 0c3335433525d4f156ee7afc475274df75a34736. --- .../ps/table/depends/initializers.h | 11 +++----- .../operators/truncated_gaussian_random_op.h | 17 ++++++++++-- .../truncated_gaussian_random_op_npu.cc | 9 ++----- .../truncated_gaussian_random_op_xpu.cc | 9 ++----- .../cpu/truncated_gaussian_random_kernel.cc | 9 ++----- .../gpu/truncated_gaussian_random_kernel.cu | 27 +++++++------------ .../truncated_gaussian_random_kernel.h | 14 ++++++++-- 7 files changed, 47 insertions(+), 49 deletions(-) diff --git a/paddle/fluid/distributed/ps/table/depends/initializers.h b/paddle/fluid/distributed/ps/table/depends/initializers.h index 5ac0c08f97d..f46e659a88b 100644 --- a/paddle/fluid/distributed/ps/table/depends/initializers.h +++ b/paddle/fluid/distributed/ps/table/depends/initializers.h @@ -23,6 +23,7 @@ #include "gflags/gflags.h" #include "paddle/fluid/framework/generator.h" + #include "paddle/fluid/operators/truncated_gaussian_random_op.h" namespace paddle { @@ -117,13 +118,9 @@ class TruncatedGaussianInitializer : public Initializer { seed_ = static_cast(std::stoi(attrs[1])); mean_ = std::stof(attrs[2]); std_ = std::stof(attrs[3]); - auto normal_cdf = [](float x) { - return (1.0 + std::erf(x / std::sqrt(2.0))) / 2.0; - }; - float a_normal_cdf = normal_cdf((-2.0 - mean_) / std_); - float b_normal_cdf = normal_cdf((2.0 - mean_) / std_); - std::uniform_real_distribution dist_(2.0 * a_normal_cdf - 1.0, - 2.0 * b_normal_cdf - 1.0); + + std::uniform_real_distribution dist_( + std::numeric_limits::min(), 1.0); random_engine_ = framework::GetCPURandomEngine(seed_); } diff --git a/paddle/fluid/operators/truncated_gaussian_random_op.h b/paddle/fluid/operators/truncated_gaussian_random_op.h index 8af6e281424..a6ff2f686cb 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op.h +++ b/paddle/fluid/operators/truncated_gaussian_random_op.h @@ -1,8 +1,11 @@ /* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -137,9 +140,19 @@ T Erfinv(T x) { template struct TruncatedNormal { T mean, std; - TruncatedNormal(T mean, T std) : mean(mean), std(std) {} + T a_normal_cdf; + T b_normal_cdf; + TruncatedNormal(T mean, T std) : mean(mean), std(std) { + auto normal_cdf = [](T x) { + return (1.0 + std::erf(x / std::sqrt(2.0))) / 2.0; + }; + a_normal_cdf = normal_cdf(-2.0); + b_normal_cdf = normal_cdf(2.0); + } + T operator()(T value) const { - return std::sqrt(2.0) * Erfinv(value) * std + mean; + auto p = a_normal_cdf + (b_normal_cdf - a_normal_cdf) * value; + return std::sqrt(2.0) * Erfinv(2 * p - 1) * std + mean; } }; diff --git a/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc b/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc index 4ed0dd22ec0..261d9cee2d5 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc +++ b/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc @@ -84,13 +84,8 @@ class NPUTruncatedGaussianRandomKernel : public framework::OpKernel { Tensor cpu_tensor(tensor->dtype()); cpu_tensor.Resize(tensor->dims()); T* cpu_data = cpu_tensor.mutable_data(platform::CPUPlace()); - auto normal_cdf = [](float x) { - return (1.0 + std::erf(x / std::sqrt(2.0))) / 2.0; - }; - float a_normal_cdf = normal_cdf((-2.0 - mean) / std); - float b_normal_cdf = normal_cdf((2.0 - mean) / std); - std::uniform_real_distribution dist(2.0 * a_normal_cdf - 1.0, - 2.0 * b_normal_cdf - 1.0); + std::uniform_real_distribution dist(std::numeric_limits::min(), + 1.0); TruncatedNormal truncated_normal(mean, std); int64_t size = tensor->numel(); diff --git a/paddle/fluid/operators/truncated_gaussian_random_op_xpu.cc b/paddle/fluid/operators/truncated_gaussian_random_op_xpu.cc index 984d9f397cc..803b61fbe81 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op_xpu.cc +++ b/paddle/fluid/operators/truncated_gaussian_random_op_xpu.cc @@ -32,13 +32,8 @@ class XPUTruncatedGaussianRandomKernel : public framework::OpKernel { auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); - auto normal_cdf = [](float x) { - return (1.0 + std::erf(x / std::sqrt(2.0))) / 2.0; - }; - float a_normal_cdf = normal_cdf((-2.0 - mean) / std); - float b_normal_cdf = normal_cdf((2.0 - mean) / std); - std::uniform_real_distribution dist(2.0 * a_normal_cdf - 1.0, - 2.0 * b_normal_cdf - 1.0); + std::uniform_real_distribution dist(std::numeric_limits::min(), + 1.0); TruncatedNormal truncated_normal(mean, std); int64_t size = tensor->numel(); diff --git a/paddle/phi/kernels/cpu/truncated_gaussian_random_kernel.cc b/paddle/phi/kernels/cpu/truncated_gaussian_random_kernel.cc index ab3d3c2376b..4247e597ace 100644 --- a/paddle/phi/kernels/cpu/truncated_gaussian_random_kernel.cc +++ b/paddle/phi/kernels/cpu/truncated_gaussian_random_kernel.cc @@ -37,13 +37,8 @@ void TruncatedGaussianRandomKernel(const Context& dev_ctx, T* data = dev_ctx.template Alloc(tensor); - auto normal_cdf = [](float x) { - return (1.0 + std::erf(x / std::sqrt(2.0))) / 2.0; - }; - float a_normal_cdf = normal_cdf((-2.0 - mean) / std); - float b_normal_cdf = normal_cdf((2.0 - mean) / std); - std::uniform_real_distribution dist(2.0 * a_normal_cdf - 1.0, - 2.0 * b_normal_cdf - 1.0); + std::uniform_real_distribution dist(std::numeric_limits::min(), + 1.0); TruncatedNormal truncated_normal(mean, std); int64_t size = tensor->numel(); diff --git a/paddle/phi/kernels/gpu/truncated_gaussian_random_kernel.cu b/paddle/phi/kernels/gpu/truncated_gaussian_random_kernel.cu index bb04e7ee851..f27b32ca7b8 100644 --- a/paddle/phi/kernels/gpu/truncated_gaussian_random_kernel.cu +++ b/paddle/phi/kernels/gpu/truncated_gaussian_random_kernel.cu @@ -33,27 +33,23 @@ struct GPUTruncatedNormal { T mean, std; T a_normal_cdf; T b_normal_cdf; - unsigned int seed; T numeric_min; __host__ __device__ GPUTruncatedNormal(T mean, T std, T numeric_min, int seed) : mean(mean), std(std), seed(seed), numeric_min(numeric_min) { - auto normal_cdf = [](float x) { - return (1.0 + std::erf(x / std::sqrt(2.0))) / 2.0; - }; - a_normal_cdf = normal_cdf((-2.0 - mean) / std); - b_normal_cdf = normal_cdf((2.0 - mean) / std); + a_normal_cdf = (1.0 + erff(-2.0 / sqrtf(2.0))) / 2.0; + b_normal_cdf = (1.0 + erff(2.0 / sqrtf(2.0))) / 2.0; } __host__ __device__ T operator()(const unsigned int n) const { thrust::minstd_rand rng; rng.seed(seed); - thrust::uniform_real_distribution dist(2.0 * a_normal_cdf - 1.0, - 2.0 * b_normal_cdf - 1.0); + thrust::uniform_real_distribution dist(numeric_min, 1); rng.discard(n); T value = dist(rng); - return std::sqrt(2.0) * erfinvf(value) * std + mean; + auto p = a_normal_cdf + (b_normal_cdf - a_normal_cdf) * value; + return std::sqrt(2.0) * erfinvf(2 * p - 1) * std + mean; } }; @@ -73,21 +69,18 @@ struct TruncatedNormalOffset { seed(seed), numeric_min(numeric_min), offset_(offset) { - auto normal_cdf = [](float x) { - return (1.0 + std::erf(x / std::sqrt(2.0))) / 2.0; - }; - a_normal_cdf = normal_cdf((-2.0 - mean) / std); - b_normal_cdf = normal_cdf((2.0 - mean) / std); + a_normal_cdf = (1.0 + erff(-2.0 / sqrtf(2.0))) / 2.0; + b_normal_cdf = (1.0 + erff(2.0 / sqrtf(2.0))) / 2.0; } __host__ __device__ T operator()(const unsigned int n) const { thrust::minstd_rand rng; rng.seed(seed); - thrust::uniform_real_distribution dist(2.0 * a_normal_cdf - 1.0, - 2.0 * b_normal_cdf - 1.0); + thrust::uniform_real_distribution dist(numeric_min, 1); rng.discard(n + offset_); T value = dist(rng); - return std::sqrt(2.0) * erfinvf(value) * std + mean; + auto p = a_normal_cdf + (b_normal_cdf - a_normal_cdf) * value; + return std::sqrt(2.0) * erfinvf(2 * p - 1) * std + mean; } }; diff --git a/paddle/phi/kernels/truncated_gaussian_random_kernel.h b/paddle/phi/kernels/truncated_gaussian_random_kernel.h index c4c13578a98..f8547ced419 100644 --- a/paddle/phi/kernels/truncated_gaussian_random_kernel.h +++ b/paddle/phi/kernels/truncated_gaussian_random_kernel.h @@ -141,9 +141,19 @@ T Erfinv(T x) { template struct TruncatedNormal { T mean, std; - TruncatedNormal(T mean, T std) : mean(mean), std(std) {} + T a_normal_cdf; + T b_normal_cdf; + TruncatedNormal(T mean, T std) : mean(mean), std(std) { + auto normal_cdf = [](T x) { + return (1.0 + std::erf(x / std::sqrt(2.0))) / 2.0; + }; + a_normal_cdf = normal_cdf(-2.0); + b_normal_cdf = normal_cdf(2.0); + } + T operator()(T value) const { - return std::sqrt(2.0) * Erfinv(value) * std + mean; + auto p = a_normal_cdf + (b_normal_cdf - a_normal_cdf) * value; + return std::sqrt(2.0) * Erfinv(2 * p - 1) * std + mean; } }; -- GitLab