diff --git a/.gitignore b/.gitignore index b92bb9cc129659fa502b4a9b55548992412e5429..90138f996cf9cacc3c1cbff0cf2600eefca3f305 100644 --- a/.gitignore +++ b/.gitignore @@ -25,5 +25,6 @@ third_party/ bazel-* third_party/ +build_* # clion workspace. cmake-build-* diff --git a/CMakeLists.txt b/CMakeLists.txt index 24262c1821dab88bdf7202349fcde0e9dd6a0820..df00e977ebb547980e69ee421779c57717d771a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,7 @@ option(WITH_INFERENCE "Compile fluid inference library" ON) option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF) option(WITH_SYSTEM_BLAS "Use system blas library" OFF) option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION}) -option(WITH_FAST_MATH "Make use of fast math library" OFF) +option(WITH_FAST_MATH "Make use of fast math library, might affect the precision to some extent" ON) # PY_VERSION if(NOT PY_VERSION) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 6418da2a7e51c51575ff56aeabedff5452458fbc..c6dd919a93d119723b389d3a695f0af82d711a06 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -198,6 +198,9 @@ paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None) +paddle.fluid.layers.has_inf ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) +paddle.fluid.layers.has_nan ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) +paddle.fluid.layers.isfinite ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)) paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)) diff --git a/paddle/fluid/framework/data_type.h b/paddle/fluid/framework/data_type.h index 8ad2fb5f3ffd9641932bbbb024a31e81d31dc9bb..d5be43b33edab7871e1bba930a4fc6cd1e293825 100644 --- a/paddle/fluid/framework/data_type.h +++ b/paddle/fluid/framework/data_type.h @@ -17,7 +17,6 @@ limitations under the License. */ #include #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/platform/enforce.h" - #include "paddle/fluid/platform/float16.h" namespace paddle { diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 17f942571d0141537e992be9ab73847d2a794698..b29ac44699463312a1fdcea55e003daa75997302 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -50,6 +50,27 @@ class CompileTimeInferShapeContext : public InferShapeContext { const std::vector &Outputs( const std::string &name) const override; + void ShareDim(const std::string &in, const std::string &out, size_t i = 0, + size_t j = 0) override { + PADDLE_ENFORCE_LT(i, Inputs(in).size()); + PADDLE_ENFORCE_LT(j, Outputs(out).size()); + const std::string &input_n = Inputs(in)[i]; + const std::string &output_n = Outputs(out)[j]; + + PADDLE_ENFORCE(input_n != framework::kEmptyVarName, "The %s[%d] is @EMPTY@", + in, i); + PADDLE_ENFORCE(output_n != framework::kEmptyVarName, + "The %s[%d] is @EMPTY@", out, j); + + auto *in_var = block_.FindVarRecursive(input_n); + auto *out_var = block_.FindVarRecursive(output_n); + + PADDLE_ENFORCE(in_var->GetType() == out_var->GetType(), + "The type of %s and %s is not the same.", input_n, output_n); + + SetDim(output_n, GetDim(input_n)); + } + void ShareLoD(const std::string &in, const std::string &out, size_t i = 0, size_t j = 0) const override { PADDLE_ENFORCE_LT(i, Inputs(in).size()); diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 6666dd8e60a509c3f82f66f832ecce03811e9fb6..9f930065324f13f5aa79c214e820fb6fc2f3a166 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -542,6 +542,36 @@ class RuntimeInferShapeContext : public InferShapeContext { return op_.Outputs(name); } + void ShareDim(const std::string& in, const std::string& out, size_t i = 0, + size_t j = 0) override { + PADDLE_ENFORCE_LT(i, Inputs(in).size()); + PADDLE_ENFORCE_LT(j, Outputs(out).size()); + const std::string& input_n = Inputs(in)[i]; + const std::string& output_n = Outputs(out)[j]; + + Variable* in_var = scope_.FindVar(input_n); + Variable* out_var = scope_.FindVar(output_n); + PADDLE_ENFORCE(in_var->Type() == out_var->Type(), + "The type of %s and %s is not the same.", output_n, + GetDim(input_n)); + + if (in_var->IsType()) { + auto& in_sele_rows = in_var->Get(); + auto out_sele_rows = out_var->GetMutable(); + out_sele_rows->mutable_value()->Resize(in_sele_rows.value().dims()); + out_sele_rows->set_rows(in_sele_rows.rows()); + out_sele_rows->set_height(in_sele_rows.height()); + } else if (in_var->IsType()) { + auto& in_lod_tensor = in_var->Get(); + auto* out_lod_tensor = out_var->GetMutable(); + out_lod_tensor->Resize(in_lod_tensor.dims()); + } else { + PADDLE_THROW( + "Currently, the input type of ShareDim only can be LoDTensor " + "or SelectedRows."); + } + } + void ShareLoD(const std::string& in, const std::string& out, size_t i = 0, size_t j = 0) const override { const std::vector& inputs = Inputs(in); diff --git a/paddle/fluid/framework/shape_inference.h b/paddle/fluid/framework/shape_inference.h index 5f497cafa0f75f7c23d550ef767d55274de7c900..280bc19dce7b604d67aefdc572de96b479b8d2d7 100644 --- a/paddle/fluid/framework/shape_inference.h +++ b/paddle/fluid/framework/shape_inference.h @@ -56,6 +56,9 @@ class InferShapeContext { virtual const std::vector &Outputs( const std::string &name) const = 0; + virtual void ShareDim(const std::string &in, const std::string &out, + size_t i = 0, size_t j = 0) = 0; + virtual void ShareLoD(const std::string &in, const std::string &out, size_t i = 0, size_t j = 0) const = 0; diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index 05c4a17a01c6fabe48f3fe18544c13153feb0673..1d7a2eb5b38255531880fe3d2e5321024caf0c6b 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -165,10 +165,12 @@ inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor, } template -struct AnyVisitor : public boost::static_visitor { +class AnyVisitor : public boost::static_visitor { + private: const framework::Tensor& tensor_; Predicate predicate_; + public: AnyVisitor(const framework::Tensor& tensor, Predicate predicate) : tensor_(tensor), predicate_(std::move(predicate)) {} @@ -206,6 +208,27 @@ struct AnyVisitor : public boost::static_visitor { } }; +template +class AnyOutVisitor : public boost::static_visitor<> { + private: + const framework::Tensor& tensor_; + mutable framework::Tensor* out_; + Predicate predicate_; + + public: + AnyOutVisitor(const framework::Tensor& tensor, Predicate predicate, + framework::Tensor* out) + : tensor_(tensor), out_(out), predicate_(std::move(predicate)) {} + + template + void operator()(const Place& place) const { + auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place); + out_->Resize({1}); + out_->mutable_data(place); + AnyImpl(predicate_, tensor_, *ctx, out_); + } +}; + template inline bool Any(const framework::Tensor& tensor, Predicate predicate) { AnyVisitor visitor(tensor, predicate); @@ -213,6 +236,14 @@ inline bool Any(const framework::Tensor& tensor, Predicate predicate) { return platform::VisitPlace(place, visitor); } +template +inline void Any(const framework::Tensor& tensor, Predicate predicate, + framework::Tensor* out) { + AnyOutVisitor visitor(tensor, predicate, out); + auto place = tensor.place(); + platform::VisitPlace(place, visitor); +} + struct ContainsNANPredicate { template auto operator()(const T& eigen_vec) const @@ -227,6 +258,12 @@ bool TensorContainsNAN(const framework::Tensor& tensor) { return Any(tensor, predicate); } +void TensorContainsNAN(const framework::Tensor& tensor, + framework::Tensor* out) { + ContainsNANPredicate predicate; + Any(tensor, predicate, out); +} + struct ContainsInfPredicate { template auto operator()(const T& eigen_vec) const @@ -241,6 +278,71 @@ bool TensorContainsInf(const framework::Tensor& tensor) { return Any(tensor, predicate); } +void TensorContainsInf(const framework::Tensor& tensor, + framework::Tensor* out) { + ContainsInfPredicate predicate; + Any(tensor, predicate, out); +} + +// NOTE(dzhwinter): +// Isfinite need a AllVisitor to loop through all the elements. +// We choose two cuda call instead of one allvisitor. The AllVisitor +// should be implemented if the performance hurts. +bool TensorIsfinite(const framework::Tensor& tensor) { + ContainsInfPredicate pred_inf; + ContainsNANPredicate pred_nan; + return !Any(tensor, pred_inf) && !Any(tensor, pred_nan); +} + +#ifdef PADDLE_WITH_CUDA +template +static inline void __global__ BothFalse(const T* cmp, T* out) { + out[0] = (!cmp[0]) && (!out[0]); +} +#endif + +struct BothFalseVisitor : public boost::static_visitor<> { + const framework::Tensor& in_; + mutable framework::Tensor* out_; + BothFalseVisitor(const framework::Tensor& in, framework::Tensor* out) + : in_(in), out_(out) {} + + template + void operator()(const Place& place) const { + VisitorImpl(place); + } + + void VisitorImpl(const platform::CUDAPlace& gpu) const { +#ifdef PADDLE_WITH_CUDA + auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(gpu); + BothFalse<<<1, 1, 0, ctx->stream()>>>(in_.data(), + out_->mutable_data(gpu)); +#endif + } + + void VisitorImpl(const platform::CPUPlace& cpu) const { + bool lhs = !in_.data()[0]; + bool rhs = !out_->mutable_data(cpu)[0]; + out_->mutable_data(cpu)[0] = lhs && rhs; + } + + void VisitorImpl( + const platform::CUDAPinnedPlace& cpu /* equals to cpu*/) const { + bool lhs = !in_.data()[0]; + bool rhs = !out_->mutable_data(cpu)[0]; + out_->mutable_data(cpu)[0] = lhs && rhs; + } +}; + +void TensorIsfinite(const framework::Tensor& tensor, framework::Tensor* out) { + framework::Tensor tmp; + TensorContainsInf(tensor, &tmp); + TensorContainsNAN(tensor, out); + BothFalseVisitor visitor(tmp, out); + auto place = tensor.place(); + platform::VisitPlace(place, visitor); +} + void TensorToStream(std::ostream& os, const Tensor& tensor, const platform::DeviceContext& dev_ctx) { { // the 1st field, uint32_t version diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index 4457382ade37a12f5f3613fc4113fbf1f6f91124..cab6d9b67e4e64335be0a386bfffb7ebe4373b3e 100644 --- a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -57,8 +57,15 @@ void TensorToVector(const Tensor& src, const platform::DeviceContext& ctx, template void TesnorToVector(const Tensor& src, std::vector* dst); +// copy the result bool to cpu bool TensorContainsNAN(const framework::Tensor& tensor); bool TensorContainsInf(const framework::Tensor& tensor); +bool TensorIsfinite(const framework::Tensor& tensor); + +// store the result bool in gpu tensor, async operation. Faster than above ones. +void TensorContainsNAN(const framework::Tensor& tensor, framework::Tensor* out); +void TensorContainsInf(const framework::Tensor& tensor, framework::Tensor* out); +void TensorIsfinite(const framework::Tensor& tensor, framework::Tensor* out); void TensorToStream(std::ostream& os, const Tensor& tensor, const platform::DeviceContext& dev_ctx); diff --git a/paddle/fluid/framework/tensor_util_test.cc b/paddle/fluid/framework/tensor_util_test.cc index 6e10885890cd2d4a0d77834944b37e291197b637..a1e5b967a86d10f3439db662af54bb82888027b9 100644 --- a/paddle/fluid/framework/tensor_util_test.cc +++ b/paddle/fluid/framework/tensor_util_test.cc @@ -36,7 +36,7 @@ TEST(TensorCopy, Tensor) { TensorCopy(src_tensor, *cpu_place, &dst_tensor); const int* dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, dst_ptr); + EXPECT_NE(src_ptr, dst_ptr); for (size_t i = 0; i < 9; ++i) { EXPECT_EQ(src_ptr[i], dst_ptr[i]); } @@ -47,7 +47,7 @@ TEST(TensorCopy, Tensor) { TensorCopy(slice_tensor, *cpu_place, &dst_tensor); const int* slice_ptr = slice_tensor.data(); dst_ptr = dst_tensor.data(); - ASSERT_NE(dst_ptr, slice_ptr); + EXPECT_NE(dst_ptr, slice_ptr); for (size_t i = 0; i < 3; ++i) { EXPECT_EQ(dst_ptr[i], slice_ptr[i]); } @@ -77,7 +77,7 @@ TEST(TensorCopy, Tensor) { // Sync before Compare Tensors gpu_ctx.Wait(); const int* dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, dst_ptr); + EXPECT_NE(src_ptr, dst_ptr); for (size_t i = 0; i < 9; ++i) { EXPECT_EQ(src_ptr[i], dst_ptr[i]); } @@ -94,7 +94,7 @@ TEST(TensorCopy, Tensor) { gpu_ctx.Wait(); const int* slice_ptr = slice_tensor.data(); dst_ptr = dst_tensor.data(); - ASSERT_NE(dst_ptr, slice_ptr); + EXPECT_NE(dst_ptr, slice_ptr); for (size_t i = 0; i < 3; ++i) { EXPECT_EQ(dst_ptr[i], slice_ptr[i]); } @@ -117,7 +117,7 @@ TEST(TensorFromVector, Tensor) { // Compare Tensors const int* cpu_ptr = cpu_tensor.data(); const int* src_ptr = src_vec.data(); - ASSERT_NE(src_ptr, cpu_ptr); + EXPECT_NE(src_ptr, cpu_ptr); for (size_t i = 0; i < 9; ++i) { EXPECT_EQ(src_ptr[i], cpu_ptr[i]); } @@ -127,7 +127,7 @@ TEST(TensorFromVector, Tensor) { paddle::framework::TensorFromVector(src_vec, &cpu_tensor); cpu_ptr = cpu_tensor.data(); src_ptr = src_vec.data(); - ASSERT_NE(src_ptr, cpu_ptr); + EXPECT_NE(src_ptr, cpu_ptr); for (size_t i = 0; i < 5; ++i) { EXPECT_EQ(src_ptr[i], cpu_ptr[i]); } @@ -161,8 +161,8 @@ TEST(TensorFromVector, Tensor) { const int* src_ptr = src_vec.data(); const int* cpu_ptr = cpu_tensor.data(); const int* dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, cpu_ptr); - ASSERT_NE(src_ptr, dst_ptr); + EXPECT_NE(src_ptr, cpu_ptr); + EXPECT_NE(src_ptr, dst_ptr); for (size_t i = 0; i < 9; ++i) { EXPECT_EQ(src_ptr[i], cpu_ptr[i]); EXPECT_EQ(src_ptr[i], dst_ptr[i]); @@ -181,8 +181,8 @@ TEST(TensorFromVector, Tensor) { src_ptr = src_vec.data(); cpu_ptr = cpu_tensor.data(); dst_ptr = dst_tensor.data(); - ASSERT_NE(src_ptr, cpu_ptr); - ASSERT_NE(src_ptr, dst_ptr); + EXPECT_NE(src_ptr, cpu_ptr); + EXPECT_NE(src_ptr, dst_ptr); for (size_t i = 0; i < 5; ++i) { EXPECT_EQ(src_ptr[i], cpu_ptr[i]); EXPECT_EQ(src_ptr[i], dst_ptr[i]); @@ -235,9 +235,9 @@ TEST(TensorContainsNAN, CPU) { buf[0] = 0.0; buf[1] = NAN; buf[2] = 0.0; - ASSERT_TRUE(paddle::framework::TensorContainsNAN(src)); + EXPECT_TRUE(paddle::framework::TensorContainsNAN(src)); buf[1] = 0.0; - ASSERT_FALSE(paddle::framework::TensorContainsNAN(src)); + EXPECT_FALSE(paddle::framework::TensorContainsNAN(src)); } { @@ -248,9 +248,9 @@ TEST(TensorContainsNAN, CPU) { buf[0] = 0.0; buf[1].x = 0x7fff; buf[2] = 0.0; - ASSERT_TRUE(paddle::framework::TensorContainsNAN(src)); + EXPECT_TRUE(paddle::framework::TensorContainsNAN(src)); buf[1] = 0.0; - ASSERT_FALSE(paddle::framework::TensorContainsNAN(src)); + EXPECT_FALSE(paddle::framework::TensorContainsNAN(src)); } } @@ -261,9 +261,9 @@ TEST(TensorContainsInf, CPU) { buf[0] = 1.0; buf[1] = INFINITY; buf[2] = 0.0; - ASSERT_TRUE(paddle::framework::TensorContainsInf(src)); + EXPECT_TRUE(paddle::framework::TensorContainsInf(src)); buf[1] = 1.0; - ASSERT_FALSE(paddle::framework::TensorContainsInf(src)); + EXPECT_FALSE(paddle::framework::TensorContainsInf(src)); } { @@ -274,9 +274,55 @@ TEST(TensorContainsInf, CPU) { buf[0] = 1.0; buf[1].x = 0x7c00; buf[2] = 0.0; - ASSERT_TRUE(paddle::framework::TensorContainsInf(src)); + EXPECT_TRUE(paddle::framework::TensorContainsInf(src)); buf[1] = 1.0; - ASSERT_FALSE(paddle::framework::TensorContainsInf(src)); + EXPECT_FALSE(paddle::framework::TensorContainsInf(src)); + } +} + +TEST(TensorIsfinite, CPU) { + { + paddle::framework::Tensor src, out; + double* buf = src.mutable_data({3}, paddle::platform::CPUPlace()); + buf[0] = 1.0; + buf[1] = INFINITY; + buf[2] = 0.0; + paddle::framework::TensorIsfinite(src, &out); + EXPECT_EQ(out.data()[0], false); + buf[1] = 1.0; + paddle::framework::TensorIsfinite(src, &out); + EXPECT_EQ(out.data()[0], true); + } + + { + paddle::framework::Tensor src, out; + double* buf = src.mutable_data({3}, paddle::platform::CPUPlace()); + buf[0] = 1.0; + buf[1] = NAN; + buf[2] = 0.0; + paddle::framework::TensorIsfinite(src, &out); + EXPECT_EQ(out.data()[0], false); + buf[1] = 1.0; + paddle::framework::TensorIsfinite(src, &out); + EXPECT_EQ(out.data()[0], true); + } + + { + paddle::framework::Tensor src, out; + paddle::platform::float16* buf = + src.mutable_data( + {3}, paddle::platform::CPUPlace()); + buf[0] = 1.0; + buf[1].x = 0x7c00; + buf[2] = 0.0; + paddle::framework::TensorIsfinite(src, &out); + EXPECT_EQ(out.data()[0], false); + buf[1] = 1.0; + paddle::framework::TensorIsfinite(src, &out); + EXPECT_EQ(out.data()[0], true); + buf[1].x = 0x7fff; + paddle::framework::TensorIsfinite(src, &out); + EXPECT_EQ(out.data()[0], false); } } @@ -299,9 +345,9 @@ TEST(Tensor, FromAndToStream) { TensorFromStream(iss, &dst_tensor, cpu_ctx); int* dst_ptr = dst_tensor.mutable_data(platform::CPUPlace()); for (int i = 0; i < 5; ++i) { - ASSERT_EQ(dst_ptr[i], array[i]); + EXPECT_EQ(dst_ptr[i], array[i]); } - ASSERT_EQ(dst_tensor.dims(), src_tensor.dims()); + EXPECT_EQ(dst_tensor.dims(), src_tensor.dims()); delete place; } #ifdef PADDLE_WITH_CUDA @@ -323,7 +369,7 @@ TEST(Tensor, FromAndToStream) { int* dst_ptr = dst_tensor.mutable_data(platform::CPUPlace()); for (int i = 0; i < 6; ++i) { - ASSERT_EQ(dst_ptr[i], array[i]); + EXPECT_EQ(dst_ptr[i], array[i]); } delete gpu_place; } diff --git a/paddle/fluid/framework/tensor_util_test.cu b/paddle/fluid/framework/tensor_util_test.cu index b4cff1e6c2293fa44f0fd0bb398a538c08dd4fb1..a51f74199e714b8606c9766c57bc6b1dc4c73c65 100644 --- a/paddle/fluid/framework/tensor_util_test.cu +++ b/paddle/fluid/framework/tensor_util_test.cu @@ -27,9 +27,9 @@ static __global__ void FillNAN(float* buf) { } static __global__ void FillInf(float* buf) { - buf[0] = 0.0; - buf[1] = INFINITY; - buf[2] = 0.5; + buf[0] = INFINITY; + buf[1] = 0.1; + buf[2] = 0.2; } static __global__ void FillNAN(platform::float16* buf) { @@ -44,6 +44,18 @@ static __global__ void FillInf(platform::float16* buf) { buf[2] = 0.5; } +static __global__ void FillFinite(float* buf) { + buf[0] = 0.0; + buf[1] = 0.1; + buf[2] = 0.2; +} + +static __global__ void FillFinite(platform::float16* buf) { + buf[0] = 0.0; + buf[1] = 0.1; + buf[2] = 0.2; +} + TEST(TensorContainsNAN, GPU) { paddle::platform::CUDAPlace gpu(0); auto& pool = paddle::platform::DeviceContextPool::Instance(); @@ -86,5 +98,163 @@ TEST(TensorContainsInf, GPU) { } } +TEST(TensorIsfinite, GPU) { + paddle::platform::CUDAPlace gpu(0); + using paddle::platform::float16; + auto& pool = paddle::platform::DeviceContextPool::Instance(); + auto* cuda_ctx = pool.GetByPlace(gpu); + // contains inf + { + Tensor tensor; + float* buf = tensor.mutable_data({3}, gpu); + FillInf<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + EXPECT_TRUE(!TensorIsfinite(tensor)); + } + { + Tensor tensor; + float16* buf = tensor.mutable_data({3}, gpu); + FillInf<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + EXPECT_TRUE(!TensorIsfinite(tensor)); + } + + // contains nan + { + Tensor tensor; + float* buf = tensor.mutable_data({3}, gpu); + FillNAN<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + EXPECT_TRUE(!TensorIsfinite(tensor)); + } + { + Tensor tensor; + float16* buf = tensor.mutable_data({3}, gpu); + FillNAN<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + EXPECT_TRUE(!TensorIsfinite(tensor)); + } + + // all element are finite + { + Tensor tensor; + float* buf = tensor.mutable_data({3}, gpu); + FillFinite<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + EXPECT_TRUE(TensorIsfinite(tensor)); + } + { + Tensor tensor; + float16* buf = tensor.mutable_data({3}, gpu); + FillFinite<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + EXPECT_TRUE(TensorIsfinite(tensor)); + } +} + +TEST(TensorContainsInf, GPUWithoutWait) { + paddle::platform::CUDAPlace gpu(0); + auto& pool = paddle::platform::DeviceContextPool::Instance(); + auto* cuda_ctx = pool.GetByPlace(gpu); + { + Tensor tensor, out; + float* buf = tensor.mutable_data({3}, gpu); + FillInf<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + TensorContainsInf(tensor, &out); + platform::CPUPlace cpu; + Tensor tmp; + TensorCopy(out, cpu, *cuda_ctx, &tmp); + cuda_ctx->Wait(); + ASSERT_EQ(tmp.data()[0], true); + } + { + Tensor tensor, out; + paddle::platform::float16* buf = + tensor.mutable_data({3}, gpu); + FillInf<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + TensorContainsInf(tensor, &out); + platform::CPUPlace cpu; + Tensor tmp; + TensorCopy(out, cpu, *cuda_ctx, &tmp); + cuda_ctx->Wait(); + ASSERT_EQ(tmp.data()[0], true); + } +} + +TEST(TensorContainsNAN, GPUWithoutWait) { + paddle::platform::CUDAPlace gpu(0); + auto& pool = paddle::platform::DeviceContextPool::Instance(); + auto* cuda_ctx = pool.GetByPlace(gpu); + { + Tensor tensor, out; + float* buf = tensor.mutable_data({3}, gpu); + FillNAN<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + TensorContainsNAN(tensor, &out); + platform::CPUPlace cpu; + Tensor tmp; + TensorCopy(out, cpu, *cuda_ctx, &tmp); + cuda_ctx->Wait(); + ASSERT_EQ(tmp.data()[0], true); + } + { + Tensor tensor, out; + paddle::platform::float16* buf = + tensor.mutable_data({3}, gpu); + FillNAN<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + TensorContainsNAN(tensor, &out); + platform::CPUPlace cpu; + Tensor tmp; + TensorCopy(out, cpu, *cuda_ctx, &tmp); + cuda_ctx->Wait(); + ASSERT_EQ(tmp.data()[0], true); + } +} + +TEST(TensorIsfinite, GPUWithoutWait) { + paddle::platform::CUDAPlace gpu(0); + auto& pool = paddle::platform::DeviceContextPool::Instance(); + auto* cuda_ctx = pool.GetByPlace(gpu); + { + Tensor tensor, out; + float* buf = tensor.mutable_data({3}, gpu); + FillInf<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + TensorIsfinite(tensor, &out); + platform::CPUPlace cpu; + Tensor tmp; + TensorCopy(out, cpu, *cuda_ctx, &tmp); + cuda_ctx->Wait(); + EXPECT_EQ(tmp.data()[0], false); + } + { + Tensor tensor, out; + float* buf = tensor.mutable_data({3}, gpu); + FillNAN<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + TensorIsfinite(tensor, &out); + platform::CPUPlace cpu; + Tensor tmp; + TensorCopy(out, cpu, *cuda_ctx, &tmp); + cuda_ctx->Wait(); + EXPECT_EQ(tmp.data()[0], false); + } + { + Tensor tensor, out; + float* buf = tensor.mutable_data({3}, gpu); + FillFinite<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + TensorIsfinite(tensor, &out); + platform::CPUPlace cpu; + Tensor tmp; + TensorCopy(out, cpu, *cuda_ctx, &tmp); + cuda_ctx->Wait(); + EXPECT_EQ(tmp.data()[0], true); + } +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc index 360f924810a570422db5a00b13939813fa73e2fa..8058d7e881025b1d806efe187d4523adadff367d 100644 --- a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc +++ b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc @@ -22,8 +22,8 @@ limitations under the License. */ #include #include #include //NOLINT + #include "paddle/fluid/inference/paddle_inference_api.h" -#include "paddle/fluid/platform/enforce.h" DEFINE_string(dirname, "", "Directory of the inference model."); DEFINE_bool(use_gpu, false, "Whether use gpu."); @@ -62,17 +62,17 @@ void Main(bool use_gpu) { CHECK(predictor->Run(slots, &outputs)); //# 4. Get output. - PADDLE_ENFORCE(outputs.size(), 1UL); + CHECK_EQ(outputs.size(), 1UL); // Check the output buffer size and result of each tid. - PADDLE_ENFORCE(outputs.front().data.length(), 33168UL); + CHECK_EQ(outputs.front().data.length(), 33168UL); float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706}; const size_t num_elements = outputs.front().data.length() / sizeof(float); // The outputs' buffers are in CPU memory. for (size_t i = 0; i < std::min(static_cast(5), num_elements); i++) { - PADDLE_ENFORCE(static_cast(outputs.front().data.data())[i], - result[i]); + CHECK_NEAR(static_cast(outputs.front().data.data())[i], result[i], + 0.001); } } } @@ -108,9 +108,9 @@ void MainThreads(int num_threads, bool use_gpu) { CHECK(predictor->Run(inputs, &outputs)); // 4. Get output. - PADDLE_ENFORCE(outputs.size(), 1UL); + CHECK_EQ(outputs.size(), 1UL); // Check the output buffer size and result of each tid. - PADDLE_ENFORCE(outputs.front().data.length(), 33168UL); + CHECK_EQ(outputs.front().data.length(), 33168UL); float result[5] = {0.00129761, 0.00151112, 0.000423564, 0.00108815, 0.000932706}; const size_t num_elements = @@ -118,8 +118,8 @@ void MainThreads(int num_threads, bool use_gpu) { // The outputs' buffers are in CPU memory. for (size_t i = 0; i < std::min(static_cast(5), num_elements); i++) { - PADDLE_ENFORCE(static_cast(outputs.front().data.data())[i], - result[i]); + CHECK_NEAR(static_cast(outputs.front().data.data())[i], + result[i], 0.001); } } }); diff --git a/paddle/fluid/inference/api/demo_ci/vis_demo.cc b/paddle/fluid/inference/api/demo_ci/vis_demo.cc index 3800d49b34738d5a272033d75cb415ae9ad1fb8f..fb59cea457027854a099574c867299450690e61c 100644 --- a/paddle/fluid/inference/api/demo_ci/vis_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/vis_demo.cc @@ -17,11 +17,12 @@ limitations under the License. */ */ #include -#include // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files. +#include // use glog instead of CHECK to avoid importing other paddle header files. #include #include + +// #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/inference/demo_ci/utils.h" -#include "paddle/fluid/platform/enforce.h" #ifdef PADDLE_WITH_CUDA DECLARE_double(fraction_of_gpu_memory_to_use); @@ -78,18 +79,17 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) { size_t numel = output.data.length() / PaddleDtypeSize(output.dtype); VLOG(3) << "predictor output numel " << numel; VLOG(3) << "reference output numel " << refer.data.size(); - PADDLE_ENFORCE_EQ(numel, refer.data.size()); + CHECK_EQ(numel, refer.data.size()); switch (output.dtype) { case PaddleDType::INT64: { for (size_t i = 0; i < numel; ++i) { - PADDLE_ENFORCE_EQ(static_cast(output.data.data())[i], - refer.data[i]); + CHECK_EQ(static_cast(output.data.data())[i], refer.data[i]); } break; } case PaddleDType::FLOAT32: for (size_t i = 0; i < numel; ++i) { - PADDLE_ENFORCE_LT( + CHECK_LT( fabs(static_cast(output.data.data())[i] - refer.data[i]), 1e-5); } diff --git a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc index 8add7a59da613564d657a6fce329a089bab7e799..290fb007d8ba94a2d121947fe67c6474586ac0e0 100644 --- a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc @@ -27,9 +27,6 @@ void SetConfig(AnalysisConfig *cfg) { cfg->device = 0; cfg->enable_ir_optim = true; cfg->specify_input_name = true; -#ifdef PADDLE_WITH_MKLDNN - cfg->_use_mkldnn = true; -#endif } void SetInput(std::vector> *inputs) { diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index c091476d6d132db17a656d5c8dee65e3a88d9ac2..bbf52bea1358c32596ab6f14eeaa419735d19fc6 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -80,7 +80,7 @@ class ActivationOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); } @@ -91,12 +91,26 @@ class ActivationOp : public framework::OperatorWithKernel { } }; +class ActivationOpInferVarType : public framework::VarTypeInference { + public: + void operator()(const framework::OpDesc& op_desc, + framework::BlockDesc* block) const override { + auto x_name = op_desc.Input("X")[0]; + auto out_name = op_desc.Output("Out")[0]; + auto& x = block->FindRecursiveOrCreateVar(x_name); + auto& out = block->FindRecursiveOrCreateVar(out_name); + out.SetType(x.GetType()); + out.SetDataType(x.GetDataType()); + } +}; + class ActivationOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Out")); + ctx->ShareDim("Out", framework::GradVarName("X")); + ctx->ShareLoD("Out", framework::GradVarName("X")); } protected: @@ -525,12 +539,14 @@ namespace ops = paddle::operators; #define REGISTER_INPLACE_ACTIVATION_OP(OP_NAME, KERNEL_TYPE) \ REGISTER_OPERATOR(KERNEL_TYPE, ::paddle::operators::ActivationOp, \ ::paddle::operators::OP_NAME##OpMaker, \ + ::paddle::operators::ActivationOpInferVarType, \ ::paddle::operators::OP_NAME##GradMaker); \ REGISTER_OPERATOR(KERNEL_TYPE##_grad, ::paddle::operators::ActivationOpGrad) #define REGISTER_ACTIVATION_OP(OP_NAME, KERNEL_TYPE) \ REGISTER_OPERATOR(KERNEL_TYPE, ::paddle::operators::ActivationOp, \ ::paddle::operators::OP_NAME##OpMaker, \ + ::paddle::operators::ActivationOpInferVarType, \ ::paddle::framework::DefaultGradOpDescMaker); \ REGISTER_OPERATOR(KERNEL_TYPE##_grad, ::paddle::operators::ActivationOpGrad) diff --git a/paddle/fluid/operators/argsort_op.cc b/paddle/fluid/operators/argsort_op.cc index a2f5a2545701991263c1ef842e9275b1edbfd2ca..d25160f4232b5a621d16b9f469f56bd5aa7c88e3 100644 --- a/paddle/fluid/operators/argsort_op.cc +++ b/paddle/fluid/operators/argsort_op.cc @@ -42,8 +42,8 @@ class ArgsortOp : public framework::OperatorWithKernel { "-rank(Input(X)) (%d).", axis, num_dims); - ctx->SetOutputDim("Out", in_dims); - ctx->SetOutputDim("Indices", in_dims); + ctx->ShareDim("X", "Out"); + ctx->ShareDim("X", "Indices"); ctx->ShareLoD("X", "Out"); ctx->ShareLoD("X", "Indices"); } diff --git a/paddle/fluid/operators/conv_shift_op.cc b/paddle/fluid/operators/conv_shift_op.cc index f2549e814d6f3b5674fe2eec1139f1c3dc6fa0b4..08506ddd18ed35831702814e70962cb36ec958b1 100644 --- a/paddle/fluid/operators/conv_shift_op.cc +++ b/paddle/fluid/operators/conv_shift_op.cc @@ -44,7 +44,7 @@ class ConvShiftOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LE(y_dims[1], x_dims[1], "The 2nd dimension of Input(Y) should be less than or " "equal to the 2nd dimension of Input(X)."); - ctx->SetOutputDim("Out", x_dims); + ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); } }; diff --git a/paddle/fluid/operators/cub_reduce.h b/paddle/fluid/operators/cub_reduce.h index 16fdad775f7befaac04b1ac59a601f04e0ab2bdc..afd3922b8d6537ee16dc5041a838858089adbdb1 100644 --- a/paddle/fluid/operators/cub_reduce.h +++ b/paddle/fluid/operators/cub_reduce.h @@ -22,6 +22,7 @@ #include // NOLINT #include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/tensor_util.h" namespace paddle { namespace operators { @@ -293,7 +294,12 @@ void TensorReduce(const framework::Tensor& x, framework::Tensor* y, } auto x_data = x.data(); auto y_data = y->mutable_data(x.place()); - if (reduce_num == 1) return; + if (reduce_num == 1) { + auto out_dims = y->dims(); + framework::TensorCopy(x, y->place(), y); + y->Resize(out_dims); + return; + } #define CUB_BLOCK_DIM_CASE(block_dim) \ case block_dim: { \ diff --git a/paddle/fluid/operators/elementwise_op.h b/paddle/fluid/operators/elementwise_op.h index 94df11bee70dec44f19ee9ffff04ca92d5990ee8..7e5975ead64ab39a9c618a33e300c4fce55a5b22 100644 --- a/paddle/fluid/operators/elementwise_op.h +++ b/paddle/fluid/operators/elementwise_op.h @@ -41,7 +41,8 @@ class ElementwiseOp : public framework::OperatorWithKernel { auto y_dim = ctx->GetInputDim("Y"); PADDLE_ENFORCE_GE(x_dim.size(), y_dim.size(), "Rank of first input must >= rank of second input."); - ctx->SetOutputDim("Out", x_dim); + + ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); } @@ -70,6 +71,7 @@ class ElementwiseOpInferVarType : public framework::VarTypeInference { auto& x = block->FindRecursiveOrCreateVar(x_name); auto& out = block->FindRecursiveOrCreateVar(out_name); out.SetType(x.GetType()); + out.SetDataType(x.GetDataType()); } }; @@ -157,10 +159,12 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { auto x_grad_name = framework::GradVarName("X"); auto y_grad_name = framework::GradVarName("Y"); if (ctx->HasOutput(x_grad_name)) { - ctx->SetOutputDim(x_grad_name, x_dims); + ctx->ShareDim("X", /*->*/ x_grad_name); + ctx->ShareLoD("X", /*->*/ x_grad_name); } if (ctx->HasOutput(y_grad_name)) { - ctx->SetOutputDim(y_grad_name, y_dims); + ctx->ShareDim("Y", /*->*/ y_grad_name); + ctx->ShareLoD("Y", /*->*/ y_grad_name); } } @@ -193,14 +197,15 @@ class ElementwiseOpExplicitGrad : public ElementwiseOpGrad { auto x_grad_name = framework::GradVarName("X"); if (ctx->HasOutput(x_grad_name)) { - auto out_dims = ctx->GetInputDim(framework::GradVarName("Out")); - ctx->SetOutputDim(x_grad_name, out_dims); + ctx->ShareDim(framework::GradVarName("Out"), /*->*/ x_grad_name); + ctx->ShareLoD(framework::GradVarName("Out"), /*->*/ x_grad_name); } auto y_grad_name = framework::GradVarName("Y"); if (ctx->HasOutput(y_grad_name)) { PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null"); - auto y_dims = ctx->GetInputDim("Y"); - ctx->SetOutputDim(y_grad_name, y_dims); + + ctx->ShareDim("Y", /*->*/ y_grad_name); + ctx->ShareLoD("Y", /*->*/ y_grad_name); } } }; diff --git a/paddle/fluid/operators/fake_dequantize_op.cc b/paddle/fluid/operators/fake_dequantize_op.cc index 2008e7027524ffd1f80a6eede015801b8a0b0254..5d6488c67e0db440c8d4609736523643dd666dcc 100644 --- a/paddle/fluid/operators/fake_dequantize_op.cc +++ b/paddle/fluid/operators/fake_dequantize_op.cc @@ -48,7 +48,8 @@ class FakeDequantizeMaxAbsOp : public framework::OperatorWithKernel { "Input(X) of FakeDequantizeMaxAbsOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of FakeDequantizeMaxAbsOp should not be null."); - ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + + ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); } }; diff --git a/paddle/fluid/operators/isfinite_op.cc b/paddle/fluid/operators/isfinite_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..248c7793560db99c0af06421bf74808422016061 --- /dev/null +++ b/paddle/fluid/operators/isfinite_op.cc @@ -0,0 +1,113 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/isfinite_op.h" +#include +#include + +namespace paddle { +namespace operators { + +class OverflowOp : public framework::OperatorWithKernel { + public: + OverflowOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInputs("X"), "Inputs(X) should not be null"); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of OverflowOp should not be null."); + + ctx->SetOutputDim("Out", {1}); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext &ctx) const override { + int dtype = -1; + auto *x_var = ctx.InputVar("X"); + if (x_var->IsType()) { + dtype = framework::ToDataType(x_var->Get().type()); + } else if (x_var->IsType()) { + dtype = framework::ToDataType( + x_var->Get().value().type()); + } else { + PADDLE_THROW("Cannot find the input data type by all input data"); + } + return framework::OpKernelType(framework::proto::VarType::Type(dtype), + ctx.GetPlace()); + } +}; + +class OverflowOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "(Tensor) The input tensors of overflow operator."); + AddOutput("Out", + "(Tensor) 1-dim tensor, contains a bool scalar. The output " + "tensor of overflow operator."); + AddComment(string::Sprintf(R"DOC( +Overflow operator. + +$$Out = any(X)$$ + +If any X contains Inf or Nan, the Out will generate a indicator. +Out = Inf if any X contains Inf, +Out = Nan if any X contains Nan, +Out = 0 if no Inf/Nan detected. +If X contains both Inf/Nan, it will return the first indicator it meeted. +)DOC", + GetName(), GetComments())); + } + + protected: + virtual std::string GetName() const = 0; + virtual std::string GetComments() const = 0; +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +#define REGISTER_OP_MAKER(op_type, comment) \ + namespace paddle { \ + namespace operators { \ + class _##op_type##OverflowOpMaker \ + : public ::paddle::operators::OverflowOpMaker { \ + protected: \ + std::string GetName() const { return #op_type; } \ + std::string GetComments() const { return comment; } \ + }; \ + } \ + } \ + REGISTER_OPERATOR(op_type, ops::OverflowOp, \ + ops::_##op_type##OverflowOpMaker, \ + paddle::framework::EmptyGradOpMaker) + +#define REGISTER_OVERFLOW_CPU_KERNEL(op_type, functor) \ + REGISTER_OP_CPU_KERNEL( \ + op_type, ops::OverflowKernel, \ + ops::OverflowKernel, \ + ops::OverflowKernel); + +REGISTER_OP_MAKER(isinf, "isinf(X)"); +REGISTER_OP_MAKER(isnan, "isnan(X)"); +REGISTER_OP_MAKER(isfinite, "isfinite(X)"); +FOR_EACH_KERNEL_FUNCTOR(REGISTER_OVERFLOW_CPU_KERNEL); diff --git a/paddle/fluid/operators/isfinite_op.cu b/paddle/fluid/operators/isfinite_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..8d1268b18c6fec03063051f545075209a6fcde27 --- /dev/null +++ b/paddle/fluid/operators/isfinite_op.cu @@ -0,0 +1,33 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#define EIGEN_USE_GPU +#include "paddle/fluid/operators/isfinite_op.h" +#include "paddle/fluid/platform/float16.h" + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +#define REGISTER_OVERFLOW_CUDA_KERNEL(op_type, functor) \ + REGISTER_OP_CUDA_KERNEL( \ + op_type, ops::OverflowKernel, \ + ops::OverflowKernel, \ + ops::OverflowKernel, \ + ops::OverflowKernel); + +FOR_EACH_KERNEL_FUNCTOR(REGISTER_OVERFLOW_CUDA_KERNEL); diff --git a/paddle/fluid/operators/isfinite_op.h b/paddle/fluid/operators/isfinite_op.h new file mode 100644 index 0000000000000000000000000000000000000000..83b080856366ac3332c5856a19b721893bb80eb3 --- /dev/null +++ b/paddle/fluid/operators/isfinite_op.h @@ -0,0 +1,71 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/platform/float16.h" +#include "paddle/fluid/platform/transform.h" + +namespace paddle { +namespace operators { + +struct InfinityFunctor { + void operator()(const framework::Tensor& tensor, framework::Tensor* out) { + framework::TensorContainsInf(tensor, out); + } +}; + +struct NANFunctor { + void operator()(const framework::Tensor& tensor, framework::Tensor* out) { + framework::TensorContainsNAN(tensor, out); + } +}; + +struct IsfiniteFunctor { + void operator()(const framework::Tensor& tensor, framework::Tensor* out) { + framework::TensorIsfinite(tensor, out); + } +}; + +template +class OverflowKernel : public framework::OpKernel { + public: + virtual void Compute(const framework::ExecutionContext& ctx) const { + auto* x = ctx.InputVar("X"); + auto* out = ctx.Output("Out"); + out->mutable_data(ctx.GetPlace()); + Functor functor; + if (x->IsType()) { + auto* in = ctx.Input("X"); + functor(*in, out); + } else if (x->IsType()) { + auto& in = ctx.Input("X")->value(); + functor(in, out); + } else { + PADDLE_THROW("Unsupported input type."); + } + } +}; + +} // namespace operators +} // namespace paddle + +#define FOR_EACH_KERNEL_FUNCTOR(__macro) \ + __macro(isinf, InfinityFunctor); \ + __macro(isnan, NANFunctor); \ + __macro(isfinite, IsfiniteFunctor); diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc index d77b095c5d783a2a9fab87eb8b458117a6a3d225..b9ac54e446811889b647397ae1fbb11c28f46777 100644 --- a/paddle/fluid/operators/lookup_table_op.cc +++ b/paddle/fluid/operators/lookup_table_op.cc @@ -137,6 +137,7 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference { << " is set to LoDTensor"; block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR); } + block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType()); } }; diff --git a/paddle/fluid/operators/prelu_op.cc b/paddle/fluid/operators/prelu_op.cc index e0c4c81bdd5b5d0af3bafe632a2fa033efd08050..58cfbb76e93a1c15c9b7cf9f9e596066c29b7ebb 100644 --- a/paddle/fluid/operators/prelu_op.cc +++ b/paddle/fluid/operators/prelu_op.cc @@ -49,7 +49,7 @@ class PReluOp : public framework::OperatorWithKernel { } else { PADDLE_THROW("Unkown mode %s", mode); } - ctx->SetOutputDim("Out", x_dim); + ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); } diff --git a/paddle/fluid/operators/rnn_memory_helper_op.cc b/paddle/fluid/operators/rnn_memory_helper_op.cc index 13df1d4b4bb6c240610f96ccc8f223fc984d63f7..0fb7776fd9dbf437673820c7cf9411644272626c 100644 --- a/paddle/fluid/operators/rnn_memory_helper_op.cc +++ b/paddle/fluid/operators/rnn_memory_helper_op.cc @@ -54,7 +54,7 @@ class RNNMemoryHelperOpShapeInference : public framework::InferShapeBase { "Input(X) of rnn_memory_helper op should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output of rnn_memory_helper op should not be null."); - ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); } }; diff --git a/paddle/fluid/operators/sequence_conv_op.cc b/paddle/fluid/operators/sequence_conv_op.cc index ec6cb24350ae276724aae339590d40be1e9ea400..95a21a5d3ee6d8037431083edc25d1cddf05dedb 100644 --- a/paddle/fluid/operators/sequence_conv_op.cc +++ b/paddle/fluid/operators/sequence_conv_op.cc @@ -90,8 +90,8 @@ class SequenceConvGradOp : public framework::OperatorWithKernel { ctx->GetInputDim("PaddingData")); } if (ctx->HasOutput(framework::GradVarName("X"))) { - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); - ctx->ShareLoD("X", framework::GradVarName("X")); + ctx->ShareDim("X", /*->*/ framework::GradVarName("X")); + ctx->ShareLoD("X", /*->*/ framework::GradVarName("X")); } if (ctx->HasOutput(framework::GradVarName("Filter"))) { ctx->SetOutputDim(framework::GradVarName("Filter"), diff --git a/paddle/fluid/operators/sequence_pool_op.cc b/paddle/fluid/operators/sequence_pool_op.cc index 5c6fd13d42e43e3502a1cab85a56e019420c708d..15d3f064eb7b025dc9a85b2aabad24186061cbd4 100644 --- a/paddle/fluid/operators/sequence_pool_op.cc +++ b/paddle/fluid/operators/sequence_pool_op.cc @@ -102,8 +102,9 @@ class SequencePoolGradOp : public framework::OperatorWithKernel { for (int64_t i = 1; i < og_dims.size(); ++i) { PADDLE_ENFORCE_EQ(og_dims[i], x_dims[i], "The dimension mismatch."); } - ctx->SetOutputDim(framework::GradVarName("X"), x_dims); - ctx->ShareLoD("X", framework::GradVarName("X")); + + ctx->ShareDim("X", /*->*/ framework::GradVarName("X")); + ctx->ShareLoD("X", /*->*/ framework::GradVarName("X")); } protected: diff --git a/paddle/fluid/operators/sequence_reshape_op.cc b/paddle/fluid/operators/sequence_reshape_op.cc index ef5e6f3210234d59298fcf04c812390643c693d0..31d28d723498892f287246ba228df757d5b9f6c8 100644 --- a/paddle/fluid/operators/sequence_reshape_op.cc +++ b/paddle/fluid/operators/sequence_reshape_op.cc @@ -92,7 +92,7 @@ class SequenceReshapeGradOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of SequenceReshapeGradOp should not be null."); - ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + ctx->ShareDim("X", /*->*/ framework::GradVarName("X")); ctx->ShareLoD("X", /*->*/ framework::GradVarName("X")); } }; diff --git a/paddle/fluid/operators/sequence_softmax_op.cc b/paddle/fluid/operators/sequence_softmax_op.cc index c44f8206eb5079fef969e3e527552512eebd0f1a..ada3e0c8dbba38729c2b9c8b02335327835f2ef4 100644 --- a/paddle/fluid/operators/sequence_softmax_op.cc +++ b/paddle/fluid/operators/sequence_softmax_op.cc @@ -27,7 +27,8 @@ class SequenceSoftmaxOp : public framework::OperatorWithKernel { "Input(X) of SequenceSoftmaxOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of SequenceSoftmaxOp should not be null."); - ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + + ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); } diff --git a/paddle/fluid/operators/shrink_rnn_memory_op.cc b/paddle/fluid/operators/shrink_rnn_memory_op.cc index 29d2fb989754f5621222768a279a1c898ea1c355..e1c74c3a2f89235ba92c396d1a548271bb7d939d 100644 --- a/paddle/fluid/operators/shrink_rnn_memory_op.cc +++ b/paddle/fluid/operators/shrink_rnn_memory_op.cc @@ -151,9 +151,9 @@ class ShrinkRNNMemoryGradInferShape : public framework::InferShapeBase { void operator()(framework::InferShapeContext *context) const override { PADDLE_ENFORCE(context->HasInput("X")); PADDLE_ENFORCE(context->HasOutput(framework::GradVarName("X"))); - context->SetOutputDim(framework::GradVarName("X"), - context->GetInputDim("X")); - context->ShareLoD("X", framework::GradVarName("X")); + + context->ShareDim("X", /*->*/ framework::GradVarName("X")); + context->ShareLoD("X", /*->*/ framework::GradVarName("X")); } }; diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc index c3b0fe32098cb4b41ccc155db58809ef9f1bf46b..193de05422bb78572c0e5eaf4cd46744c3bcb113 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc @@ -40,7 +40,7 @@ class SigmoidCrossEntropyWithLogitsOp : public framework::OperatorWithKernel { "The 2nd dimension of Input(X) and Input(Label) should " "be equal."); - ctx->SetOutputDim("Out", x_dims); + ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ "Out"); } }; diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 295af1c5837d70c32b522cc47c8c3e12d5bd61c7..311cd944603e9bdfefef4daa3a9c690df5b30235 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -620,7 +620,23 @@ All parameter, weight, gradient are variables in Paddle. // -- python binds for parallel executor. py::class_ pe(m, "ParallelExecutor"); - py::class_ exec_strategy(pe, "ExecutionStrategy"); + py::class_ exec_strategy(pe, "ExecutionStrategy", R"DOC( + ExecutionStrategy allows the user to more preciously control how to run + the program in ParallelExecutor by setting the property. + + The available properties include: + use_cuda (bool): Whether to use CUDA or not. Default True. + num_threads (int): The number of threads that used to run the + operators in ParallelExecutor. If it is not set, it will be + set in ParallelExecutor according to the device count. + Default 0. + allow_op_delay (bool): Whether to delay the communication operators + to run. Default False. + num_iteration_per_drop_scope (int): how many iterations between + the two dropping local scopes. Default 100. + + )DOC"); + exec_strategy.def(py::init()) .def_property( "num_threads", @@ -658,7 +674,25 @@ All parameter, weight, gradient are variables in Paddle. : ExecutionStrategy::kDefault; }); - py::class_ build_strategy(pe, "BuildStrategy"); + py::class_ build_strategy(pe, "BuildStrategy", R"DOC( + BuildStrategy allows the user to more preciously control how to + build the SSA Graph in ParallelExecutor by setting the property. + + The available properties include: + reduce_strategy (str): There are two reduce strategies, 'AllReduce' + and 'Reduce'. If you want that all parameters will be optimized + on all devices, you can choose 'AllReduce'; if you choose + 'Reduce', all parameters will be evenly allocated to different + devices for optimization, and then broadcast the optimized + parameter to other devices. Default 'AllReduce'. + gradient_scale_strategy (str): There are two ways of defining loss@grad, + 'CoeffNumDevice' and 'Customized'. By default, ParallelExecutor + sets the loss@grad according to the number of devices. If you want + to customize loss@grad, you can choose 'Customized'. + Default 'CoeffNumDevice'. + debug_graphviz_path (str): Whether to write the SSA Graph to file in the + form of graphviz. It is useful for debugging. Default "". +)DOC"); py::enum_(build_strategy, "ReduceStrategy") .value("Reduce", BuildStrategy::ReduceStrategy::kReduce) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index b434c9f08e96abe16c1194e69af40392e6e4ca0c..e133323ae420ba68d90215767ab940aed744acd6 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -600,7 +600,7 @@ EOF if [[ ${WITH_GPU} == "ON" ]]; then NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.2.13-1+cuda${CUDA_MAJOR} libnccl-dev=2.2.13-1+cuda${CUDA_MAJOR} || true" else - NCCL_DEPS="" + NCCL_DEPS="true" fi if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]]; then diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 1c5ded943b3814688af1f177503d3bdc35073c3f..0d29f2ad209296688582924ae16e495930830bd4 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -60,7 +60,7 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python - COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python + COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib.* ${PADDLE_PYTHON_BUILD_DIR}/lib-python DEPENDS gen_proto_py copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) set(paddle_python_deps ${PADDLE_PYTHON_BUILD_DIR}/.timestamp ${MKL_DEPENDS}) diff --git a/python/paddle/dataset/flowers.py b/python/paddle/dataset/flowers.py index 0d4e7f1ee46ff97912d010cdb268cc4898d99f58..57c5e83c82d216f55a33e568849d87689f86270f 100644 --- a/python/paddle/dataset/flowers.py +++ b/python/paddle/dataset/flowers.py @@ -35,16 +35,15 @@ import itertools import functools from .common import download import tarfile -import six import scipy.io as scio from paddle.dataset.image import * from paddle.reader import * +from paddle import compat as cpt import os import numpy as np from multiprocessing import cpu_count import six from six.moves import cPickle as pickle -from six.moves import zip __all__ = ['train', 'test', 'valid'] DATA_URL = 'http://paddlemodels.cdn.bcebos.com/flowers/102flowers.tgz' @@ -126,9 +125,11 @@ def reader_creator(data_file, batch = pickle.load(f) else: batch = pickle.load(f, encoding='bytes') + if six.PY3: + batch = cpt.to_text(batch) data = batch['data'] labels = batch['label'] - for sample, label in zip(data, batch['label']): + for sample, label in six.moves.zip(data, batch['label']): yield sample, int(label) - 1 if not cycle: break diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 04e71497aa762e390c4123c0bf3d7f111a772dd4..44b92af7acc012f89b271c74b026d18e1a4075f8 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -24,21 +24,10 @@ from .layer_function_generator import templatedoc import numpy __all__ = [ - 'create_tensor', - 'create_parameter', - 'create_global_var', - 'cast', - 'concat', - 'sums', - 'assign', - 'fill_constant_batch_size_like', - 'fill_constant', - 'argmin', - 'argmax', - 'argsort', - 'ones', - 'zeros', - 'reverse', + 'create_tensor', 'create_parameter', 'create_global_var', 'cast', 'concat', + 'sums', 'assign', 'fill_constant_batch_size_like', 'fill_constant', + 'argmin', 'argmax', 'argsort', 'ones', 'zeros', 'reverse', 'has_inf', + 'has_nan', 'isfinite' ] @@ -652,3 +641,52 @@ def load_combine(out, file_path): inputs={}, output={"Out": out}, args={"file_path": file_path}) + + +def has_inf(x): + """ + Test if any of x contains an infinity number + + Args: + x(variable): The Tensor/LoDTensor to be checked. + + Returns: + Variable: The tensor variable storing the output, only a bool value. + """ + helper = LayerHelper("isinf", **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op(type="isinf", inputs={"X": x}, outputs={"Out": out}) + return out + + +def has_nan(x): + """ + Test if any of x contains a NAN + + Args: + x(variable): The Tensor/LoDTensor to be checked. + + Returns: + Variable: The tensor variable storing the output, only a bool value. + """ + helper = LayerHelper("isnan", **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op(type="isnan", inputs={"X": x}, outputs={"Out": out}) + return out + + +def isfinite(x): + """ + Test if any of x contains an infinity/NAN number. If all the elements are finite, + returns true, else false. + + Args: + x(variable): The Tensor/LoDTensor to be checked. + + Returns: + Variable: The tensor variable storing the output, contains a bool value. + """ + helper = LayerHelper("isfinite", **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op(type="isfinite", inputs={"X": x}, outputs={"Out": out}) + return out diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py index a9de09f31f4ed04ba1aa003e85b25fc5a91557e4..b91566fa6fb2449a8becc694b978c30039bf30ed 100644 --- a/python/paddle/fluid/lod_tensor.py +++ b/python/paddle/fluid/lod_tensor.py @@ -74,7 +74,7 @@ def create_lod_tensor(data, recursive_seq_lens, place): assert [ new_recursive_seq_lens ] == recursive_seq_lens, "data and recursive_seq_lens do not match" - flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = np.concatenate(data, axis=0) flattened_data = flattened_data.reshape([len(flattened_data), 1]) return create_lod_tensor(flattened_data, recursive_seq_lens, place) elif isinstance(data, np.ndarray): diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py index 775c2253ab3b27708b745b85fc007fcb504d1aed..6a129b6df9bf1830fdf5eb5cb9ae0c5e4f7bb4ec 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py @@ -16,6 +16,8 @@ from __future__ import print_function import unittest import numpy as np from op_test import OpTest +import paddle.fluid.core as core +from paddle.fluid.op import Operator class ElementwiseMulOp(OpTest): @@ -115,5 +117,56 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp): } +class TestElementWiseMulSelectedRows(OpTest): + def setUp(self): + self.rows = [0, 1, 2, 3, 4, 5, 6] + self.feature = 12 + self.height = 100 + self.input_shape = (len(self.rows), self.feature) + + def prepare_input(self, scope, place): + self.input = { + "X": np.random.random(self.input_shape).astype("float32"), + "Y": np.random.random(self.input_shape).astype("float32") + } + + def init_input(in_name): + x_selected_rows = scope.var(in_name).get_selected_rows() + x_selected_rows.set_height(self.height) + x_selected_rows.set_rows(self.rows) + x_array = self.input[in_name] + x_tensor = x_selected_rows.get_tensor() + x_tensor.set(x_array, place) + + init_input("X") + init_input("Y") + + def create_out_selected_row(self, scope): + return scope.var('Out').get_selected_rows() + + def check_result(self, out_selected_rows): + assert out_selected_rows.height() == self.height + assert out_selected_rows.rows() == self.rows + out_tensor = np.array(out_selected_rows.get_tensor()) + assert out_tensor.shape == self.input_shape + + def check_with_place(self, place): + scope = core.Scope() + self.prepare_input(scope, place) + + out_selected_rows = self.create_out_selected_row(scope) + out_selected_rows.set_height(0) + out_selected_rows.set_rows([]) + + elementwise_mul = Operator("elementwise_mul", X='X', Y='Y', Out='Out') + elementwise_mul.run(scope, place) + self.check_result(out_selected_rows) + + def test_elewisemul_with_selected_rows_input(self): + places = [core.CPUPlace()] + for place in places: + self.check_with_place(place) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_isfinite_op.py b/python/paddle/fluid/tests/unittests/test_isfinite_op.py new file mode 100644 index 0000000000000000000000000000000000000000..d96ae15c7288c9a8d585d8d70d2aa8922b8f22b3 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_isfinite_op.py @@ -0,0 +1,97 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from op_test import OpTest + + +class TestInf(OpTest): + def setUp(self): + self.op_type = "isinf" + self.dtype = np.float32 + self.init_dtype() + + x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x[0] = np.inf + x[-1] = np.inf + + self.inputs = {'X': x} + self.outputs = {'Out': np.array(True).astype(self.dtype)} + + def init_dtype(self): + pass + + def test_output(self): + self.check_output() + + +class TestFP16Inf(TestInf): + def init_dtype(self): + self.dtype = np.float16 + + +class TestNAN(OpTest): + def setUp(self): + self.op_type = "isnan" + self.dtype = np.float32 + self.init_dtype() + + x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x[0] = np.nan + x[-1] = np.nan + + self.inputs = {'X': x} + self.outputs = {'Out': np.array(True).astype(self.dtype)} + + def init_dtype(self): + pass + + def test_output(self): + self.check_output() + + +class TestFP16NAN(TestNAN): + def init_dtype(self): + self.dtype = np.float16 + + +class TestIsfinite(OpTest): + def setUp(self): + self.op_type = "isfinite" + self.dtype = np.float32 + self.init_dtype() + + x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + x[0] = np.inf + x[-1] = np.nan + out = np.isinf(x) | np.isnan(x) + + self.inputs = {'X': x} + self.outputs = {'Out': np.array(False).astype(self.dtype)} + + def init_dtype(self): + pass + + def test_output(self): + self.check_output() + + +class TestFP16Isfinite(TestIsfinite): + def init_dtype(self): + self.dtype = np.float16 + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_reduce_op.py b/python/paddle/fluid/tests/unittests/test_reduce_op.py index 328f0f0011381b77cccb8b2d9b266aa53b259473..8fc8125a773543eea768783155ad152c475535b5 100644 --- a/python/paddle/fluid/tests/unittests/test_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_reduce_op.py @@ -243,5 +243,87 @@ class TestKeepDimReduceSumMultiAxises(OpTest): self.check_grad(['X'], 'Out') +class TestReduceSumWithDimOne(OpTest): + def setUp(self): + self.op_type = "reduce_sum" + self.inputs = {'X': np.random.random((10, 1, 1)).astype("float64")} + self.attrs = {'dim': [1, 2], 'keep_dim': True} + self.outputs = { + 'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']), + keepdims=True) + } + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestReduceSumWithNumelOne(OpTest): + def setUp(self): + self.op_type = "reduce_sum" + self.inputs = {'X': np.random.random((1, 1)).astype("float64")} + self.attrs = {'dim': [1], 'keep_dim': False} + self.outputs = { + 'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']), + keepdims=False) + } + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestReduceMeanWithDimOne(OpTest): + def setUp(self): + self.op_type = "reduce_mean" + self.inputs = {'X': np.random.random((10, 1, 1)).astype("float64")} + self.attrs = {'dim': [1], 'keep_dim': False} + self.outputs = { + 'Out': self.inputs['X'].mean( + axis=tuple(self.attrs['dim']), keepdims=False) + } + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestReduceMeanWithNumelOne(OpTest): + def setUp(self): + self.op_type = "reduce_mean" + self.inputs = {'X': np.random.random((1, 1)).astype("float64")} + self.attrs = {'dim': [1], 'keep_dim': True} + self.outputs = { + 'Out': self.inputs['X'].mean( + axis=tuple(self.attrs['dim']), keepdims=True) + } + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + +class TestReduceAll(OpTest): + def setUp(self): + self.op_type = "reduce_sum" + self.inputs = {'X': np.random.random((1, 1, 1)).astype("float64")} + self.attrs = {'reduce_all': True, 'keep_dim': False} + self.outputs = {'Out': self.inputs['X'].sum()} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out') + + if __name__ == '__main__': unittest.main()