diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 8bfa41715ffa82c8f6fb321e1a562649d429672f..6788cb34fbaf5941cbb1537c7a83577c623bf76a 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -5,10 +5,18 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) -cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context framework_proto) +if (WITH_GPU) + nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS ddim place paddle_memory device_context framework_proto) +else() + cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS ddim place paddle_memory device_context framework_proto) +endif () cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) -cc_test(tensor_util_test SRCS tensor_util_test.cc DEPS tensor) +if (WITH_GPU) + nv_test(tensor_util_test SRCS tensor_util_test.cc tensor_util_test.cu DEPS tensor) +else() + cc_test(tensor_util_test SRCS tensor_util_test.cc DEPS tensor) +endif() cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) diff --git a/paddle/framework/tensor_util.cc b/paddle/framework/tensor_util.cc new file mode 100644 index 0000000000000000000000000000000000000000..7efc649d0bcda67c663d148e83bcbb6789b0f371 --- /dev/null +++ b/paddle/framework/tensor_util.cc @@ -0,0 +1,119 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/tensor_util.h" + +namespace paddle { +namespace framework { +template +struct AnyDTypeVisitor { + Predicate predicate_; + const Tensor& tensor_; + const DevCtx& ctx_; + Tensor* out_; + + AnyDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx, + Tensor* out) + : predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {} + + template + void operator()() const { + auto t = EigenVector::Flatten(tensor_); + auto o = EigenScalar::From(*out_); + // return any of predicate_(t) is true. + o.device(*ctx_.eigen_device()) = predicate_(t).any(); + } +}; + +template +inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor, + const DevCtx& ctx, framework::Tensor* out) { + VisitDataType(ToDataType(tensor.type()), AnyDTypeVisitor( + predicate, tensor, ctx, out)); +} + +template +struct AnyVisitor : public boost::static_visitor { + const framework::Tensor& tensor_; + Predicate predicate_; + + AnyVisitor(const framework::Tensor& tensor, Predicate predicate) + : tensor_(tensor), predicate_(std::move(predicate)) {} + + template + bool operator()(const Place& place) const { + framework::Tensor out; + out.Resize({1}); + out.mutable_data(place); + auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place); + AnyImpl(predicate_, tensor_, *ctx, &out); + return this->GetResult(out, place); + } + + bool GetResult(const framework::Tensor& out, + const platform::CUDAPlace& gpu) const { + platform::CPUPlace cpu; + framework::Tensor tmp; + tmp.Resize({1}); + tmp.mutable_data(cpu); + auto gpuctx = platform::DeviceContextPool::Instance().Get(gpu); + gpuctx->Wait(); + CopyFrom(out, cpu, *gpuctx, &tmp); + gpuctx->Wait(); + return GetResult(tmp, cpu); + } + + bool GetResult(const framework::Tensor& out, + const platform::CPUPlace& cpu) const { + return *out.data(); + } +}; + +template +inline bool Any(const framework::Tensor& tensor, Predicate predicate) { + AnyVisitor visitor(tensor, predicate); + auto place = tensor.place(); + return platform::VisitPlace(place, visitor); +} + +struct HasNANPredicate { + template + auto operator()(const T& eigen_vec) const + -> decltype(std::declval().isnan()) { + // Cast eigen_vector to vector of bool. true if is inf. + return eigen_vec.isnan(); + } +}; + +bool HasNAN(const framework::Tensor& tensor) { + HasNANPredicate predicate; + return Any(tensor, predicate); +} + +struct HasInfPredicate { + template + auto operator()(const T& eigen_vec) const + -> decltype(std::declval().isinf()) { + // Cast eigen_vector to vector of bool. true if is inf. + return eigen_vec.isinf(); + } +}; + +bool HasInf(const framework::Tensor& tensor) { + HasInfPredicate predicate; + return Any(tensor, predicate); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/tensor_util.cu b/paddle/framework/tensor_util.cu new file mode 120000 index 0000000000000000000000000000000000000000..b00e6e59d93328bf3142597ea4de0dc225501e56 --- /dev/null +++ b/paddle/framework/tensor_util.cu @@ -0,0 +1 @@ +./tensor_util.cc \ No newline at end of file diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h index 108006911a5f30691a01c6579a62c8111b653986..a86fab29250fd4df45a3ddd075e1f4c3a332fb42 100644 --- a/paddle/framework/tensor_util.h +++ b/paddle/framework/tensor_util.h @@ -14,8 +14,10 @@ limitations under the License. */ #pragma once #include "paddle/framework/data_type.h" +#include "paddle/framework/eigen.h" #include "paddle/framework/framework.pb.h" #include "paddle/framework/tensor.h" +#include "paddle/platform/device_context.h" namespace paddle { namespace framework { @@ -207,6 +209,12 @@ inline void CopyToVector(const Tensor& src, std::vector* dst) { src_ptr, size); } +// Returns true if a tensor contains NAN, i.e., Not A Number. +extern bool HasNAN(const framework::Tensor& tensor); + +// Returns true if a tensor contains Inf, i.e., Infinity. +extern bool HasInf(const framework::Tensor& tensor); + inline void SerializeToStream(std::ostream& os, const Tensor& tensor, const platform::DeviceContext& dev_ctx) { // TODO(typhoonzero): serialize to ostream diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc index 1281e9c2a45858810d505aca6b2a2587e7a7280c..f00ce795488bd4279a6ad9e537f32d152e6131e4 100644 --- a/paddle/framework/tensor_util_test.cc +++ b/paddle/framework/tensor_util_test.cc @@ -13,6 +13,7 @@ #include "paddle/framework/tensor_util.h" #include +#include #include namespace paddle { @@ -230,6 +231,29 @@ TEST(CopyToVector, Tensor) { #endif } +TEST(IsNAN, CPU) { + using namespace paddle::framework; + using namespace paddle::platform; + Tensor src; + float* buf = src.mutable_data({3}, CPUPlace()); + buf[0] = 0.0; + buf[1] = NAN; + buf[2] = 0.0; + + ASSERT_TRUE(HasNAN(src)); +} + +TEST(IsInf, CPU) { + using namespace paddle::framework; + using namespace paddle::platform; + Tensor src; + double* buf = src.mutable_data({3}, CPUPlace()); + buf[0] = 1.0; + buf[1] = INFINITY; + buf[2] = 0.0; + ASSERT_TRUE(HasInf(src)); +} + TEST(Tensor, SerializeAndDeserialize) { framework::Tensor src_tensor; int array[6] = {1, 2, 3, 4, 5, 6}; diff --git a/paddle/framework/tensor_util_test.cu b/paddle/framework/tensor_util_test.cu new file mode 100644 index 0000000000000000000000000000000000000000..ebd35fdf6c2a1388fec23057070f723c8ef9da9c --- /dev/null +++ b/paddle/framework/tensor_util_test.cu @@ -0,0 +1,57 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "gtest/gtest.h" +#include "paddle/framework/tensor_util.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/place.h" + +namespace paddle { +namespace framework { + +static __global__ void FillNAN(float* buf) { + buf[0] = 0.0; + buf[1] = 0.1; + buf[2] = NAN; +} +static __global__ void FillInf(float* buf) { + buf[0] = 0.0; + buf[1] = INFINITY; + buf[2] = 0.5; +} + +TEST(HasNAN, GPU) { + Tensor tensor; + platform::CUDAPlace gpu(0); + auto& pool = platform::DeviceContextPool::Instance(); + auto* cuda_ctx = pool.GetByPlace(gpu); + float* buf = tensor.mutable_data({3}, gpu); + FillNAN<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + ASSERT_TRUE(HasNAN(tensor)); +} + +TEST(HasInf, GPU) { + Tensor tensor; + platform::CUDAPlace gpu(0); + auto& pool = platform::DeviceContextPool::Instance(); + auto* cuda_ctx = pool.GetByPlace(gpu); + float* buf = tensor.mutable_data({3}, gpu); + FillInf<<<1, 1, 0, cuda_ctx->stream()>>>(buf); + cuda_ctx->Wait(); + ASSERT_TRUE(HasInf(tensor)); +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/platform/device_context.h b/paddle/platform/device_context.h index dfef2c16d8f2277d57cbcfe51d108402e518799b..2b366e6383d23e2d31a194edd04412892a8311eb 100644 --- a/paddle/platform/device_context.h +++ b/paddle/platform/device_context.h @@ -52,6 +52,14 @@ class CPUDeviceContext : public DeviceContext { std::unique_ptr eigen_device_; }; +template +struct DefaultDeviceContextType; + +template <> +struct DefaultDeviceContextType { + using TYPE = CPUDeviceContext; +}; + #ifdef PADDLE_WITH_CUDA class EigenCudaStreamDevice; @@ -90,6 +98,11 @@ class CUDADeviceContext : public DeviceContext { cublasHandle_t cublas_handle_; }; +template <> +struct DefaultDeviceContextType { + using TYPE = CUDADeviceContext; +}; + class CUDNNDeviceContext : public CUDADeviceContext { public: explicit CUDNNDeviceContext(CUDAPlace place); @@ -125,6 +138,13 @@ class DeviceContextPool { /*! \brief Return handle of single device context. */ const platform::DeviceContext* Get(const platform::Place& place); + template + const typename DefaultDeviceContextType::TYPE* GetByPlace( + const Place& place) { + return reinterpret_cast< + const typename DefaultDeviceContextType::TYPE*>(Get(place)); + } + private: static DeviceContextPool* pool; constexpr static int LEFT_SHIFT = 8; diff --git a/paddle/platform/place.h b/paddle/platform/place.h index d25eaa689f4a4baa951db5c61bbf99288e365ee1..76b5c502cc48431a4e9b13b07505978884576e1d 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include - +#include "paddle/platform/enforce.h" #include "paddle/platform/variant.h" namespace paddle { @@ -64,5 +64,31 @@ bool places_are_same_class(const Place &, const Place &); std::ostream &operator<<(std::ostream &, const Place &); +template +struct PlaceVisitorWrapper + : public boost::static_visitor { + const Visitor &visitor_; + explicit PlaceVisitorWrapper(const Visitor &visitor) : visitor_(visitor) {} + + typename Visitor::result_type operator()(const CPUPlace &cpu) const { + return visitor_(cpu); + } + + typename Visitor::result_type operator()(const CUDAPlace &cuda) const { +#ifdef PADDLE_WITH_CUDA + return visitor_(cuda); +#else + PADDLE_THROW("Paddle is not compiled with CUDA. Cannot visit cuda device"); + return typename Visitor::result_type(); +#endif + } +}; + +template +typename Visitor::result_type VisitPlace(const Place &place, + const Visitor &visitor) { + return boost::apply_visitor(PlaceVisitorWrapper(visitor), place); +} + } // namespace platform } // namespace paddle