diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index c9ba478a099f4f9da93caa7741cfb53763568c12..79c00fd039b34bf2090865d1d4e797c24c3479e1 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -40,6 +40,7 @@ proto_library(async_executor_proto SRCS data_feed.proto) cc_library(ddim SRCS ddim.cc DEPS eigen3 boost enforce) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) +cc_test(unroll_array_ops_test SRCS unroll_array_ops_test.cc) cc_library(data_type SRCS data_type.cc DEPS framework_proto ddim device_context) cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor) if(WITH_GPU) diff --git a/paddle/fluid/framework/array.h b/paddle/fluid/framework/array.h index aa0abc22a6bc9fd32a7dab2096223eef140acef9..b53082986882c80a85826f10d5766525f72c0a97 100644 --- a/paddle/fluid/framework/array.h +++ b/paddle/fluid/framework/array.h @@ -26,11 +26,12 @@ class Array { public: static constexpr size_t kSize = N; - HOSTDEVICE inline Array() = default; + HOSTDEVICE inline Array() {} template HOSTDEVICE inline explicit Array(const T &val, Args... args) { - UnrollVarArgsAssign::Run(data_, val, args...); + static_assert(N == sizeof...(Args) + 1, "Invalid argument"); + UnrollVarArgsAssign::Run(data_, val, args...); } HOSTDEVICE inline void Fill(const T &val) { @@ -41,10 +42,29 @@ class Array { HOSTDEVICE inline T *GetMutable() { return data_; } - HOSTDEVICE inline T &operator[](size_t index) { return data_[index]; } + HOSTDEVICE inline T &operator[](size_t i) { return *advance(data_, i); } - HOSTDEVICE inline const T &operator[](size_t index) const { - return data_[index]; + // Writing "return data_[i]" would cause compilation warning/error: + // "array subscript is above array bound" in Python 35 CI. + // It seems that it is a false warning of GCC if we do not check the bounds + // of array index. But for better performance, we do not check in operator[] + // like what is in STL. If users want to check the bounds, use at() instead + HOSTDEVICE inline const T &operator[](size_t i) const { + return *advance(data_, i); + } + + HOSTDEVICE inline T &at(size_t i) { +#ifndef __CUDA_ARCH__ + PADDLE_ENFORCE_LT(i, N, "Array index out of bounds"); +#endif + return (*this)[i]; + } + + HOSTDEVICE inline const T &at(size_t i) const { +#ifndef __CUDA_ARCH__ + PADDLE_ENFORCE_LT(i, N, "Array index out of bounds"); +#endif + return (*this)[i]; } HOSTDEVICE constexpr size_t size() const { return N; } @@ -58,6 +78,11 @@ class Array { } private: + template + HOSTDEVICE static inline U *advance(U *ptr, size_t i) { + return ptr + i; + } + T data_[N]; }; @@ -66,7 +91,7 @@ class Array { public: static constexpr size_t kSize = 0; - HOSTDEVICE inline Array() = default; + HOSTDEVICE inline Array() {} HOSTDEVICE inline void Fill(const T &val) {} @@ -75,18 +100,28 @@ class Array { // Add constexpr to GetMutable() cause warning in MAC HOSTDEVICE inline T *GetMutable() { return nullptr; } - HOSTDEVICE inline T &operator[](size_t index) { -#ifndef __CUDA_ARCH__ + HOSTDEVICE inline T &operator[](size_t) { +#ifdef __CUDA_ARCH__ + static T obj(); + return obj; +#else PADDLE_THROW("Array has no element"); #endif } - HOSTDEVICE inline const T &operator[](size_t index) const { -#ifndef __CUDA_ARCH__ + HOSTDEVICE inline const T &operator[](size_t) const { +#ifdef __CUDA_ARCH__ + static const T obj(); + return obj; +#else PADDLE_THROW("Array has no element"); #endif } + HOSTDEVICE inline T &at(size_t i) { return (*this)[i]; } + + HOSTDEVICE inline const T &at(size_t i) const { return (*this)[i]; } + HOSTDEVICE constexpr size_t size() const { return 0; } HOSTDEVICE constexpr bool operator==(const Array &other) const { diff --git a/paddle/fluid/framework/ddim.h b/paddle/fluid/framework/ddim.h index 1fd3badbb27ba4e9f4cafa8025061ce08483d273..28cb8171f623fd1a02438b5ffacd485d14ea8875 100644 --- a/paddle/fluid/framework/ddim.h +++ b/paddle/fluid/framework/ddim.h @@ -60,9 +60,7 @@ class DDim { DDim() : rank_(1) { dim_[0] = 0; } - DDim(const DDim& ddim) : dim_(), rank_(ddim.rank_) { - dynamic_dim_assign(ddim.dim_.Get(), dim_.GetMutable(), rank_); - } + DDim(const DDim& ddim) { CopyFrom(ddim); } DDim(const int* d, int n) : rank_(n) { dynamic_dim_assign(d, dim_.GetMutable(), n); @@ -80,10 +78,12 @@ class DDim { /*implicit*/ DDim(std::initializer_list init_list) : DDim(init_list.begin(), init_list.size()) {} + inline DDim& operator=(const DDim& ddim) { return CopyFrom(ddim); } + template - inline DDim& operator=(const Dim& in) { + inline DDim& operator=(const Dim& dim) { rank_ = D; - UnsafeCast() = in; + UnsafeCast() = dim; return *this; } diff --git a/paddle/fluid/framework/unroll_array_ops.h b/paddle/fluid/framework/unroll_array_ops.h index fb0a89530f61ac3ef92298fd62260bec3d964bc6..731da74eff4d22da6730e589a1af919514f1c4b7 100644 --- a/paddle/fluid/framework/unroll_array_ops.h +++ b/paddle/fluid/framework/unroll_array_ops.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once +#include #include #include "paddle/fluid/platform/hostdevice.h" @@ -52,21 +53,30 @@ struct UnrollAssign { }; template -struct UnrollVarArgsAssign { +struct UnrollVarArgsAssignImpl { template HOSTDEVICE inline static void Run(T *d, T val, Args... args) { static_assert(sizeof...(args) + 1 == kEnd - kStart, "Wrong argument"); d[kStart] = val; - UnrollVarArgsAssign::Run(d, - args...); + UnrollVarArgsAssignImpl::Run( + d, args...); } }; template -struct UnrollVarArgsAssign { +struct UnrollVarArgsAssignImpl { HOSTDEVICE inline static void Run(T *d) {} }; +template +struct UnrollVarArgsAssign { + template + HOSTDEVICE inline static void Run(T *d, Args... args) { + UnrollVarArgsAssignImpl::Run( + d, args...); + } +}; + template struct UnrollCompare { template @@ -150,8 +160,8 @@ using UnrollFillConstant = detail::UnrollFillConstant<0, N, N == 0>; template using UnrollAssign = detail::UnrollAssign<0, N, N == 0>; -template -using UnrollVarArgsAssign = detail::UnrollVarArgsAssign; +template +using UnrollVarArgsAssign = detail::UnrollVarArgsAssign; template using UnrollCompare = detail::UnrollCompare<0, N, N == 0>; diff --git a/paddle/fluid/framework/unroll_array_ops_test.cc b/paddle/fluid/framework/unroll_array_ops_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..51433c83c801765d8df10590abdd319ba60e4873 --- /dev/null +++ b/paddle/fluid/framework/unroll_array_ops_test.cc @@ -0,0 +1,108 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/unroll_array_ops.h" +#include +#include +#include +#include + +namespace paddle { +namespace framework { + +template +bool CheckEquality(const T* p, size_t n, T val) { + return std::all_of(p, p + n, [val](const T& v) { return v == val; }); +} + +template +bool FillConstantTestMain() { + static_assert(D1 >= D2, ""); + std::array arr; + arr.fill(0); + + UnrollFillConstant::Run(arr.data(), 1); + return CheckEquality(arr.data(), D2, 1) && + CheckEquality(arr.data() + D2, arr.size() - D2, 0); +} + +TEST(unroll_ops, fill_constant) { + EXPECT_TRUE((FillConstantTestMain<9, 0>())); + EXPECT_TRUE((FillConstantTestMain<9, 1>())); + EXPECT_TRUE((FillConstantTestMain<9, 4>())); + EXPECT_TRUE((FillConstantTestMain<9, 9>())); +} + +TEST(unroll_ops, assign) { + const int a[] = {1, 2, 3, 4, 5}; + int b[] = {0, 0, 0, 0, 0}; + UnrollAssign<3>::Run(a, b); + EXPECT_EQ(b[0], 1); + EXPECT_EQ(b[1], 2); + EXPECT_EQ(b[2], 3); + EXPECT_EQ(b[3], 0); + EXPECT_EQ(b[4], 0); +} + +TEST(unroll_ops, var_args_assign) { + int a[] = {0, 0, 0}; + UnrollVarArgsAssign::Run(a, 1, 2); + EXPECT_EQ(a[0], 1); + EXPECT_EQ(a[1], 2); + EXPECT_EQ(a[2], 0); +} + +TEST(unroll_ops, compare) { + int a[] = {1, 2, 3}; + int b[] = {1, 2, 4}; + EXPECT_TRUE(UnrollCompare<2>::Run(a, b)); + EXPECT_FALSE(UnrollCompare<3>::Run(a, b)); + + b[0] = -1; + EXPECT_TRUE(UnrollCompare<0>::Run(a, b)); + EXPECT_FALSE(UnrollCompare<1>::Run(a, b)); +} + +TEST(unroll_ops, add) { + int a[] = {2, 3, 4}; + int b[] = {5, 10, 102}; + int c[] = {0, 0, 0}; + UnrollAdd<2>::Run(a, b, c); + EXPECT_EQ(a[0] + b[0], c[0]); + EXPECT_EQ(a[1] + b[1], c[1]); + EXPECT_EQ(c[2], 0); +} + +TEST(unroll_ops, mul) { + int a[] = {2, 3, 4}; + int b[] = {5, 10, 102}; + int c[] = {0, 0, 0}; + UnrollMul<2>::Run(a, b, c); + EXPECT_EQ(a[0] * b[0], c[0]); + EXPECT_EQ(a[1] * b[1], c[1]); + EXPECT_EQ(c[2], 0); +} + +TEST(unroll_ops, product) { + int a[] = {2, 3, 4}; + int b[] = {5, 10, 102}; + + EXPECT_EQ(UnrollProduct<3>::Run(a), a[0] * a[1] * a[2]); + + EXPECT_EQ(UnrollProduct<3>::Run(a, b), + a[0] * b[0] + a[1] * b[1] + a[2] * b[2]); +} + +} // namespace framework +} // namespace paddle