未验证 提交 ac933235 编写于 作者: Z Zhang Ting 提交者: GitHub

[part 5]change type of function args (#38889)

上级 73742d36
...@@ -54,7 +54,7 @@ using InverseDivFunctor = pten::funcs::InverseDivideFunctor<T>; ...@@ -54,7 +54,7 @@ using InverseDivFunctor = pten::funcs::InverseDivideFunctor<T>;
// Floor Divide // Floor Divide
template <typename T> template <typename T>
struct FloorDivFunctor { struct FloorDivFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO); PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
return static_cast<T>(std::trunc(a / b)); return static_cast<T>(std::trunc(a / b));
} }
...@@ -62,7 +62,7 @@ struct FloorDivFunctor { ...@@ -62,7 +62,7 @@ struct FloorDivFunctor {
template <typename T> template <typename T>
struct InverseFloorDivFunctor { struct InverseFloorDivFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE(a != 0, DIV_ERROR_INFO); PADDLE_ENFORCE(a != 0, DIV_ERROR_INFO);
return static_cast<T>(std::trunc(b / a)); return static_cast<T>(std::trunc(b / a));
} }
...@@ -73,7 +73,7 @@ struct InverseFloorDivFunctor { ...@@ -73,7 +73,7 @@ struct InverseFloorDivFunctor {
// Maximum // Maximum
template <typename T> template <typename T>
struct MaxFunctor { struct MaxFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { inline HOSTDEVICE T operator()(const T a, const T b) const {
return a > b ? a : b; return a > b ? a : b;
} }
}; };
...@@ -81,7 +81,7 @@ struct MaxFunctor { ...@@ -81,7 +81,7 @@ struct MaxFunctor {
// Minimum // Minimum
template <typename T> template <typename T>
struct MinFunctor { struct MinFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { inline HOSTDEVICE T operator()(const T a, const T b) const {
return a < b ? a : b; return a < b ? a : b;
} }
}; };
...@@ -119,14 +119,14 @@ struct DivGradXYFunctor<Complex<InT>, Complex<OutT>> { ...@@ -119,14 +119,14 @@ struct DivGradXYFunctor<Complex<InT>, Complex<OutT>> {
// Float div grad // Float div grad
template <typename T> template <typename T>
struct DivGradXFunctor { struct DivGradXFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; } inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; }
}; };
// Complex div grad // Complex div grad
template <typename T> template <typename T>
struct DivGradXFunctor<Complex<T>> { struct DivGradXFunctor<Complex<T>> {
inline HOSTDEVICE Complex<T> operator()(const Complex<T>& a, inline HOSTDEVICE Complex<T> operator()(const Complex<T> a,
const Complex<T>& b) const { const Complex<T> b) const {
Complex<T> b_conj(b.real, -b.imag); Complex<T> b_conj(b.real, -b.imag);
return a / b_conj; return a / b_conj;
} }
...@@ -135,7 +135,7 @@ struct DivGradXFunctor<Complex<T>> { ...@@ -135,7 +135,7 @@ struct DivGradXFunctor<Complex<T>> {
// Float mul and div // Float mul and div
template <typename T> template <typename T>
struct DivGradYFunctor { struct DivGradYFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b, const T& c) const { inline HOSTDEVICE T operator()(const T a, const T b, const T c) const {
return -a * b / c; return -a * b / c;
} }
}; };
...@@ -143,9 +143,9 @@ struct DivGradYFunctor { ...@@ -143,9 +143,9 @@ struct DivGradYFunctor {
// Complex mul and div // Complex mul and div
template <typename T> template <typename T>
struct DivGradYFunctor<Complex<T>> { struct DivGradYFunctor<Complex<T>> {
inline HOSTDEVICE Complex<T> operator()(const Complex<T>& a, inline HOSTDEVICE Complex<T> operator()(const Complex<T> a,
const Complex<T>& b, const Complex<T> b,
const Complex<T>& c) const { const Complex<T> c) const {
Complex<T> out_div_c_conj((b / c).real, -(b / c).imag); Complex<T> out_div_c_conj((b / c).real, -(b / c).imag);
return -a * out_div_c_conj; return -a * out_div_c_conj;
} }
...@@ -154,7 +154,7 @@ struct DivGradYFunctor<Complex<T>> { ...@@ -154,7 +154,7 @@ struct DivGradYFunctor<Complex<T>> {
// Fmax // Fmax
template <typename T> template <typename T>
struct FMaxFunctor { struct FMaxFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { inline HOSTDEVICE T operator()(const T a, const T b) const {
return std::fmax(a, b); return std::fmax(a, b);
} }
}; };
...@@ -162,8 +162,8 @@ struct FMaxFunctor { ...@@ -162,8 +162,8 @@ struct FMaxFunctor {
template <> template <>
struct FMaxFunctor<paddle::platform::float16> { struct FMaxFunctor<paddle::platform::float16> {
inline HOSTDEVICE paddle::platform::float16 operator()( inline HOSTDEVICE paddle::platform::float16 operator()(
const paddle::platform::float16& a, const paddle::platform::float16 a,
const paddle::platform::float16& b) const { const paddle::platform::float16 b) const {
float float_a = static_cast<float>(a); float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b); float float_b = static_cast<float>(b);
auto result = std::fmax(float_a, float_b); auto result = std::fmax(float_a, float_b);
...@@ -173,7 +173,7 @@ struct FMaxFunctor<paddle::platform::float16> { ...@@ -173,7 +173,7 @@ struct FMaxFunctor<paddle::platform::float16> {
template <> template <>
struct FMaxFunctor<int> { struct FMaxFunctor<int> {
inline HOSTDEVICE int operator()(const int& a, const int& b) const { inline HOSTDEVICE int operator()(const int a, const int b) const {
float float_a = static_cast<float>(a); float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b); float float_b = static_cast<float>(b);
auto result = std::fmax(float_a, float_b); auto result = std::fmax(float_a, float_b);
...@@ -183,8 +183,7 @@ struct FMaxFunctor<int> { ...@@ -183,8 +183,7 @@ struct FMaxFunctor<int> {
template <> template <>
struct FMaxFunctor<int64_t> { struct FMaxFunctor<int64_t> {
inline HOSTDEVICE int64_t operator()(const int64_t& a, inline HOSTDEVICE int64_t operator()(const int64_t a, const int64_t b) const {
const int64_t& b) const {
double double_a = static_cast<double>(a); double double_a = static_cast<double>(a);
double double_b = static_cast<double>(b); double double_b = static_cast<double>(b);
auto result = std::fmax(double_a, double_b); auto result = std::fmax(double_a, double_b);
...@@ -195,7 +194,7 @@ struct FMaxFunctor<int64_t> { ...@@ -195,7 +194,7 @@ struct FMaxFunctor<int64_t> {
// Fmin // Fmin
template <typename T> template <typename T>
struct FMinFunctor { struct FMinFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { inline HOSTDEVICE T operator()(const T a, const T b) const {
return std::fmin(a, b); return std::fmin(a, b);
} }
}; };
...@@ -203,8 +202,8 @@ struct FMinFunctor { ...@@ -203,8 +202,8 @@ struct FMinFunctor {
template <> template <>
struct FMinFunctor<paddle::platform::float16> { struct FMinFunctor<paddle::platform::float16> {
inline HOSTDEVICE paddle::platform::float16 operator()( inline HOSTDEVICE paddle::platform::float16 operator()(
const paddle::platform::float16& a, const paddle::platform::float16 a,
const paddle::platform::float16& b) const { const paddle::platform::float16 b) const {
float float_a = static_cast<float>(a); float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b); float float_b = static_cast<float>(b);
auto result = std::fmin(float_a, float_b); auto result = std::fmin(float_a, float_b);
...@@ -214,7 +213,7 @@ struct FMinFunctor<paddle::platform::float16> { ...@@ -214,7 +213,7 @@ struct FMinFunctor<paddle::platform::float16> {
template <> template <>
struct FMinFunctor<int> { struct FMinFunctor<int> {
inline HOSTDEVICE int operator()(const int& a, const int& b) const { inline HOSTDEVICE int operator()(const int a, const int b) const {
float float_a = static_cast<float>(a); float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b); float float_b = static_cast<float>(b);
auto result = std::fmin(float_a, float_b); auto result = std::fmin(float_a, float_b);
...@@ -224,8 +223,7 @@ struct FMinFunctor<int> { ...@@ -224,8 +223,7 @@ struct FMinFunctor<int> {
template <> template <>
struct FMinFunctor<int64_t> { struct FMinFunctor<int64_t> {
inline HOSTDEVICE int64_t operator()(const int64_t& a, inline HOSTDEVICE int64_t operator()(const int64_t a, const int64_t b) const {
const int64_t& b) const {
double double_a = static_cast<double>(a); double double_a = static_cast<double>(a);
double double_b = static_cast<double>(b); double double_b = static_cast<double>(b);
auto result = std::fmin(double_a, double_b); auto result = std::fmin(double_a, double_b);
...@@ -261,12 +259,12 @@ struct MinGradXYFunctor { ...@@ -261,12 +259,12 @@ struct MinGradXYFunctor {
template <typename T> template <typename T>
struct MulGradFunctor { struct MulGradFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; } inline HOSTDEVICE T operator()(const T a, const T b) const { return a * b; }
}; };
template <typename T> template <typename T>
struct MulGradFunctor<Complex<T>> { struct MulGradFunctor<Complex<T>> {
inline HOSTDEVICE Complex<T> operator()(const Complex<T>& a, inline HOSTDEVICE Complex<T> operator()(const Complex<T> a,
const Complex<T>& b) const { const Complex<T> b) const {
Complex<T> b_conj(b.real, -b.imag); Complex<T> b_conj(b.real, -b.imag);
return a * b_conj; return a * b_conj;
} }
...@@ -274,9 +272,9 @@ struct MulGradFunctor<Complex<T>> { ...@@ -274,9 +272,9 @@ struct MulGradFunctor<Complex<T>> {
template <typename InT, typename OutT> template <typename InT, typename OutT>
struct MulGradXYFunctor { struct MulGradXYFunctor {
inline HOSTDEVICE paddle::framework::Array<OutT, 2> operator()(const InT& a, inline HOSTDEVICE paddle::framework::Array<OutT, 2> operator()(const InT a,
const InT& b, const InT b,
const InT& c) { const InT c) {
paddle::framework::Array<OutT, 2> outs; paddle::framework::Array<OutT, 2> outs;
// dx = dout * y // dx = dout * y
outs[0] = a * b; outs[0] = a * b;
...@@ -289,7 +287,7 @@ struct MulGradXYFunctor { ...@@ -289,7 +287,7 @@ struct MulGradXYFunctor {
template <typename InT, typename OutT> template <typename InT, typename OutT>
struct MulGradXYFunctor<Complex<InT>, Complex<OutT>> { struct MulGradXYFunctor<Complex<InT>, Complex<OutT>> {
inline HOSTDEVICE paddle::framework::Array<Complex<OutT>, 2> operator()( inline HOSTDEVICE paddle::framework::Array<Complex<OutT>, 2> operator()(
const Complex<InT>& a, const Complex<InT>& b, const Complex<InT>& c) { const Complex<InT> a, const Complex<InT> b, const Complex<InT> c) {
paddle::framework::Array<Complex<OutT>, 2> outs; paddle::framework::Array<Complex<OutT>, 2> outs;
// dx = dout * y // dx = dout * y
Complex<InT> b_conj(b.real, -b.imag); Complex<InT> b_conj(b.real, -b.imag);
......
...@@ -20,31 +20,6 @@ namespace plat = paddle::platform; ...@@ -20,31 +20,6 @@ namespace plat = paddle::platform;
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T, typename Enable = void>
struct CudaModFunctor {
inline HOSTDEVICE T operator()(const T* args) const {
T res = args[0] % args[1];
// According to #PR26732: in dividend % divisor
// remainder shall have the same sign as divisor.
if ((res != 0) && ((args[1] ^ res) < 0)) res += args[1];
return res;
}
};
template <typename T>
struct CudaModFunctor<
T, typename std::enable_if_t<std::is_floating_point<T>::value>> {
inline HOSTDEVICE T operator()(const T* args) const {
T res = fmod(args[0], args[1]);
// According to #PR26732: in dividend % divisor
// remainder shall have the same sign as divisor.
if ((res != 0) && ((res < 0) != (args[1] < 0))) res += args[1];
return res;
}
};
template <typename T> template <typename T>
class ElementwiseModKernel<platform::CUDADeviceContext, T> class ElementwiseModKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> { : public framework::OpKernel<T> {
...@@ -56,7 +31,7 @@ class ElementwiseModKernel<platform::CUDADeviceContext, T> ...@@ -56,7 +31,7 @@ class ElementwiseModKernel<platform::CUDADeviceContext, T>
ctx.template device_context<platform::CUDADeviceContext>(); ctx.template device_context<platform::CUDADeviceContext>();
int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs); int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>( LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
cuda_ctx, ins, &outs, axis, CudaModFunctor<T>()); cuda_ctx, ins, &outs, axis, ModFunctor<T>());
} }
}; };
......
...@@ -19,29 +19,36 @@ limitations under the License. */ ...@@ -19,29 +19,36 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T, typename Enable = void>
struct ModFunctor { struct ModFunctor {
inline HOSTDEVICE T operator()(T a, T b) const { inline HOSTDEVICE T operator()(const T a, const T b) const {
T res = a % b; T res = a % b;
if ((res != 0) && ((res < 0) != (b < 0))) res += b;
// According to #PR26732: in dividend % divisor
// remainder shall have the same sign as divisor.
if ((res != 0) && ((b ^ res) < 0)) res += b;
return res; return res;
} }
}; };
template <typename T> template <typename T>
struct InverseModFunctor { struct ModFunctor<T,
inline HOSTDEVICE T operator()(T a, T b) const { typename std::enable_if_t<std::is_floating_point<T>::value>> {
T res = b % a; inline HOSTDEVICE T operator()(const T a, const T b) const {
if ((res != 0) && ((res < 0) != (a < 0))) res += a; T res = fmod(a, b);
// According to #PR26732: in dividend % divisor
// remainder shall have the same sign as divisor.
if ((res != 0) && ((res < 0) != (b < 0))) res += b;
return res; return res;
} }
}; };
template <typename T> template <typename T>
struct ModFunctorFP { struct InverseModFunctor {
inline HOSTDEVICE T operator()(T a, T b) const { inline HOSTDEVICE T operator()(T a, T b) const {
T res = fmod(a, b); T res = b % a;
if ((res != 0) && ((b < 0) != (res < 0))) res += b; if ((res != 0) && ((res < 0) != (a < 0))) res += a;
return res; return res;
} }
}; };
...@@ -79,8 +86,8 @@ void elementwise_mod_fp(const framework::ExecutionContext &ctx, ...@@ -79,8 +86,8 @@ void elementwise_mod_fp(const framework::ExecutionContext &ctx,
auto x_dims = x->dims(); auto x_dims = x->dims();
auto y_dims = y->dims(); auto y_dims = y->dims();
if (x_dims.size() >= y_dims.size()) { if (x_dims.size() >= y_dims.size()) {
ElementwiseComputeEx<ModFunctorFP<T>, DeviceContext, T>( ElementwiseComputeEx<ModFunctor<T>, DeviceContext, T>(ctx, x, y, axis,
ctx, x, y, axis, ModFunctorFP<T>(), z); ModFunctor<T>(), z);
} else { } else {
ElementwiseComputeEx<InverseModFunctorFP<T>, DeviceContext, T>( ElementwiseComputeEx<InverseModFunctorFP<T>, DeviceContext, T>(
ctx, x, y, axis, InverseModFunctorFP<T>(), z); ctx, x, y, axis, InverseModFunctorFP<T>(), z);
......
...@@ -16,26 +16,6 @@ namespace ops = paddle::operators; ...@@ -16,26 +16,6 @@ namespace ops = paddle::operators;
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T, typename Enable = void>
struct CudaPowFunctor {
inline HOSTDEVICE T operator()(const T args[]) const {
return std::pow(args[0], args[1]);
}
};
template <typename T>
struct CudaPowFunctor<
T, typename std::enable_if<std::is_integral<T>::value>::type> {
// On CUDAPlace, std::pow(3, 1) calls pow(float, float), and
// it will return a float number like 2.99... , which floor to 2
// when cast to int by default and it is wrong.
// Use llrint to cast it to the nearest integer, which is 3.
inline HOSTDEVICE T operator()(const T args[]) const {
return std::llrint(
std::pow(static_cast<double>(args[0]), static_cast<double>(args[1])));
}
};
template <typename T> template <typename T>
class ElementwisePowKernel<platform::CUDADeviceContext, T> class ElementwisePowKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> { : public framework::OpKernel<T> {
...@@ -48,7 +28,7 @@ class ElementwisePowKernel<platform::CUDADeviceContext, T> ...@@ -48,7 +28,7 @@ class ElementwisePowKernel<platform::CUDADeviceContext, T>
int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs); int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>( LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
cuda_ctx, ins, &outs, axis, CudaPowFunctor<T>()); cuda_ctx, ins, &outs, axis, PowFunctor<T>());
} }
}; };
......
...@@ -26,31 +26,31 @@ namespace funcs { ...@@ -26,31 +26,31 @@ namespace funcs {
// Add // Add
template <typename T> template <typename T>
struct AddFunctor { struct AddFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a + b; } inline HOSTDEVICE T operator()(const T a, const T b) const { return a + b; }
}; };
template <typename T> template <typename T>
struct InverseAddFunctor { struct InverseAddFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b + a; } inline HOSTDEVICE T operator()(const T a, const T b) const { return b + a; }
}; };
// Subtract // Subtract
template <typename T> template <typename T>
struct SubtractFunctor { struct SubtractFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a - b; } inline HOSTDEVICE T operator()(const T a, const T b) const { return a - b; }
}; };
template <typename T> template <typename T>
struct InverseSubtractFunctor { struct InverseSubtractFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b - a; } inline HOSTDEVICE T operator()(const T a, const T b) const { return b - a; }
}; };
// Multiply // Multiply
template <typename T> template <typename T>
struct MultiplyFunctor { struct MultiplyFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; } inline HOSTDEVICE T operator()(const T a, const T b) const { return a * b; }
}; };
template <typename T> template <typename T>
struct InverseMultiplyFunctor { struct InverseMultiplyFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b * a; } inline HOSTDEVICE T operator()(const T a, const T b) const { return b * a; }
}; };
// Divide // Divide
...@@ -60,14 +60,14 @@ struct InverseMultiplyFunctor { ...@@ -60,14 +60,14 @@ struct InverseMultiplyFunctor {
template <typename T, typename Enable = void> template <typename T, typename Enable = void>
struct DivideFunctor { struct DivideFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; } inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; }
}; };
template <typename T> template <typename T>
struct DivideFunctor< struct DivideFunctor<
T, T,
typename std::enable_if<std::is_integral<T>::value>::type> { typename std::enable_if<std::is_integral<T>::value>::type> {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { inline HOSTDEVICE T operator()(const T a, const T b) const {
// For int32/int64, need to check whether the divisor is zero. // For int32/int64, need to check whether the divisor is zero.
PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO); PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
return a / b; return a / b;
...@@ -76,7 +76,7 @@ struct DivideFunctor< ...@@ -76,7 +76,7 @@ struct DivideFunctor<
template <typename T, typename Enable = void> template <typename T, typename Enable = void>
struct InverseDivideFunctor { struct InverseDivideFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b / a; } inline HOSTDEVICE T operator()(const T a, const T b) const { return b / a; }
}; };
} // namespace funcs } // namespace funcs
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册