Unverified commit ac933235, authored by Zhang Ting, committed by GitHub

[part 5] change type of function args (#38889)

Parent 73742d36
......@@ -54,7 +54,7 @@ using InverseDivFunctor = pten::funcs::InverseDivideFunctor<T>;
// Floor Divide
template <typename T>
struct FloorDivFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const {
inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
return static_cast<T>(std::trunc(a / b));
}
......@@ -62,7 +62,7 @@ struct FloorDivFunctor {
template <typename T>
struct InverseFloorDivFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const {
inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE(a != 0, DIV_ERROR_INFO);
return static_cast<T>(std::trunc(b / a));
}
......@@ -73,7 +73,7 @@ struct InverseFloorDivFunctor {
// Maximum
template <typename T>
struct MaxFunctor {
  // Returns the larger of a and b (b wins only when strictly smaller than a is false).
  inline HOSTDEVICE T operator()(const T a, const T b) const {
    return a > b ? a : b;
  }
};
......@@ -81,7 +81,7 @@ struct MaxFunctor {
// Minimum
template <typename T>
struct MinFunctor {
  // Returns the smaller of a and b.
  inline HOSTDEVICE T operator()(const T a, const T b) const {
    return a < b ? a : b;
  }
};
......@@ -119,14 +119,14 @@ struct DivGradXYFunctor<Complex<InT>, Complex<OutT>> {
// Float div grad
template <typename T>
struct DivGradXFunctor {
  // Computes a / b (used as the x-gradient of elementwise divide).
  inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; }
};
// Complex div grad
template <typename T>
struct DivGradXFunctor<Complex<T>> {
inline HOSTDEVICE Complex<T> operator()(const Complex<T>& a,
const Complex<T>& b) const {
inline HOSTDEVICE Complex<T> operator()(const Complex<T> a,
const Complex<T> b) const {
Complex<T> b_conj(b.real, -b.imag);
return a / b_conj;
}
......@@ -135,7 +135,7 @@ struct DivGradXFunctor<Complex<T>> {
// Float mul and div
template <typename T>
struct DivGradYFunctor {
  // Computes -a * b / c (used as the y-gradient of elementwise divide).
  inline HOSTDEVICE T operator()(const T a, const T b, const T c) const {
    return -a * b / c;
  }
};
......@@ -143,9 +143,9 @@ struct DivGradYFunctor {
// Complex mul and div
template <typename T>
struct DivGradYFunctor<Complex<T>> {
inline HOSTDEVICE Complex<T> operator()(const Complex<T>& a,
const Complex<T>& b,
const Complex<T>& c) const {
inline HOSTDEVICE Complex<T> operator()(const Complex<T> a,
const Complex<T> b,
const Complex<T> c) const {
Complex<T> out_div_c_conj((b / c).real, -(b / c).imag);
return -a * out_div_c_conj;
}
......@@ -154,7 +154,7 @@ struct DivGradYFunctor<Complex<T>> {
// Fmax
template <typename T>
struct FMaxFunctor {
  // Element-wise fmax; per std::fmax, if one operand is NaN the other is returned.
  inline HOSTDEVICE T operator()(const T a, const T b) const {
    return std::fmax(a, b);
  }
};
......@@ -162,8 +162,8 @@ struct FMaxFunctor {
template <>
struct FMaxFunctor<paddle::platform::float16> {
inline HOSTDEVICE paddle::platform::float16 operator()(
const paddle::platform::float16& a,
const paddle::platform::float16& b) const {
const paddle::platform::float16 a,
const paddle::platform::float16 b) const {
float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b);
auto result = std::fmax(float_a, float_b);
......@@ -173,7 +173,7 @@ struct FMaxFunctor<paddle::platform::float16> {
template <>
struct FMaxFunctor<int> {
inline HOSTDEVICE int operator()(const int& a, const int& b) const {
inline HOSTDEVICE int operator()(const int a, const int b) const {
float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b);
auto result = std::fmax(float_a, float_b);
......@@ -183,8 +183,7 @@ struct FMaxFunctor<int> {
template <>
struct FMaxFunctor<int64_t> {
inline HOSTDEVICE int64_t operator()(const int64_t& a,
const int64_t& b) const {
inline HOSTDEVICE int64_t operator()(const int64_t a, const int64_t b) const {
double double_a = static_cast<double>(a);
double double_b = static_cast<double>(b);
auto result = std::fmax(double_a, double_b);
......@@ -195,7 +194,7 @@ struct FMaxFunctor<int64_t> {
// Fmin
template <typename T>
struct FMinFunctor {
  // Element-wise fmin; per std::fmin, if one operand is NaN the other is returned.
  inline HOSTDEVICE T operator()(const T a, const T b) const {
    return std::fmin(a, b);
  }
};
......@@ -203,8 +202,8 @@ struct FMinFunctor {
template <>
struct FMinFunctor<paddle::platform::float16> {
inline HOSTDEVICE paddle::platform::float16 operator()(
const paddle::platform::float16& a,
const paddle::platform::float16& b) const {
const paddle::platform::float16 a,
const paddle::platform::float16 b) const {
float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b);
auto result = std::fmin(float_a, float_b);
......@@ -214,7 +213,7 @@ struct FMinFunctor<paddle::platform::float16> {
template <>
struct FMinFunctor<int> {
inline HOSTDEVICE int operator()(const int& a, const int& b) const {
inline HOSTDEVICE int operator()(const int a, const int b) const {
float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b);
auto result = std::fmin(float_a, float_b);
......@@ -224,8 +223,7 @@ struct FMinFunctor<int> {
template <>
struct FMinFunctor<int64_t> {
inline HOSTDEVICE int64_t operator()(const int64_t& a,
const int64_t& b) const {
inline HOSTDEVICE int64_t operator()(const int64_t a, const int64_t b) const {
double double_a = static_cast<double>(a);
double double_b = static_cast<double>(b);
auto result = std::fmin(double_a, double_b);
......@@ -261,12 +259,12 @@ struct MinGradXYFunctor {
template <typename T>
struct MulGradFunctor {
  // Computes a * b (gradient helper for elementwise multiply).
  inline HOSTDEVICE T operator()(const T a, const T b) const { return a * b; }
};
template <typename T>
struct MulGradFunctor<Complex<T>> {
inline HOSTDEVICE Complex<T> operator()(const Complex<T>& a,
const Complex<T>& b) const {
inline HOSTDEVICE Complex<T> operator()(const Complex<T> a,
const Complex<T> b) const {
Complex<T> b_conj(b.real, -b.imag);
return a * b_conj;
}
......@@ -274,9 +272,9 @@ struct MulGradFunctor<Complex<T>> {
template <typename InT, typename OutT>
struct MulGradXYFunctor {
inline HOSTDEVICE paddle::framework::Array<OutT, 2> operator()(const InT& a,
const InT& b,
const InT& c) {
inline HOSTDEVICE paddle::framework::Array<OutT, 2> operator()(const InT a,
const InT b,
const InT c) {
paddle::framework::Array<OutT, 2> outs;
// dx = dout * y
outs[0] = a * b;
......@@ -289,7 +287,7 @@ struct MulGradXYFunctor {
template <typename InT, typename OutT>
struct MulGradXYFunctor<Complex<InT>, Complex<OutT>> {
inline HOSTDEVICE paddle::framework::Array<Complex<OutT>, 2> operator()(
const Complex<InT>& a, const Complex<InT>& b, const Complex<InT>& c) {
const Complex<InT> a, const Complex<InT> b, const Complex<InT> c) {
paddle::framework::Array<Complex<OutT>, 2> outs;
// dx = dout * y
Complex<InT> b_conj(b.real, -b.imag);
......
......@@ -20,31 +20,6 @@ namespace plat = paddle::platform;
namespace paddle {
namespace operators {
// Integral modulo whose nonzero remainder takes the sign of the divisor
// (args[1]); see PR #26732.
template <typename T, typename Enable = void>
struct CudaModFunctor {
  inline HOSTDEVICE T operator()(const T* args) const {
    const T divisor = args[1];
    T rem = args[0] % divisor;
    // A sign mismatch between remainder and divisor shows up as a negative XOR.
    if (rem != 0 && (divisor ^ rem) < 0) {
      rem += divisor;
    }
    return rem;
  }
};
// Floating-point modulo whose nonzero remainder takes the sign of the divisor
// (args[1]); see PR #26732.
template <typename T>
struct CudaModFunctor<
    T, typename std::enable_if_t<std::is_floating_point<T>::value>> {
  inline HOSTDEVICE T operator()(const T* args) const {
    const T divisor = args[1];
    T rem = fmod(args[0], divisor);
    // Shift a nonzero remainder onto the divisor's side of zero.
    if (rem != 0 && ((rem < 0) != (divisor < 0))) {
      rem += divisor;
    }
    return rem;
  }
};
template <typename T>
class ElementwiseModKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> {
......@@ -56,7 +31,7 @@ class ElementwiseModKernel<platform::CUDADeviceContext, T>
ctx.template device_context<platform::CUDADeviceContext>();
int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
cuda_ctx, ins, &outs, axis, CudaModFunctor<T>());
cuda_ctx, ins, &outs, axis, ModFunctor<T>());
}
};
......
......@@ -19,29 +19,36 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename Enable = void>
struct ModFunctor {
inline HOSTDEVICE T operator()(T a, T b) const {
inline HOSTDEVICE T operator()(const T a, const T b) const {
T res = a % b;
if ((res != 0) && ((res < 0) != (b < 0))) res += b;
// Accoding to #PR26732: in dividen % divsor
// remainder shall have the same sign as divsor.
if ((res != 0) && ((b ^ res) < 0)) res += b;
return res;
}
};
template <typename T>
struct InverseModFunctor {
inline HOSTDEVICE T operator()(T a, T b) const {
T res = b % a;
if ((res != 0) && ((res < 0) != (a < 0))) res += a;
struct ModFunctor<T,
typename std::enable_if_t<std::is_floating_point<T>::value>> {
inline HOSTDEVICE T operator()(const T a, const T b) const {
T res = fmod(a, b);
// Accoding to #PR26732: in dividen % divsor
// remainder shall have the same sign as divsor.
if ((res != 0) && ((res < 0) != (b < 0))) res += b;
return res;
}
};
// Modulo with operands swapped: computes b % a, with the nonzero remainder
// taking the sign of the divisor a.
template <typename T>
struct InverseModFunctor {
  inline HOSTDEVICE T operator()(T a, T b) const {
    T res = b % a;
    // Keep a nonzero remainder on the same side of zero as the divisor a.
    if ((res != 0) && ((res < 0) != (a < 0))) res += a;
    return res;
  }
};
......@@ -79,8 +86,8 @@ void elementwise_mod_fp(const framework::ExecutionContext &ctx,
auto x_dims = x->dims();
auto y_dims = y->dims();
if (x_dims.size() >= y_dims.size()) {
ElementwiseComputeEx<ModFunctorFP<T>, DeviceContext, T>(
ctx, x, y, axis, ModFunctorFP<T>(), z);
ElementwiseComputeEx<ModFunctor<T>, DeviceContext, T>(ctx, x, y, axis,
ModFunctor<T>(), z);
} else {
ElementwiseComputeEx<InverseModFunctorFP<T>, DeviceContext, T>(
ctx, x, y, axis, InverseModFunctorFP<T>(), z);
......
......@@ -16,26 +16,6 @@ namespace ops = paddle::operators;
namespace paddle {
namespace operators {
// Generic power functor: raises args[0] to the power args[1] via std::pow.
template <typename T, typename Enable = void>
struct CudaPowFunctor {
  inline HOSTDEVICE T operator()(const T args[]) const {
    const T base = args[0];
    const T exponent = args[1];
    return std::pow(base, exponent);
  }
};
template <typename T>
struct CudaPowFunctor<
    T, typename std::enable_if<std::is_integral<T>::value>::type> {
  // On CUDAPlace, std::pow(3, 1) resolves to pow(float, float) and can yield
  // a value like 2.99..., which truncates to 2 when cast back to int.
  // Computing in double and rounding with llrint gives the correct 3.
  inline HOSTDEVICE T operator()(const T args[]) const {
    const double base = static_cast<double>(args[0]);
    const double exponent = static_cast<double>(args[1]);
    return std::llrint(std::pow(base, exponent));
  }
};
template <typename T>
class ElementwisePowKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> {
......@@ -48,7 +28,7 @@ class ElementwisePowKernel<platform::CUDADeviceContext, T>
int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
cuda_ctx, ins, &outs, axis, CudaPowFunctor<T>());
cuda_ctx, ins, &outs, axis, PowFunctor<T>());
}
};
......
......@@ -26,31 +26,31 @@ namespace funcs {
// Add
template <typename T>
struct AddFunctor {
  // Computes a + b.
  inline HOSTDEVICE T operator()(const T a, const T b) const { return a + b; }
};
template <typename T>
struct InverseAddFunctor {
  // Computes b + a (operand order swapped).
  inline HOSTDEVICE T operator()(const T a, const T b) const { return b + a; }
};
// Subtract
template <typename T>
struct SubtractFunctor {
  // Computes a - b.
  inline HOSTDEVICE T operator()(const T a, const T b) const { return a - b; }
};
template <typename T>
struct InverseSubtractFunctor {
  // Computes b - a (operand order swapped).
  inline HOSTDEVICE T operator()(const T a, const T b) const { return b - a; }
};
// Multiply
template <typename T>
struct MultiplyFunctor {
  // Computes a * b.
  inline HOSTDEVICE T operator()(const T a, const T b) const { return a * b; }
};
template <typename T>
struct InverseMultiplyFunctor {
  // Computes b * a (operand order swapped).
  inline HOSTDEVICE T operator()(const T a, const T b) const { return b * a; }
};
// Divide
......@@ -60,14 +60,14 @@ struct InverseMultiplyFunctor {
template <typename T, typename Enable = void>
struct DivideFunctor {
  // Computes a / b (integral types are handled by a checked specialization).
  inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; }
};
template <typename T>
struct DivideFunctor<
T,
typename std::enable_if<std::is_integral<T>::value>::type> {
inline HOSTDEVICE T operator()(const T& a, const T& b) const {
inline HOSTDEVICE T operator()(const T a, const T b) const {
// For int32/int64, need to check whether the divison is zero.
PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
return a / b;
......@@ -76,7 +76,7 @@ struct DivideFunctor<
template <typename T, typename Enable = void>
struct InverseDivideFunctor {
  // Computes b / a (operand order swapped).
  inline HOSTDEVICE T operator()(const T a, const T b) const { return b / a; }
};
} // namespace funcs
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册