Unverified commit ac933235, authored by Zhang Ting, committed via GitHub

[part 5]change type of function args (#38889)

Parent commit: 73742d36
No related merge requests
......@@ -54,7 +54,7 @@ using InverseDivFunctor = pten::funcs::InverseDivideFunctor<T>;
// Floor Divide
template <typename T>
struct FloorDivFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const {
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
return static_cast<T>(std::trunc(a / b));
}
......@@ -62,7 +62,7 @@ struct FloorDivFunctor {
template <typename T>
struct InverseFloorDivFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const {
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE(a != 0, DIV_ERROR_INFO);
return static_cast<T>(std::trunc(b / a));
}
......@@ -73,7 +73,7 @@ struct InverseFloorDivFunctor {
// Maximum
template <typename T>
struct MaxFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const {
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
return a > b ? a : b;
}
};
......@@ -81,7 +81,7 @@ struct MaxFunctor {
// Minimum
template <typename T>
struct MinFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const {
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
return a < b ? a : b;
}
};
......@@ -119,14 +119,14 @@ struct DivGradXYFunctor<Complex<InT>, Complex<OutT>> {
// Float div grad
template <typename T>
struct DivGradXFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; }
+  inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; }
};
// Complex div grad
template <typename T>
struct DivGradXFunctor<Complex<T>> {
-  inline HOSTDEVICE Complex<T> operator()(const Complex<T>& a,
-                                          const Complex<T>& b) const {
+  inline HOSTDEVICE Complex<T> operator()(const Complex<T> a,
+                                          const Complex<T> b) const {
Complex<T> b_conj(b.real, -b.imag);
return a / b_conj;
}
......@@ -135,7 +135,7 @@ struct DivGradXFunctor<Complex<T>> {
// Float mul and div
template <typename T>
struct DivGradYFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b, const T& c) const {
+  inline HOSTDEVICE T operator()(const T a, const T b, const T c) const {
return -a * b / c;
}
};
......@@ -143,9 +143,9 @@ struct DivGradYFunctor {
// Complex mul and div
template <typename T>
struct DivGradYFunctor<Complex<T>> {
-  inline HOSTDEVICE Complex<T> operator()(const Complex<T>& a,
-                                          const Complex<T>& b,
-                                          const Complex<T>& c) const {
+  inline HOSTDEVICE Complex<T> operator()(const Complex<T> a,
+                                          const Complex<T> b,
+                                          const Complex<T> c) const {
Complex<T> out_div_c_conj((b / c).real, -(b / c).imag);
return -a * out_div_c_conj;
}
......@@ -154,7 +154,7 @@ struct DivGradYFunctor<Complex<T>> {
// Fmax
template <typename T>
struct FMaxFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const {
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
return std::fmax(a, b);
}
};
......@@ -162,8 +162,8 @@ struct FMaxFunctor {
template <>
struct FMaxFunctor<paddle::platform::float16> {
inline HOSTDEVICE paddle::platform::float16 operator()(
-      const paddle::platform::float16& a,
-      const paddle::platform::float16& b) const {
+      const paddle::platform::float16 a,
+      const paddle::platform::float16 b) const {
float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b);
auto result = std::fmax(float_a, float_b);
......@@ -173,7 +173,7 @@ struct FMaxFunctor<paddle::platform::float16> {
template <>
struct FMaxFunctor<int> {
-  inline HOSTDEVICE int operator()(const int& a, const int& b) const {
+  inline HOSTDEVICE int operator()(const int a, const int b) const {
float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b);
auto result = std::fmax(float_a, float_b);
......@@ -183,8 +183,7 @@ struct FMaxFunctor<int> {
template <>
struct FMaxFunctor<int64_t> {
-  inline HOSTDEVICE int64_t operator()(const int64_t& a,
-                                       const int64_t& b) const {
+  inline HOSTDEVICE int64_t operator()(const int64_t a, const int64_t b) const {
double double_a = static_cast<double>(a);
double double_b = static_cast<double>(b);
auto result = std::fmax(double_a, double_b);
......@@ -195,7 +194,7 @@ struct FMaxFunctor<int64_t> {
// Fmin
template <typename T>
struct FMinFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const {
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
return std::fmin(a, b);
}
};
......@@ -203,8 +202,8 @@ struct FMinFunctor {
template <>
struct FMinFunctor<paddle::platform::float16> {
inline HOSTDEVICE paddle::platform::float16 operator()(
-      const paddle::platform::float16& a,
-      const paddle::platform::float16& b) const {
+      const paddle::platform::float16 a,
+      const paddle::platform::float16 b) const {
float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b);
auto result = std::fmin(float_a, float_b);
......@@ -214,7 +213,7 @@ struct FMinFunctor<paddle::platform::float16> {
template <>
struct FMinFunctor<int> {
-  inline HOSTDEVICE int operator()(const int& a, const int& b) const {
+  inline HOSTDEVICE int operator()(const int a, const int b) const {
float float_a = static_cast<float>(a);
float float_b = static_cast<float>(b);
auto result = std::fmin(float_a, float_b);
......@@ -224,8 +223,7 @@ struct FMinFunctor<int> {
template <>
struct FMinFunctor<int64_t> {
-  inline HOSTDEVICE int64_t operator()(const int64_t& a,
-                                       const int64_t& b) const {
+  inline HOSTDEVICE int64_t operator()(const int64_t a, const int64_t b) const {
double double_a = static_cast<double>(a);
double double_b = static_cast<double>(b);
auto result = std::fmin(double_a, double_b);
......@@ -261,12 +259,12 @@ struct MinGradXYFunctor {
template <typename T>
struct MulGradFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; }
+  inline HOSTDEVICE T operator()(const T a, const T b) const { return a * b; }
};
template <typename T>
struct MulGradFunctor<Complex<T>> {
-  inline HOSTDEVICE Complex<T> operator()(const Complex<T>& a,
-                                          const Complex<T>& b) const {
+  inline HOSTDEVICE Complex<T> operator()(const Complex<T> a,
+                                          const Complex<T> b) const {
Complex<T> b_conj(b.real, -b.imag);
return a * b_conj;
}
......@@ -274,9 +272,9 @@ struct MulGradFunctor<Complex<T>> {
template <typename InT, typename OutT>
struct MulGradXYFunctor {
-  inline HOSTDEVICE paddle::framework::Array<OutT, 2> operator()(const InT& a,
-                                                                  const InT& b,
-                                                                  const InT& c) {
+  inline HOSTDEVICE paddle::framework::Array<OutT, 2> operator()(const InT a,
+                                                                  const InT b,
+                                                                  const InT c) {
paddle::framework::Array<OutT, 2> outs;
// dx = dout * y
outs[0] = a * b;
......@@ -289,7 +287,7 @@ struct MulGradXYFunctor {
template <typename InT, typename OutT>
struct MulGradXYFunctor<Complex<InT>, Complex<OutT>> {
inline HOSTDEVICE paddle::framework::Array<Complex<OutT>, 2> operator()(
-      const Complex<InT>& a, const Complex<InT>& b, const Complex<InT>& c) {
+      const Complex<InT> a, const Complex<InT> b, const Complex<InT> c) {
paddle::framework::Array<Complex<OutT>, 2> outs;
// dx = dout * y
Complex<InT> b_conj(b.real, -b.imag);
......
......@@ -20,31 +20,6 @@ namespace plat = paddle::platform;
namespace paddle {
namespace operators {
-template <typename T, typename Enable = void>
-struct CudaModFunctor {
-  inline HOSTDEVICE T operator()(const T* args) const {
-    T res = args[0] % args[1];
-
-    // According to #PR26732: in dividend % divisor,
-    // the remainder shall have the same sign as the divisor.
-    if ((res != 0) && ((args[1] ^ res) < 0)) res += args[1];
-    return res;
-  }
-};
-
-template <typename T>
-struct CudaModFunctor<
-    T, typename std::enable_if_t<std::is_floating_point<T>::value>> {
-  inline HOSTDEVICE T operator()(const T* args) const {
-    T res = fmod(args[0], args[1]);
-
-    // According to #PR26732: in dividend % divisor,
-    // the remainder shall have the same sign as the divisor.
-    if ((res != 0) && ((res < 0) != (args[1] < 0))) res += args[1];
-    return res;
-  }
-};
template <typename T>
class ElementwiseModKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> {
......@@ -56,7 +31,7 @@ class ElementwiseModKernel<platform::CUDADeviceContext, T>
ctx.template device_context<platform::CUDADeviceContext>();
int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
-        cuda_ctx, ins, &outs, axis, CudaModFunctor<T>());
+        cuda_ctx, ins, &outs, axis, ModFunctor<T>());
}
};
......
......@@ -19,29 +19,36 @@ limitations under the License. */
namespace paddle {
namespace operators {
-template <typename T>
+template <typename T, typename Enable = void>
struct ModFunctor {
-  inline HOSTDEVICE T operator()(T a, T b) const {
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
T res = a % b;
-    if ((res != 0) && ((res < 0) != (b < 0))) res += b;
+    // According to #PR26732: in dividend % divisor,
+    // the remainder shall have the same sign as the divisor.
+    if ((res != 0) && ((b ^ res) < 0)) res += b;
return res;
}
};
template <typename T>
-struct InverseModFunctor {
-  inline HOSTDEVICE T operator()(T a, T b) const {
-    T res = b % a;
-    if ((res != 0) && ((res < 0) != (a < 0))) res += a;
+struct ModFunctor<T,
+                  typename std::enable_if_t<std::is_floating_point<T>::value>> {
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
+    T res = fmod(a, b);
+
+    // According to #PR26732: in dividend % divisor,
+    // the remainder shall have the same sign as the divisor.
+    if ((res != 0) && ((res < 0) != (b < 0))) res += b;
return res;
}
};
template <typename T>
-struct ModFunctorFP {
+struct InverseModFunctor {
  inline HOSTDEVICE T operator()(T a, T b) const {
-    T res = fmod(a, b);
-    if ((res != 0) && ((b < 0) != (res < 0))) res += b;
+    T res = b % a;
+    if ((res != 0) && ((res < 0) != (a < 0))) res += a;
return res;
}
};
......@@ -79,8 +86,8 @@ void elementwise_mod_fp(const framework::ExecutionContext &ctx,
auto x_dims = x->dims();
auto y_dims = y->dims();
if (x_dims.size() >= y_dims.size()) {
-    ElementwiseComputeEx<ModFunctorFP<T>, DeviceContext, T>(
-        ctx, x, y, axis, ModFunctorFP<T>(), z);
+    ElementwiseComputeEx<ModFunctor<T>, DeviceContext, T>(ctx, x, y, axis,
+                                                           ModFunctor<T>(), z);
} else {
ElementwiseComputeEx<InverseModFunctorFP<T>, DeviceContext, T>(
ctx, x, y, axis, InverseModFunctorFP<T>(), z);
......
......@@ -16,26 +16,6 @@ namespace ops = paddle::operators;
namespace paddle {
namespace operators {
-template <typename T, typename Enable = void>
-struct CudaPowFunctor {
-  inline HOSTDEVICE T operator()(const T args[]) const {
-    return std::pow(args[0], args[1]);
-  }
-};
-
-template <typename T>
-struct CudaPowFunctor<
-    T, typename std::enable_if<std::is_integral<T>::value>::type> {
-  // On CUDAPlace, std::pow(3, 1) calls pow(float, float), and
-  // it will return a float number like 2.99..., which floors to 2
-  // when cast to int by default, and that is wrong.
-  // Use llrint to cast it to the nearest integer, which is 3.
-  inline HOSTDEVICE T operator()(const T args[]) const {
-    return std::llrint(
-        std::pow(static_cast<double>(args[0]), static_cast<double>(args[1])));
-  }
-};
template <typename T>
class ElementwisePowKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> {
......@@ -48,7 +28,7 @@ class ElementwisePowKernel<platform::CUDADeviceContext, T>
int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
-        cuda_ctx, ins, &outs, axis, CudaPowFunctor<T>());
+        cuda_ctx, ins, &outs, axis, PowFunctor<T>());
}
};
......
......@@ -26,31 +26,31 @@ namespace funcs {
// Add
template <typename T>
struct AddFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a + b; }
+  inline HOSTDEVICE T operator()(const T a, const T b) const { return a + b; }
};
template <typename T>
struct InverseAddFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b + a; }
+  inline HOSTDEVICE T operator()(const T a, const T b) const { return b + a; }
};
// Subtract
template <typename T>
struct SubtractFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a - b; }
+  inline HOSTDEVICE T operator()(const T a, const T b) const { return a - b; }
};
template <typename T>
struct InverseSubtractFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b - a; }
+  inline HOSTDEVICE T operator()(const T a, const T b) const { return b - a; }
};
// Multiply
template <typename T>
struct MultiplyFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; }
+  inline HOSTDEVICE T operator()(const T a, const T b) const { return a * b; }
};
template <typename T>
struct InverseMultiplyFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b * a; }
+  inline HOSTDEVICE T operator()(const T a, const T b) const { return b * a; }
};
// Divide
......@@ -60,14 +60,14 @@ struct InverseMultiplyFunctor {
template <typename T, typename Enable = void>
struct DivideFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; }
+  inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; }
};
template <typename T>
struct DivideFunctor<
T,
typename std::enable_if<std::is_integral<T>::value>::type> {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const {
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
// For int32/int64, need to check whether the divisor is zero.
PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
return a / b;
......@@ -76,7 +76,7 @@ struct DivideFunctor<
template <typename T, typename Enable = void>
struct InverseDivideFunctor {
-  inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b / a; }
+  inline HOSTDEVICE T operator()(const T a, const T b) const { return b / a; }
};
} // namespace funcs
......