diff --git a/dnn/scripts/gen_elemwise_multi_type_utils.py b/dnn/scripts/gen_elemwise_multi_type_utils.py index 6de5a5124476564c33555fe6614c1d85b5f3152a..39aec81819da22113d9180e506cca3daeed5fbe7 100755 --- a/dnn/scripts/gen_elemwise_multi_type_utils.py +++ b/dnn/scripts/gen_elemwise_multi_type_utils.py @@ -14,23 +14,27 @@ MODES = { 1: ['RELU', 'ABS', 'NEGATE', 'ACOS', 'ASIN', 'CEIL', 'COS', 'EXP', 'EXPM1', 'FLOOR', 'LOG', 'LOG1P', 'SIGMOID', 'SIN', 'TANH', 'FAST_TANH', 'ROUND', 'ERF', 'ERFINV', 'ERFC', - 'ERFCINV', 'H_SWISH', 'SILU', 'GELU'], + 'ERFCINV', 'H_SWISH', 'SILU', 'GELU', 'SINH', 'COSH', + 'ASINH', 'ACOSH', 'ATANH', 'TAN', 'SOFTPLUS', 'RELU6', + 'HSIGMOID', 'LOGSIGMOID', 'SQRT', 'SQUARE', 'SIGN'], 2: ['ABS_GRAD', 'ADD', 'FLOOR_DIV', 'MAX', 'MIN', 'MOD', 'MUL', 'SIGMOID_GRAD', 'SUB', 'SWITCH_GT0', 'TANH_GRAD', 'LT', 'LEQ', 'EQ', 'FUSE_ADD_RELU', 'TRUE_DIV', 'POW', 'LOG_SUM_EXP', 'FUSE_ADD_TANH', 'FAST_TANH_GRAD', 'FUSE_ADD_SIGMOID', 'ATAN2', 'H_SWISH_GRAD', - 'FUSE_ADD_H_SWISH', 'SILU_GRAD', 'GELU_GRAD'], - 3: ['COND_LEQ_MOV', 'COND_LT_MOV', 'FUSE_MUL_ADD3'], + 'FUSE_ADD_H_SWISH', 'SILU_GRAD', 'GELU_GRAD', 'PRELU', + 'ASINH_GRAD', 'ACOSH_GRAD', 'ATANH_GRAD', 'SOFTPLUS_GRAD', + 'RELU6_GRAD', 'HSIGMOID_GRAD'], + 3: ['COND_LEQ_MOV', 'COND_LT_MOV', 'FUSE_MUL_ADD3', 'CLIP', 'PRELU_GRAD'], } QINT4_MODES = { 1: ['RELU', 'ABS', 'NEGATE', 'CEIL', 'FLOOR', 'SIGMOID', 'TANH', 'FAST_TANH', 'ROUND', 'H_SWISH'], - 2: ['ADD', 'MAX', 'MIN', 'MUL', 'SUB', 'SWITCH_GT0', - 'LT', 'LEQ', 'EQ', 'FUSE_ADD_RELU', 'FUSE_ADD_TANH', - 'FUSE_ADD_SIGMOID', 'FUSE_ADD_H_SWISH'], - 3: ['COND_LEQ_MOV', 'COND_LT_MOV', 'FUSE_MUL_ADD3'], + 2: ['ADD', 'MAX', 'MIN', 'MUL', 'SUB', 'SWITCH_GT0', + 'LT', 'LEQ', 'EQ', 'FUSE_ADD_RELU', 'FUSE_ADD_TANH', + 'FUSE_ADD_SIGMOID', 'FUSE_ADD_H_SWISH', 'PRELU'], + 3: ['COND_LEQ_MOV', 'COND_LT_MOV', 'FUSE_MUL_ADD3', 'CLIP'], } QINT32_MODES = { diff --git a/dnn/scripts/gen_elemwise_utils.py b/dnn/scripts/gen_elemwise_utils.py index 
84bc541f315ed0f338858891b00b81ec45642c44..52f28bb79f15661e31128d1921ff0156b72a075d 100755 --- a/dnn/scripts/gen_elemwise_utils.py +++ b/dnn/scripts/gen_elemwise_utils.py @@ -12,23 +12,27 @@ DTYPES = {'dt_int32': ('Int32', 'INT'), } MODES = { - (1, 'INT'): ['RELU', 'ABS', 'NEGATE'], + (1, 'INT'): ['RELU', 'ABS', 'NEGATE', 'RELU6', 'SQUARE', 'SIGN'], (2, 'INT'): ['ABS_GRAD', 'ADD', 'FLOOR_DIV', 'MAX', 'MIN', 'MOD', 'MUL', 'SIGMOID_GRAD', 'SUB', 'SWITCH_GT0', 'TANH_GRAD', 'LT', 'LEQ', - 'EQ', 'FUSE_ADD_RELU', 'SHL', 'SHR', 'RMULH'], - (3, 'INT'): ['COND_LEQ_MOV', 'COND_LT_MOV'], + 'EQ', 'FUSE_ADD_RELU', 'SHL', 'SHR', 'RMULH', 'PRELU'], + (3, 'INT'): ['COND_LEQ_MOV', 'COND_LT_MOV', 'CLIP'], (1, 'FLOAT'): ['RELU', 'ABS', 'NEGATE', 'ACOS', 'ASIN', 'CEIL', 'COS', 'EXP', 'EXPM1', 'FLOOR', 'LOG', 'LOG1P', 'SIGMOID', 'SIN', 'TANH', 'FAST_TANH', 'ROUND', 'ERF', 'ERFINV', 'ERFC', - 'ERFCINV', 'H_SWISH', 'SILU', 'GELU'], + 'ERFCINV', 'H_SWISH', 'SILU', 'GELU', 'SINH', 'COSH', + 'ASINH', 'ACOSH', 'ATANH', 'TAN', 'SOFTPLUS', 'RELU6', + 'HSIGMOID', 'LOGSIGMOID', 'SQRT', 'SQUARE', 'SIGN'], (2, 'FLOAT'): ['ABS_GRAD', 'ADD', 'FLOOR_DIV', 'MAX', 'MIN', 'MOD', 'MUL', 'SIGMOID_GRAD', 'SUB', 'SWITCH_GT0', 'TANH_GRAD', 'LT', 'LEQ', 'EQ', 'FUSE_ADD_RELU', 'TRUE_DIV', 'POW', 'LOG_SUM_EXP', 'FUSE_ADD_TANH', 'FAST_TANH_GRAD', 'FUSE_ADD_SIGMOID', 'ATAN2', 'H_SWISH_GRAD', - 'FUSE_ADD_H_SWISH', 'SILU_GRAD', 'GELU_GRAD'], - (3, 'FLOAT'): ['COND_LEQ_MOV', 'COND_LT_MOV', 'FUSE_MUL_ADD3'], + 'FUSE_ADD_H_SWISH', 'SILU_GRAD', 'GELU_GRAD', 'PRELU', + 'ASINH_GRAD', 'ACOSH_GRAD', 'ATANH_GRAD', 'SOFTPLUS_GRAD', + 'RELU6_GRAD', 'HSIGMOID_GRAD'], + (3, 'FLOAT'): ['COND_LEQ_MOV', 'COND_LT_MOV', 'FUSE_MUL_ADD3', 'CLIP', 'PRELU_GRAD'], (1, 'BOOL'): ['NOT'], (2, 'BOOL'): ['AND', 'OR', 'XOR', 'LT', 'LEQ', 'EQ'], (3, 'BOOL'): [] diff --git a/dnn/scripts/opr_param_defs.py b/dnn/scripts/opr_param_defs.py index 1c7dd1938f6cdec17e2c655e5456777a390a22d7..0e901b57c6f6538843354fddb5f91627a3ef7b1e 100755 --- 
a/dnn/scripts/opr_param_defs.py +++ b/dnn/scripts/opr_param_defs.py @@ -421,9 +421,31 @@ pdef('Elemwise').add_enum( Doc('GELU = 58', 'unary: x Phi(x)'), Doc('GELU_GRAD = 59', 'binary: grad(x Phi(x))'), Doc('COND_LT_MOV = 60', 'ternary: x < y ? z : 0'), - Doc('NEQ = 61', 'binary: x != y'), - Doc('ISNAN = 62', 'unary: isnan(x)'), - Doc('ISINF = 63', 'unary: isinf(x)'), + Doc('SINH = 61', 'unary: sinh(x)'), + Doc('COSH = 62', 'unary: cosh(x)'), + Doc('ASINH = 63', 'unary: asinh(x)'), + Doc('ACOSH = 64', 'unary: acosh(x)'), + Doc('ATANH = 65', 'unary: atanh(x)'), + Doc('TAN = 66', 'unary: tan(x)'), + Doc('ASINH_GRAD = 67', 'binary: y / sqrt(x^2 + 1)'), + Doc('ACOSH_GRAD = 68', 'binary: y / sqrt(x^2 - 1) (x > 1)'), + Doc('ATANH_GRAD = 69', 'binary: y / (1 - x^2) (|x| < 1)'), + Doc('PRELU = 70', 'binary: x > 0 ? x : x * y'), + Doc('CLIP = 71', 'ternary: x <= y ? y : (x <= z ? x : z)'), + Doc('PRELU_GRAD = 72', 'ternary: x > 0 ? y : y * z'), + Doc('SOFTPLUS = 73', 'unary: log(1 + e^x)'), + Doc('SOFTPLUS_GRAD = 74', 'binary: y * e^x / (1 + e^x)'), + Doc('RELU6 = 75', 'unary: min(max(0, x), 6)'), + Doc('RELU6_GRAD = 76', 'binary: x < 0 ? 0 : (x > 6 ? 0 : y)'), + Doc('HSIGMOID = 77', 'unary: relu6(x + 3) / 6'), + Doc('HSIGMOID_GRAD = 78', 'binary: x < -3 ? 0 : (x > 3 ? 
0 : y / 6)'), + Doc('LOGSIGMOID = 79', 'unary: -log(1 + e^(-x))'), + Doc('SQRT = 80', 'unary: x^(1/2)'), + Doc('SQUARE = 81', 'unary: x^2'), + Doc('SIGN = 82', 'unary: sgn(x)'), + Doc('NEQ = 83', 'binary: x != y'), + Doc('ISNAN = 84', 'unary: isnan(x)'), + Doc('ISINF = 85', 'unary: isinf(x)'), ) pdef('ElemwiseMultiType').add_enum( diff --git a/dnn/src/common/elemwise/each_mode.inl b/dnn/src/common/elemwise/each_mode.inl index 67a144ca63fa5256c4eed54791e75855ff184ba0..48ca51dc65a2b7466e2f57bacbd1a4e656e337da 100644 --- a/dnn/src/common/elemwise/each_mode.inl +++ b/dnn/src/common/elemwise/each_mode.inl @@ -25,12 +25,28 @@ MEGDNN_ELEMWISE_MODE_ENABLE(ERFCINV, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(H_SWISH, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(SILU, cb) \ - MEGDNN_ELEMWISE_MODE_ENABLE(GELU, cb) + MEGDNN_ELEMWISE_MODE_ENABLE(GELU, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(SINH, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(COSH, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(ASINH, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(ATANH, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(TAN, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(LOGSIGMOID, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(SQRT, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) #define MEGDNN_FOREACH_ELEMWISE_MODE_UNARY_INT(cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(RELU, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(ABS, cb) \ - MEGDNN_ELEMWISE_MODE_ENABLE(NEGATE, cb) + MEGDNN_ELEMWISE_MODE_ENABLE(NEGATE, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) #define MEGDNN_FOREACH_ELEMWISE_MODE_BINARY_BOOL(cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(AND, cb) \ @@ -66,7 +82,14 @@ MEGDNN_ELEMWISE_MODE_ENABLE(H_SWISH_GRAD, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(FUSE_ADD_H_SWISH, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(SILU_GRAD, cb) 
\ - MEGDNN_ELEMWISE_MODE_ENABLE(GELU_GRAD, cb) + MEGDNN_ELEMWISE_MODE_ENABLE(GELU_GRAD, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(ASINH_GRAD, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH_GRAD, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(ATANH_GRAD, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS_GRAD, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(RELU6_GRAD, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID_GRAD, cb) #define MEGDNN_FOREACH_ELEMWISE_MODE_BINARY_INT(cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(ABS_GRAD, cb) \ @@ -86,15 +109,19 @@ MEGDNN_ELEMWISE_MODE_ENABLE(FUSE_ADD_RELU, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(SHL, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(SHR, cb) \ - MEGDNN_ELEMWISE_MODE_ENABLE(RMULH, cb) + MEGDNN_ELEMWISE_MODE_ENABLE(RMULH, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) #define MEGDNN_FOREACH_ELEMWISE_MODE_TERNARY_BOOL(cb) #define MEGDNN_FOREACH_ELEMWISE_MODE_TERNARY_FLOAT(cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(COND_LEQ_MOV, cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(COND_LT_MOV, cb) \ - MEGDNN_ELEMWISE_MODE_ENABLE(FUSE_MUL_ADD3, cb) + MEGDNN_ELEMWISE_MODE_ENABLE(FUSE_MUL_ADD3, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(PRELU_GRAD, cb) #define MEGDNN_FOREACH_ELEMWISE_MODE_TERNARY_INT(cb) \ MEGDNN_ELEMWISE_MODE_ENABLE(COND_LEQ_MOV, cb) \ - MEGDNN_ELEMWISE_MODE_ENABLE(COND_LT_MOV, cb) + MEGDNN_ELEMWISE_MODE_ENABLE(COND_LT_MOV, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) diff --git a/dnn/src/common/elemwise/kern_defs.cuh b/dnn/src/common/elemwise/kern_defs.cuh index 87788c0d8217e7930836144e6138dd8c6994e99f..951054708b4901bd5bf26d80f4c2ae2030fa328a 100644 --- a/dnn/src/common/elemwise/kern_defs.cuh +++ b/dnn/src/common/elemwise/kern_defs.cuh @@ -154,11 +154,18 @@ struct ElemwiseKern; // int and float DEF_KERN_ALL(NEGATE, -x); +DEF_KERN_ALL(SQUARE, x* x); #if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) DEF_KERN_INT(RELU, x <= ctype(0) ? ctype(0) : x); +DEF_KERN_INT(RELU6, x <= ctype(0) ? 
ctype(0) : (x <= ctype(6) ? x : ctype(6))); +DEF_KERN_INT(SIGN, x < ctype(0) ? ctype(-1) : (x > ctype(0) ? ctype(1) : ctype(0))); DEF_KERN_FLOAT(RELU, x <= 0.f ? ctype(0) : x); +DEF_KERN_FLOAT(RELU6, x <= 0.f ? ctype(0) : (x <= 6.f ? x : ctype(6))); +DEF_KERN_FLOAT(SIGN, x < 0.f ? -1.f : (x > 0.f ? 1.f : 0.f)); #else DEF_KERN_ALL(RELU, x <= ctype(0) ? ctype(0) : x); +DEF_KERN_ALL(RELU6, x <= ctype(0) ? ctype(0) : (x <= ctype(6) ? x : ctype(6))); +DEF_KERN_ALL(SIGN, x < ctype(0) ? ctype(-1) : (x > ctype(0) ? ctype(1) : ctype(0))); #endif DEF_KERN_INT(ABS, abs(int(x))); // DEF_KERN_INT(ABS, x > ctype(0) ? x : -x); @@ -186,6 +193,18 @@ DEF_KERN_FLOAT(ERFCINV, erfcinvf(x)); DEF_KERN_FLOAT(H_SWISH, x* min(max(x + 3, 0.f), 6.f) * (1.f / 6.f)); DEF_KERN_FLOAT(SILU, x / (expf(-x) + 1.f)); DEF_KERN_FLOAT(GELU, x* normcdf(x)); +DEF_KERN_FLOAT(SINH, sinhf(x)); +DEF_KERN_FLOAT(COSH, coshf(x)); +DEF_KERN_FLOAT(ASINH, asinhf(x)); +DEF_KERN_FLOAT(ACOSH, acoshf(x)); +DEF_KERN_FLOAT(ATANH, atanhf(x)); +DEF_KERN_FLOAT(TAN, tanf(x)); +DEF_KERN_FLOAT(SOFTPLUS, log1pf(expf(-fabsf(x))) + (x <= ctype(0) ? ctype(0) : x)); +DEF_KERN_FLOAT( + HSIGMOID, + x <= ctype(-3) ? ctype(0) : (x >= ctype(3) ? ctype(1) : ((x + 3.f) / 6.f))); +DEF_KERN_FLOAT(SQRT, sqrtf(x)); +DEF_KERN_FLOAT(LOGSIGMOID, -log1pf(expf(-fabsf(x))) + (x >= ctype(0) ? ctype(0) : x)); // int only DEF_KERN(dt_bool, NOT, x ^ 1); @@ -240,6 +259,12 @@ DEF_KERN_FLOAT(FUSE_ADD_RELU, (x + y) <= 0.f ? ctype(0) : (x + y)); #else DEF_KERN_ALL(FUSE_ADD_RELU, (x + y) <= ctype(0) ? ctype(0) : (x + y)); #endif +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_PLATFORM_NVCC__) +DEF_KERN_INT(PRELU, x > ctype(0) ? x : (x * y)); +DEF_KERN_FLOAT(PRELU, x > 0.f ? x : (x * y)); +#else +DEF_KERN_ALL(PRELU, x > ctype(0) ? 
x : (x * y)); +#endif // float only DEF_KERN_FLOAT(TRUE_DIV, x / y); @@ -259,6 +284,14 @@ DEF_KERN_FLOAT( DEF_KERN_FLOAT(FUSE_ADD_H_SWISH, fuse_add_hswish(x, y)); DEF_KERN_FLOAT(SILU_GRAD, silu_grad(x, y)); DEF_KERN_FLOAT(GELU_GRAD, gelu_grad(x, y)); +DEF_KERN_FLOAT(ASINH_GRAD, y / sqrt(x * x + 1.f)); +DEF_KERN_FLOAT(ACOSH_GRAD, y / sqrt(x * x - 1.f)); +DEF_KERN_FLOAT(ATANH_GRAD, y / (1.f - x * x)); +DEF_KERN_FLOAT(SOFTPLUS_GRAD, y / (expf(-x) + 1.f)); +DEF_KERN_FLOAT(RELU6_GRAD, x <= ctype(0) ? ctype(0) : (x >= ctype(6) ? ctype(0) : y)); +DEF_KERN_FLOAT( + HSIGMOID_GRAD, + x <= ctype(-3) ? ctype(0) : (x >= ctype(3) ? ctype(0) : (y / 6.f))); #undef KERN_SIG /* ================== ternary kernels ================== */ @@ -268,6 +301,8 @@ DEF_KERN_FLOAT(GELU_GRAD, gelu_grad(x, y)); DEF_KERN_ALL(COND_LEQ_MOV, x <= y ? z : ctype(0)); DEF_KERN_ALL(COND_LT_MOV, x < y ? z : ctype(0)); DEF_KERN_ALL(FUSE_MUL_ADD3, x* y + z); +DEF_KERN_ALL(CLIP, x <= y ? y : (x <= z ? x : z)); +DEF_KERN_FLOAT(PRELU_GRAD, x > 0.f ? 
y : (y * z)); #undef KERN_SIG diff --git a/dnn/src/common/elemwise/opr_impl.cpp b/dnn/src/common/elemwise/opr_impl.cpp index ed994866c653b3baf6241b00ecb7853569c617c0..2f0a1d5ab2991ab44789dbd3a7de4a5de26f173b 100644 --- a/dnn/src/common/elemwise/opr_impl.cpp +++ b/dnn/src/common/elemwise/opr_impl.cpp @@ -220,6 +220,28 @@ const ModeTrait& ModeTrait::from_mode(Mode mode) { CB_MODE(Mode::GELU); CB_MODE(Mode::GELU_GRAD); CB_MODE(Mode::COND_LT_MOV); + CB_MODE(Mode::SINH); + CB_MODE(Mode::COSH); + CB_MODE(Mode::ASINH); + CB_MODE(Mode::ACOSH); + CB_MODE(Mode::ATANH); + CB_MODE(Mode::TAN); + CB_MODE(Mode::ASINH_GRAD); + CB_MODE(Mode::ACOSH_GRAD); + CB_MODE(Mode::ATANH_GRAD); + CB_MODE(Mode::PRELU); + CB_MODE(Mode::PRELU_GRAD); + CB_MODE(Mode::CLIP); + CB_MODE(Mode::SOFTPLUS); + CB_MODE(Mode::SOFTPLUS_GRAD); + CB_MODE(Mode::RELU6); + CB_MODE(Mode::RELU6_GRAD); + CB_MODE(Mode::HSIGMOID); + CB_MODE(Mode::HSIGMOID_GRAD); + CB_MODE(Mode::LOGSIGMOID); + CB_MODE(Mode::SQRT); + CB_MODE(Mode::SQUARE); + CB_MODE(Mode::SIGN); default: megdnn_assert( 0, diff --git a/dnn/src/cuda/elemwise/kimpl/ACOSH_GRAD_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/ACOSH_GRAD_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..fc3af7a753d1384d5ef5bf97c728b9bcfcc12d99 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ACOSH_GRAD_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/ACOSH_GRAD_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/ACOSH_GRAD_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..1c4f89c8f04c1117f9d69bf522d4025a33918fa8 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ACOSH_GRAD_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if 
!MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/ACOSH_GRAD_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/ACOSH_GRAD_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..7674459b3698a33b1ec6a21093cd9027af0f4e4c --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ACOSH_GRAD_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/ACOSH_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/ACOSH_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..80411b71b2cbf6f5e253705896bc8bffe65ff478 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ACOSH_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/ACOSH_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/ACOSH_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..aa417709bbb389dc4ecb2036fab84f9b7f517243 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ACOSH_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/ACOSH_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/ACOSH_dt_float32.cu new file mode 100644 index 
0000000000000000000000000000000000000000..cfcf7ad3488aa53bd75ee02994ffa4849a804147 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ACOSH_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/ASINH_GRAD_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/ASINH_GRAD_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..393728a4da3ea08b625e5feca888a419e821802b --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ASINH_GRAD_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/ASINH_GRAD_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/ASINH_GRAD_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..807bedca44031f25d83edf83d1753f9312dcb903 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ASINH_GRAD_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/ASINH_GRAD_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/ASINH_GRAD_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..7a3d5a3edf854c6c184d3eb98247793c8c9dbefc --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ASINH_GRAD_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE 
dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/ASINH_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/ASINH_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..7a9123815dbdd5952974f112a679e5f2b86fe974 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ASINH_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/ASINH_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/ASINH_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..29c9307d2cf90ee005479bae3482adbf8c5c48a7 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ASINH_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/ASINH_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/ASINH_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..b6fc8ab6412d8753cb5aa7c0b233cc369fc91199 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ASINH_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/ATANH_GRAD_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/ATANH_GRAD_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..cf7afe1b76c63a1cf00a903ed1f5d5f0cadbc391 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ATANH_GRAD_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by 
gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/ATANH_GRAD_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/ATANH_GRAD_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..333b8f606c6be46342e53fda9eecdebf100ef005 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ATANH_GRAD_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/ATANH_GRAD_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/ATANH_GRAD_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..be794c32d835e9cf4fd8789f559ccd67910942b4 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ATANH_GRAD_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/ATANH_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/ATANH_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..fa6683dfeaa38572803e57dc67edf14dd10592ad --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ATANH_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/ATANH_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/ATANH_dt_float16.cu new file mode 
100644 index 0000000000000000000000000000000000000000..804e5361338cc49ad03fb3b8630762327cb17d1a --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ATANH_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/ATANH_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/ATANH_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..5fd222b3a2e5422cb79163f8a9f5f79f2154baa1 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/ATANH_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/CLIP_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..de73cd15d2c246cf5006379238ae4fc6b3f49682 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/CLIP_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..25e351c123538c392bbc936332893723ba6410a5 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_float16 +#include 
"../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/CLIP_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..cf8dc776ff0ed87a3167dbe07fbce5346b30f357 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/CLIP_dt_int16.cu b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_int16.cu new file mode 100644 index 0000000000000000000000000000000000000000..f60b5c4ced3b3541b2734339dd3d4fae7c6e9b5e --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_int16.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/CLIP_dt_int32.cu b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_int32.cu new file mode 100644 index 0000000000000000000000000000000000000000..c003f595ea958af3884485680a3678b1eb8598cf --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_int32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/CLIP_dt_int8.cu b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_int8.cu new file mode 100644 index 0000000000000000000000000000000000000000..cb0ec0463dfe7d38043d321085f150fe8bee1826 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_int8.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_int8 +#include 
"../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/CLIP_dt_uint8.cu b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_uint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..b0198d93d68ba3cf90d60bbf262e6f44a02e1ffe --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/CLIP_dt_uint8.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/COSH_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/COSH_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..7cb17527beb5abb7f2cdba58f3633c9c5f309be2 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/COSH_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(COSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/COSH_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/COSH_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..5f42f2350232482a8fa6e3b2bdc5e3841baa89da --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/COSH_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(COSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/COSH_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/COSH_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..94ea18705cbb641fb4d8afb3bc23544db120e892 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/COSH_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) 
MEGDNN_ELEMWISE_MODE_ENABLE(COSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/HSIGMOID_GRAD_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/HSIGMOID_GRAD_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..a8115bff8428750645a818544b7fa829248824b4 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/HSIGMOID_GRAD_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/HSIGMOID_GRAD_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/HSIGMOID_GRAD_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..a1fb7ee3ee5f1c288281a8bb7cb5445c0c596d43 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/HSIGMOID_GRAD_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/HSIGMOID_GRAD_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/HSIGMOID_GRAD_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..9c0a4aebd89bd9eb4aca61a27e5b4b4f8e393b23 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/HSIGMOID_GRAD_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/HSIGMOID_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/HSIGMOID_dt_bfloat16.cu new file mode 100644 index 
0000000000000000000000000000000000000000..28a83976f4009d40d50e31fdeb6d94cf9bee5ab0 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/HSIGMOID_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/HSIGMOID_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/HSIGMOID_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..cdb77455f7aeb8a6207d40d0c38d0acdb94d77ba --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/HSIGMOID_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/HSIGMOID_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/HSIGMOID_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..528f944c2a66eb3dcc36868dfc4f89c8c9bae099 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/HSIGMOID_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/LOGSIGMOID_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/LOGSIGMOID_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..06322df63cecfe034ba9b53b9a7105c163e57e89 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/LOGSIGMOID_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(LOGSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define 
KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/LOGSIGMOID_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/LOGSIGMOID_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..d0b6c026851ea01f3f796b8e9ccda787f51fceb3 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/LOGSIGMOID_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(LOGSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/LOGSIGMOID_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/LOGSIGMOID_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..ea1bcf1a3ab05774fd28e69152f18f37c80cbdbf --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/LOGSIGMOID_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(LOGSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/PRELU_GRAD_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/PRELU_GRAD_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..dc33150072bc4d40f3d56a9b45e9d26c2985b559 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/PRELU_GRAD_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU_GRAD, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/PRELU_GRAD_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/PRELU_GRAD_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..3441181806c090103f7e7711b52ce4c417252204 --- /dev/null +++ 
b/dnn/src/cuda/elemwise/kimpl/PRELU_GRAD_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU_GRAD, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/PRELU_GRAD_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/PRELU_GRAD_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..1fedc850076ee187afd30c48b51785fb958dc357 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/PRELU_GRAD_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU_GRAD, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/PRELU_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..78c18f9d716524a01880336c03e906b9d646cd9c --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/PRELU_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..33e6ce7393622702eca9c187eaa1cb964b8f13b6 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/PRELU_dt_float32.cu 
b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..46f2d36705d6ae3c09361b540bd5650750e71ae8 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/PRELU_dt_int16.cu b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_int16.cu new file mode 100644 index 0000000000000000000000000000000000000000..d1dfa9ace337bbda9b40e88137fa43a59baa9eb0 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_int16.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/PRELU_dt_int32.cu b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_int32.cu new file mode 100644 index 0000000000000000000000000000000000000000..d6d7332f8ab7761a13ea68eacc38005b10e0b62b --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_int32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/PRELU_dt_int8.cu b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_int8.cu new file mode 100644 index 0000000000000000000000000000000000000000..621a7dd311d38be2b39117270e7398de22086edb --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_int8.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git 
a/dnn/src/cuda/elemwise/kimpl/PRELU_dt_uint8.cu b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_uint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..86ff475d046afda072fd0ba6de02b432d79cad72 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/PRELU_dt_uint8.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/RELU6_GRAD_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/RELU6_GRAD_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..906996656ee3c6f0cfb24a3a31d00a3e8544f046 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/RELU6_GRAD_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/RELU6_GRAD_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/RELU6_GRAD_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..efb61fa6b23dce1fdc68fca7d9e047d3d57cf94d --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/RELU6_GRAD_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/RELU6_GRAD_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/RELU6_GRAD_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..6088f41deae5038ebf6772f76b69cfdb97e81192 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/RELU6_GRAD_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define 
KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/RELU6_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..cf79b7e965a6d0eb7b0a3c83669ac7bc9c808d72 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/RELU6_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..0646045d1ec3a0c56117c3c321cf68fd1a656ccf --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/RELU6_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..2fe7746f70244b9c2cad4e2ca0a8dee11244cb2a --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/RELU6_dt_int16.cu b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_int16.cu new file mode 100644 index 0000000000000000000000000000000000000000..32c2dab37cfed97cb66a8b5352712312dd6817eb --- /dev/null +++ 
b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_int16.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/RELU6_dt_int32.cu b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_int32.cu new file mode 100644 index 0000000000000000000000000000000000000000..e59877c32838c7a7a1b2943c76a4af25af1e56c5 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_int32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/RELU6_dt_int8.cu b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_int8.cu new file mode 100644 index 0000000000000000000000000000000000000000..6f6f77416c5b1da4255c2eae3ba5ebe2c47ba3ff --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_int8.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/RELU6_dt_uint8.cu b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_uint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..60812b5509ddcc27abe7507bc85461a6b0de0d94 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/RELU6_dt_uint8.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SIGN_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..34316156e524f88bcfc8e683fed67c69a4bba202 --- /dev/null +++ 
b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SIGN_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..04ac0b8687bdeac1be29f152c33278ecc17a3257 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SIGN_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..0402184f056634b18572acbe4735bee01b9b39d6 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SIGN_dt_int16.cu b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_int16.cu new file mode 100644 index 0000000000000000000000000000000000000000..0a854c234513c1f50230e8a62d9a1066dbb84ae5 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_int16.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SIGN_dt_int32.cu b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_int32.cu new file mode 100644 index 
0000000000000000000000000000000000000000..5f3aa92707bc2a41e90984fee2d88c1a70e123eb --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_int32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SIGN_dt_int8.cu b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_int8.cu new file mode 100644 index 0000000000000000000000000000000000000000..c0d44608aa081a9b4cfcd23f6da42800c06aadcd --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_int8.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SIGN_dt_uint8.cu b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_uint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..37f4b4b24ef2f272c6fb465bf45eff288c9417d9 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SIGN_dt_uint8.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SINH_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/SINH_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..19b3b24d4bccffd1d20b6776fcacc1937219341c --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SINH_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SINH_dt_float16.cu 
b/dnn/src/cuda/elemwise/kimpl/SINH_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..0298140ee348480cc71f7dc46b9700adfc2815f8 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SINH_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SINH_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/SINH_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..d781a287d648e3a91deded69a105ae383af35f31 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SINH_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_GRAD_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_GRAD_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..9769ef8793294c868338e59ed687b048f4cda809 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_GRAD_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_GRAD_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_GRAD_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..694fea1bfbea814472c444b8f4d4203406c89256 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_GRAD_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) 
MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_GRAD_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_GRAD_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..05710880904d2589837ffbc52b3d5c17f98a95f1 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_GRAD_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..7df279c7fb7c8ddfc8b7374998997c83420f373f --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..98d84daddbd4734908f999caf3fbe8b77e4e3cac --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_dt_float32.cu new file mode 100644 index 
0000000000000000000000000000000000000000..898996df7c3d2615134dd72c2f3c1ec9b8ee713f --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SOFTPLUS_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SQRT_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/SQRT_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..b6483dfefcbccb56f73d5bb8ffa46d5416bb5975 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SQRT_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQRT, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SQRT_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/SQRT_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..262e68d4b517151dd28071fddff8e8ddba3afec3 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SQRT_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQRT, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SQRT_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/SQRT_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..1c6aa2af998578e8a18b868df8af41d0aece06bf --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SQRT_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQRT, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git 
a/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..59f5383ecc121f378bf65621e9fbd2447597df21 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..c53551bb16fcd02dba621a2332b4fbe37a35eba7 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..4282b4793173f75f5e55a7bf8a47171d10dc120a --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_int16.cu b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_int16.cu new file mode 100644 index 0000000000000000000000000000000000000000..5d7bec087512364120790473ebb7623635054a0e --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_int16.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) 
MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_int32.cu b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_int32.cu new file mode 100644 index 0000000000000000000000000000000000000000..c1237c3775c46fce8f4e32eb6272b5cb7677c737 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_int32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_int8.cu b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_int8.cu new file mode 100644 index 0000000000000000000000000000000000000000..8c01483e5b305d6a6f0804d753ced10d920f5aa5 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_int8.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_uint8.cu b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_uint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..094c419155572eec5310118820a9df803b894b96 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/SQUARE_dt_uint8.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise/kimpl/TAN_dt_bfloat16.cu b/dnn/src/cuda/elemwise/kimpl/TAN_dt_bfloat16.cu new file mode 100644 index 0000000000000000000000000000000000000000..415ed3fc49e9d6247e037e1083409e6cf79a1a8c --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/TAN_dt_bfloat16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if 
!MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(TAN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/TAN_dt_float16.cu b/dnn/src/cuda/elemwise/kimpl/TAN_dt_float16.cu new file mode 100644 index 0000000000000000000000000000000000000000..dd5339a35c4cc5c7ea6b5294713a85bdee765bf2 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/TAN_dt_float16.cu @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(TAN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/cuda/elemwise/kimpl/TAN_dt_float32.cu b/dnn/src/cuda/elemwise/kimpl/TAN_dt_float32.cu new file mode 100644 index 0000000000000000000000000000000000000000..796c4e65abc8f34328a146fe79af06e04442cdd1 --- /dev/null +++ b/dnn/src/cuda/elemwise/kimpl/TAN_dt_float32.cu @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(TAN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/ACOSH_GRAD_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/ACOSH_GRAD_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..74e69bb6b2c6805d293277299c271caa49a7c7d1 Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/ACOSH_GRAD_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/ACOSH_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/ACOSH_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..6734fd674d5641e1c985b27b3e942ca3e73c3096 Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/ACOSH_dt_qint8_dt_qint8.cu differ diff --git 
a/dnn/src/cuda/elemwise_multi_type/kimpl/ASINH_GRAD_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/ASINH_GRAD_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..6af7f4060e1dad5488d42ac80efb69151f2d57bf Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/ASINH_GRAD_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/ASINH_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/ASINH_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..9b080b7369a4178755c151ca72e473096d101ce6 Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/ASINH_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/ATANH_GRAD_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/ATANH_GRAD_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..1d477ec4b2c0a891712c808b6eb97a1d829a74be Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/ATANH_GRAD_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/ATANH_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/ATANH_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..934699b3207bfad342e8aafe6dda481762e3fe26 Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/ATANH_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/CLIP_dt_qint4_dt_qint4.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/CLIP_dt_qint4_dt_qint4.cu new file mode 100644 index 0000000000000000000000000000000000000000..accf56fa04b1d33e2e142f45285f23ee512cdff6 Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/CLIP_dt_qint4_dt_qint4.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/CLIP_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/CLIP_dt_qint8_dt_qint8.cu new file mode 100644 index 
0000000000000000000000000000000000000000..78de5e8a6ae6843e9c90f0565ef2d1bb2966e71a Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/CLIP_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/CLIP_dt_quint4_dt_quint4.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/CLIP_dt_quint4_dt_quint4.cu new file mode 100644 index 0000000000000000000000000000000000000000..754de3e6edf2e9d96cec4518272a677ca51a11ba Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/CLIP_dt_quint4_dt_quint4.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/COSH_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/COSH_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..2bcc45ab6702ce3a242b29db41666ba6226149f7 Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/COSH_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/HSIGMOID_GRAD_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/HSIGMOID_GRAD_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..a50f44c9f09e013eca2a1374300c2f8b2d4fba1a Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/HSIGMOID_GRAD_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/HSIGMOID_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/HSIGMOID_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..5cac0b94bcd6223e7f57b62b0a8655e18d50bcd9 Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/HSIGMOID_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/LOGSIGMOID_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/LOGSIGMOID_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..bb2abf6aa57caae6de6d05f3f2008ad2650c2b72 Binary files /dev/null and 
b/dnn/src/cuda/elemwise_multi_type/kimpl/LOGSIGMOID_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/PRELU_GRAD_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/PRELU_GRAD_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..1518830ea8e4f5de3281a9b059fbcd74b4c2a7e4 Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/PRELU_GRAD_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/PRELU_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/PRELU_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..60fa38fa2d968c7ac1e392266a3f8158c740c119 Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/PRELU_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/RELU6_GRAD_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/RELU6_GRAD_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..49ef8a946a5415d193380638e82a71736b045b12 Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/RELU6_GRAD_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/RELU6_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/RELU6_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..7cf2b51230ac42bac52d9df41795e0fe15718606 Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/RELU6_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/SIGN_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/SIGN_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..279bb2bbb8a8321c316f26bce9903f0ed30369be Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/SIGN_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/SINH_dt_qint8_dt_qint8.cu 
b/dnn/src/cuda/elemwise_multi_type/kimpl/SINH_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..68f6f9a7d1613bf060d217037c73401ae68102df Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/SINH_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/SOFTPLUS_GRAD_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/SOFTPLUS_GRAD_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..2c8aa8b61f48dab8cf290db6f4ca395063d8942b Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/SOFTPLUS_GRAD_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/SOFTPLUS_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/SOFTPLUS_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..7c250442daf19731b1f73a094a53f41a36017cac Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/SOFTPLUS_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/SQRT_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/SQRT_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..48290ef97f6853e4abc5cb11e6c5eb690511a48a Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/SQRT_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/SQUARE_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/SQUARE_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..a24e045707f53379b0599df1ae3bb426058edd4c Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/SQUARE_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/kimpl/TAN_dt_qint8_dt_qint8.cu b/dnn/src/cuda/elemwise_multi_type/kimpl/TAN_dt_qint8_dt_qint8.cu new file mode 100644 index 0000000000000000000000000000000000000000..ecf0bcb60437b0e6449787326530988e82244bd9 
Binary files /dev/null and b/dnn/src/cuda/elemwise_multi_type/kimpl/TAN_dt_qint8_dt_qint8.cu differ diff --git a/dnn/src/cuda/elemwise_multi_type/opr_impl.cpp b/dnn/src/cuda/elemwise_multi_type/opr_impl.cpp index 64a90258a26a60b58bdc3cadae6002990e1eb588..2f45e524199928a474d2e73ca5e803c843ab92d0 100644 --- a/dnn/src/cuda/elemwise_multi_type/opr_impl.cpp +++ b/dnn/src/cuda/elemwise_multi_type/opr_impl.cpp @@ -267,7 +267,10 @@ IMPL_MODE_DISPATCHER(2, dt_qint4, dt_qint4); IMPL_MODE_DISPATCHER(2, dt_quint4, dt_quint4); #undef FOREACH -#define FOREACH MEGDNN_FOREACH_ELEMWISE_MODE_TERNARY_FLOAT +#define FOREACH(cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(COND_LEQ_MOV, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(FUSE_MUL_ADD3, cb) \ + MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) IMPL_MODE_DISPATCHER(3, dt_qint4, dt_qint4); IMPL_MODE_DISPATCHER(3, dt_quint4, dt_quint4); #undef FOREACH diff --git a/dnn/src/fallback/elemwise/fallback_impl/opr_binary_impl.cpp b/dnn/src/fallback/elemwise/fallback_impl/opr_binary_impl.cpp index 4197a2da522fb14fe95f315f008fcc82119b04ef..f0cf6c13ef97260c56ca7212b4eb217a549003f5 100644 --- a/dnn/src/fallback/elemwise/fallback_impl/opr_binary_impl.cpp +++ b/dnn/src/fallback/elemwise/fallback_impl/opr_binary_impl.cpp @@ -228,6 +228,7 @@ INST(Mode::SHL); INST(Mode::SHR); INST(Mode::FUSE_ADD_RELU); INST(Mode::RMULH); +INST(Mode::PRELU); #undef INST #define INST(mode) \ @@ -258,6 +259,13 @@ INST(Mode::H_SWISH_GRAD); INST(Mode::FUSE_ADD_H_SWISH); INST(Mode::SILU_GRAD); INST(Mode::GELU_GRAD); +INST(Mode::PRELU); +INST(Mode::ASINH_GRAD); +INST(Mode::ACOSH_GRAD); +INST(Mode::ATANH_GRAD); +INST(Mode::SOFTPLUS_GRAD); +INST(Mode::RELU6_GRAD); +INST(Mode::HSIGMOID_GRAD); #undef INST } // namespace fallback } // namespace megdnn diff --git a/dnn/src/fallback/elemwise/fallback_impl/opr_unary_impl.cpp b/dnn/src/fallback/elemwise/fallback_impl/opr_unary_impl.cpp index b36e7dae23f65bdd6468f29586e1dcbd2f45428d..bf7acf095dfc157ca74c97a2c90071bf9478fb57 100644 --- 
a/dnn/src/fallback/elemwise/fallback_impl/opr_unary_impl.cpp +++ b/dnn/src/fallback/elemwise/fallback_impl/opr_unary_impl.cpp @@ -77,6 +77,9 @@ using Mode = param_enumv::Elemwise::Mode; INST(Mode::RELU); INST(Mode::ABS); INST(Mode::NEGATE); +INST(Mode::RELU6); +INST(Mode::SQUARE); +INST(Mode::SIGN); #undef INST #define INST(mode) \ @@ -105,6 +108,19 @@ INST(Mode::ERFCINV); INST(Mode::H_SWISH); INST(Mode::SILU); INST(Mode::GELU); +INST(Mode::SINH); +INST(Mode::COSH); +INST(Mode::ASINH); +INST(Mode::ACOSH); +INST(Mode::ATANH); +INST(Mode::TAN); +INST(Mode::SOFTPLUS); +INST(Mode::RELU6); +INST(Mode::HSIGMOID); +INST(Mode::LOGSIGMOID); +INST(Mode::SQRT); +INST(Mode::SQUARE); +INST(Mode::SIGN); #undef INST } // namespace fallback } // namespace megdnn diff --git a/dnn/src/naive/elemwise/kimpl/ACOSH_GRAD_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/ACOSH_GRAD_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fc3af7a753d1384d5ef5bf97c728b9bcfcc12d99 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ACOSH_GRAD_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/ACOSH_GRAD_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/ACOSH_GRAD_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1c4f89c8f04c1117f9d69bf522d4025a33918fa8 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ACOSH_GRAD_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/ACOSH_GRAD_dt_float32.cpp 
b/dnn/src/naive/elemwise/kimpl/ACOSH_GRAD_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7674459b3698a33b1ec6a21093cd9027af0f4e4c --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ACOSH_GRAD_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/ACOSH_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/ACOSH_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..80411b71b2cbf6f5e253705896bc8bffe65ff478 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ACOSH_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/ACOSH_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/ACOSH_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..aa417709bbb389dc4ecb2036fab84f9b7f517243 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ACOSH_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/ACOSH_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/ACOSH_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cfcf7ad3488aa53bd75ee02994ffa4849a804147 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ACOSH_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH, cb) 
+#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/ASINH_GRAD_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/ASINH_GRAD_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..393728a4da3ea08b625e5feca888a419e821802b --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ASINH_GRAD_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/ASINH_GRAD_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/ASINH_GRAD_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..807bedca44031f25d83edf83d1753f9312dcb903 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ASINH_GRAD_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/ASINH_GRAD_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/ASINH_GRAD_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7a3d5a3edf854c6c184d3eb98247793c8c9dbefc --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ASINH_GRAD_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/ASINH_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/ASINH_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7a9123815dbdd5952974f112a679e5f2b86fe974 
--- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ASINH_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/ASINH_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/ASINH_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..29c9307d2cf90ee005479bae3482adbf8c5c48a7 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ASINH_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/ASINH_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/ASINH_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b6fc8ab6412d8753cb5aa7c0b233cc369fc91199 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ASINH_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/ATANH_GRAD_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/ATANH_GRAD_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cf7afe1b76c63a1cf00a903ed1f5d5f0cadbc391 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ATANH_GRAD_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git 
a/dnn/src/naive/elemwise/kimpl/ATANH_GRAD_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/ATANH_GRAD_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..333b8f606c6be46342e53fda9eecdebf100ef005 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ATANH_GRAD_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/ATANH_GRAD_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/ATANH_GRAD_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..be794c32d835e9cf4fd8789f559ccd67910942b4 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ATANH_GRAD_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/ATANH_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/ATANH_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fa6683dfeaa38572803e57dc67edf14dd10592ad --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ATANH_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/ATANH_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/ATANH_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..804e5361338cc49ad03fb3b8630762327cb17d1a --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ATANH_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by 
gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/ATANH_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/ATANH_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5fd222b3a2e5422cb79163f8a9f5f79f2154baa1 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/ATANH_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/CLIP_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/CLIP_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..de73cd15d2c246cf5006379238ae4fc6b3f49682 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/CLIP_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/CLIP_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/CLIP_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..25e351c123538c392bbc936332893723ba6410a5 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/CLIP_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/CLIP_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/CLIP_dt_float32.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..cf8dc776ff0ed87a3167dbe07fbce5346b30f357 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/CLIP_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/CLIP_dt_int16.cpp b/dnn/src/naive/elemwise/kimpl/CLIP_dt_int16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f60b5c4ced3b3541b2734339dd3d4fae7c6e9b5e --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/CLIP_dt_int16.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/CLIP_dt_int32.cpp b/dnn/src/naive/elemwise/kimpl/CLIP_dt_int32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c003f595ea958af3884485680a3678b1eb8598cf --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/CLIP_dt_int32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/CLIP_dt_int8.cpp b/dnn/src/naive/elemwise/kimpl/CLIP_dt_int8.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cb0ec0463dfe7d38043d321085f150fe8bee1826 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/CLIP_dt_int8.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/CLIP_dt_uint8.cpp b/dnn/src/naive/elemwise/kimpl/CLIP_dt_uint8.cpp new file mode 
100644 index 0000000000000000000000000000000000000000..b0198d93d68ba3cf90d60bbf262e6f44a02e1ffe --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/CLIP_dt_uint8.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/COSH_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/COSH_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7cb17527beb5abb7f2cdba58f3633c9c5f309be2 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/COSH_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(COSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/COSH_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/COSH_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5f42f2350232482a8fa6e3b2bdc5e3841baa89da --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/COSH_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(COSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/COSH_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/COSH_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..94ea18705cbb641fb4d8afb3bc23544db120e892 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/COSH_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(COSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git 
a/dnn/src/naive/elemwise/kimpl/HSIGMOID_GRAD_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/HSIGMOID_GRAD_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a8115bff8428750645a818544b7fa829248824b4 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/HSIGMOID_GRAD_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/HSIGMOID_GRAD_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/HSIGMOID_GRAD_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a1fb7ee3ee5f1c288281a8bb7cb5445c0c596d43 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/HSIGMOID_GRAD_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/HSIGMOID_GRAD_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/HSIGMOID_GRAD_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9c0a4aebd89bd9eb4aca61a27e5b4b4f8e393b23 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/HSIGMOID_GRAD_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/HSIGMOID_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/HSIGMOID_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..28a83976f4009d40d50e31fdeb6d94cf9bee5ab0 --- /dev/null +++ 
b/dnn/src/naive/elemwise/kimpl/HSIGMOID_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/HSIGMOID_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/HSIGMOID_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cdb77455f7aeb8a6207d40d0c38d0acdb94d77ba --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/HSIGMOID_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/HSIGMOID_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/HSIGMOID_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..528f944c2a66eb3dcc36868dfc4f89c8c9bae099 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/HSIGMOID_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/LOGSIGMOID_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/LOGSIGMOID_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..06322df63cecfe034ba9b53b9a7105c163e57e89 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/LOGSIGMOID_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(LOGSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git 
a/dnn/src/naive/elemwise/kimpl/LOGSIGMOID_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/LOGSIGMOID_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d0b6c026851ea01f3f796b8e9ccda787f51fceb3 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/LOGSIGMOID_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(LOGSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/LOGSIGMOID_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/LOGSIGMOID_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ea1bcf1a3ab05774fd28e69152f18f37c80cbdbf --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/LOGSIGMOID_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(LOGSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/PRELU_GRAD_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/PRELU_GRAD_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dc33150072bc4d40f3d56a9b45e9d26c2985b559 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/PRELU_GRAD_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU_GRAD, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/PRELU_GRAD_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/PRELU_GRAD_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3441181806c090103f7e7711b52ce4c417252204 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/PRELU_GRAD_dt_float16.cpp @@ -0,0 +1,7 @@ +// 
generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU_GRAD, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/PRELU_GRAD_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/PRELU_GRAD_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1fedc850076ee187afd30c48b51785fb958dc357 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/PRELU_GRAD_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU_GRAD, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/PRELU_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/PRELU_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..78c18f9d716524a01880336c03e906b9d646cd9c --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/PRELU_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/PRELU_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/PRELU_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..33e6ce7393622702eca9c187eaa1cb964b8f13b6 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/PRELU_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/PRELU_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/PRELU_dt_float32.cpp new 
file mode 100644 index 0000000000000000000000000000000000000000..46f2d36705d6ae3c09361b540bd5650750e71ae8 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/PRELU_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/PRELU_dt_int16.cpp b/dnn/src/naive/elemwise/kimpl/PRELU_dt_int16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d1dfa9ace337bbda9b40e88137fa43a59baa9eb0 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/PRELU_dt_int16.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/PRELU_dt_int32.cpp b/dnn/src/naive/elemwise/kimpl/PRELU_dt_int32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d6d7332f8ab7761a13ea68eacc38005b10e0b62b --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/PRELU_dt_int32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/PRELU_dt_int8.cpp b/dnn/src/naive/elemwise/kimpl/PRELU_dt_int8.cpp new file mode 100644 index 0000000000000000000000000000000000000000..621a7dd311d38be2b39117270e7398de22086edb --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/PRELU_dt_int8.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/PRELU_dt_uint8.cpp 
b/dnn/src/naive/elemwise/kimpl/PRELU_dt_uint8.cpp new file mode 100644 index 0000000000000000000000000000000000000000..86ff475d046afda072fd0ba6de02b432d79cad72 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/PRELU_dt_uint8.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/RELU6_GRAD_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/RELU6_GRAD_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..906996656ee3c6f0cfb24a3a31d00a3e8544f046 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/RELU6_GRAD_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/RELU6_GRAD_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/RELU6_GRAD_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..efb61fa6b23dce1fdc68fca7d9e047d3d57cf94d --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/RELU6_GRAD_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/RELU6_GRAD_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/RELU6_GRAD_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6088f41deae5038ebf6772f76b69cfdb97e81192 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/RELU6_GRAD_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) 
MEGDNN_ELEMWISE_MODE_ENABLE(RELU6_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/RELU6_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/RELU6_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cf79b7e965a6d0eb7b0a3c83669ac7bc9c808d72 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/RELU6_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/RELU6_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/RELU6_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0646045d1ec3a0c56117c3c321cf68fd1a656ccf --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/RELU6_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/RELU6_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/RELU6_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2fe7746f70244b9c2cad4e2ca0a8dee11244cb2a --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/RELU6_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/RELU6_dt_int16.cpp b/dnn/src/naive/elemwise/kimpl/RELU6_dt_int16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..32c2dab37cfed97cb66a8b5352712312dd6817eb --- /dev/null +++ 
b/dnn/src/naive/elemwise/kimpl/RELU6_dt_int16.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/RELU6_dt_int32.cpp b/dnn/src/naive/elemwise/kimpl/RELU6_dt_int32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e59877c32838c7a7a1b2943c76a4af25af1e56c5 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/RELU6_dt_int32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/RELU6_dt_int8.cpp b/dnn/src/naive/elemwise/kimpl/RELU6_dt_int8.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6f6f77416c5b1da4255c2eae3ba5ebe2c47ba3ff --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/RELU6_dt_int8.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/RELU6_dt_uint8.cpp b/dnn/src/naive/elemwise/kimpl/RELU6_dt_uint8.cpp new file mode 100644 index 0000000000000000000000000000000000000000..60812b5509ddcc27abe7507bc85461a6b0de0d94 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/RELU6_dt_uint8.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SIGN_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/SIGN_dt_bfloat16.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..34316156e524f88bcfc8e683fed67c69a4bba202 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SIGN_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/SIGN_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/SIGN_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..04ac0b8687bdeac1be29f152c33278ecc17a3257 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SIGN_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/SIGN_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/SIGN_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0402184f056634b18572acbe4735bee01b9b39d6 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SIGN_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SIGN_dt_int16.cpp b/dnn/src/naive/elemwise/kimpl/SIGN_dt_int16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0a854c234513c1f50230e8a62d9a1066dbb84ae5 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SIGN_dt_int16.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git 
a/dnn/src/naive/elemwise/kimpl/SIGN_dt_int32.cpp b/dnn/src/naive/elemwise/kimpl/SIGN_dt_int32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5f3aa92707bc2a41e90984fee2d88c1a70e123eb --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SIGN_dt_int32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SIGN_dt_int8.cpp b/dnn/src/naive/elemwise/kimpl/SIGN_dt_int8.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c0d44608aa081a9b4cfcd23f6da42800c06aadcd --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SIGN_dt_int8.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SIGN_dt_uint8.cpp b/dnn/src/naive/elemwise/kimpl/SIGN_dt_uint8.cpp new file mode 100644 index 0000000000000000000000000000000000000000..37f4b4b24ef2f272c6fb465bf45eff288c9417d9 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SIGN_dt_uint8.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SINH_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/SINH_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..19b3b24d4bccffd1d20b6776fcacc1937219341c --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SINH_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE 
dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/SINH_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/SINH_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0298140ee348480cc71f7dc46b9700adfc2815f8 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SINH_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/SINH_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/SINH_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d781a287d648e3a91deded69a105ae383af35f31 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SINH_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SOFTPLUS_GRAD_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/SOFTPLUS_GRAD_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9769ef8793294c868338e59ed687b048f4cda809 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SOFTPLUS_GRAD_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/SOFTPLUS_GRAD_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/SOFTPLUS_GRAD_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..694fea1bfbea814472c444b8f4d4203406c89256 --- /dev/null +++ 
b/dnn/src/naive/elemwise/kimpl/SOFTPLUS_GRAD_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/SOFTPLUS_GRAD_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/SOFTPLUS_GRAD_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..05710880904d2589837ffbc52b3d5c17f98a95f1 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SOFTPLUS_GRAD_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SOFTPLUS_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/SOFTPLUS_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7df279c7fb7c8ddfc8b7374998997c83420f373f --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SOFTPLUS_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/SOFTPLUS_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/SOFTPLUS_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..98d84daddbd4734908f999caf3fbe8b77e4e3cac --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SOFTPLUS_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif 
diff --git a/dnn/src/naive/elemwise/kimpl/SOFTPLUS_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/SOFTPLUS_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..898996df7c3d2615134dd72c2f3c1ec9b8ee713f --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SOFTPLUS_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SQRT_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/SQRT_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b6483dfefcbccb56f73d5bb8ffa46d5416bb5975 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SQRT_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQRT, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/SQRT_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/SQRT_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..262e68d4b517151dd28071fddff8e8ddba3afec3 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SQRT_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQRT, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/SQRT_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/SQRT_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1c6aa2af998578e8a18b868df8af41d0aece06bf --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SQRT_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define 
KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQRT, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SQUARE_dt_bfloat16.cpp b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..59f5383ecc121f378bf65621e9fbd2447597df21 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/SQUARE_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c53551bb16fcd02dba621a2332b4fbe37a35eba7 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/SQUARE_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4282b4793173f75f5e55a7bf8a47171d10dc120a --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SQUARE_dt_int16.cpp b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_int16.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..5d7bec087512364120790473ebb7623635054a0e --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_int16.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SQUARE_dt_int32.cpp b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_int32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c1237c3775c46fce8f4e32eb6272b5cb7677c737 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_int32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SQUARE_dt_int8.cpp b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_int8.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8c01483e5b305d6a6f0804d753ced10d920f5aa5 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_int8.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/SQUARE_dt_uint8.cpp b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_uint8.cpp new file mode 100644 index 0000000000000000000000000000000000000000..094c419155572eec5310118820a9df803b894b96 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/SQUARE_dt_uint8.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/naive/elemwise/kimpl/TAN_dt_bfloat16.cpp 
b/dnn/src/naive/elemwise/kimpl/TAN_dt_bfloat16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..415ed3fc49e9d6247e037e1083409e6cf79a1a8c --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/TAN_dt_bfloat16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(TAN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/TAN_dt_float16.cpp b/dnn/src/naive/elemwise/kimpl/TAN_dt_float16.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dd5339a35c4cc5c7ea6b5294713a85bdee765bf2 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/TAN_dt_float16.cpp @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(TAN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/naive/elemwise/kimpl/TAN_dt_float32.cpp b/dnn/src/naive/elemwise/kimpl/TAN_dt_float32.cpp new file mode 100644 index 0000000000000000000000000000000000000000..796c4e65abc8f34328a146fe79af06e04442cdd1 --- /dev/null +++ b/dnn/src/naive/elemwise/kimpl/TAN_dt_float32.cpp @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(TAN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/ACOSH_GRAD_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ACOSH_GRAD_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..a231d0a33fec72bff4cfbda682224c8a3f215b39 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ACOSH_GRAD_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) 
MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/ACOSH_GRAD_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ACOSH_GRAD_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..ca103ee3b573124f63edf218416c4b7f2838af94 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ACOSH_GRAD_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/ACOSH_GRAD_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ACOSH_GRAD_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..11f768c9c52e1503c2bd5899c850ccc63857ab2b --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ACOSH_GRAD_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/ACOSH_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ACOSH_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..28c3f17367487d9a035c3d634b61380ff5f92fce --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ACOSH_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/ACOSH_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ACOSH_dt_float16.cpp.hip new file mode 100644 index 
0000000000000000000000000000000000000000..06d2c12e34e64b82460bcd9f227cff2cebb99653 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ACOSH_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/ACOSH_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ACOSH_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..9e96939608a8158628eea58c29fb1904abd25e99 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ACOSH_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ACOSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/ASINH_GRAD_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ASINH_GRAD_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..d0f61ca23ce4e96f1634c82e561c08e7fc75a376 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ASINH_GRAD_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/ASINH_GRAD_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ASINH_GRAD_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..0840a54c1f583c8f1460c8c8b448c37033beff07 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ASINH_GRAD_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH_GRAD, cb) 
+#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/ASINH_GRAD_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ASINH_GRAD_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..c239ddb2d9035e1e903437fc19bef6877592ce3e --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ASINH_GRAD_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/ASINH_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ASINH_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..ea0f11ecf5242865af1286a77a89e173326f07dd --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ASINH_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/ASINH_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ASINH_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..cc5d73023c2f321aa6d1c35469783936d266538e --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ASINH_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/ASINH_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ASINH_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..9607630222a5105655a4b63e5a0a365a34d2b6c3 
--- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ASINH_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ASINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/ATANH_GRAD_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ATANH_GRAD_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..3b0496f40b7c662e2177be83d64938eb242d57a2 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ATANH_GRAD_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/ATANH_GRAD_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ATANH_GRAD_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..1a0c841a628be653ac2d89d63063717ab5efdaa2 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ATANH_GRAD_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/ATANH_GRAD_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ATANH_GRAD_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..ce400c030129f96d30165f471910e3abe1418548 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ATANH_GRAD_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" 
diff --git a/dnn/src/rocm/elemwise/kimpl/ATANH_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ATANH_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..9ac3b63acf34f05af2fb0429c44ab516101a17e4 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ATANH_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/ATANH_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ATANH_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..56902e43605aac21546d8e020e461ede7d156a28 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ATANH_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/ATANH_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/ATANH_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..048cab3a76bc4e25f096d4d6ae1ec20cbd242cd8 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/ATANH_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(ATANH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/CLIP_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..91c018ae1e4ad41e1d9f1e21376fdf8d82697f90 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by 
gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/CLIP_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..1d06d8d1b7e24602b50298994f6657d5dee1128d --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/CLIP_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..346efcbd2b6713709af8eacdf8f012e24e8aa234 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/CLIP_dt_int16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_int16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..4394848f2d91b795f6939ccf8c71f99ff54337c5 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_int16.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/CLIP_dt_int32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_int32.cpp.hip new file mode 100644 index 
0000000000000000000000000000000000000000..ed51ebd8ea93ad5cd36a12f38141e7e4e967a8cd --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_int32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/CLIP_dt_int8.cpp.hip b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_int8.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..dea82a24f65ec5da7bcdc817f05ec268df720ecf --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_int8.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/CLIP_dt_uint8.cpp.hip b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_uint8.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..0d798b15d923ad378a9ed67a4e2f5c68fce38dc8 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/CLIP_dt_uint8.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(CLIP, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/COSH_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/COSH_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..25c504a30ce6958e2dec00b1503891e7b7b7703c --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/COSH_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(COSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git 
a/dnn/src/rocm/elemwise/kimpl/COSH_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/COSH_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..a5e92a13ffccdf5f22d970ea1cbafa0efbf1b484 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/COSH_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(COSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/COSH_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/COSH_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..2d4c2784a4d0a51abe2d3692724440d52a553a99 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/COSH_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(COSH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/HSIGMOID_GRAD_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/HSIGMOID_GRAD_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..54b03d90afc2bdede7486a0f8725cb6f3a1c91a2 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/HSIGMOID_GRAD_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/HSIGMOID_GRAD_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/HSIGMOID_GRAD_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..eaa54cca410540dabc4a7b6e5e0135a3ec2fdaae --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/HSIGMOID_GRAD_dt_float16.cpp.hip @@ -0,0 
+1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/HSIGMOID_GRAD_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/HSIGMOID_GRAD_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..a000bb105eafbee679bca1bf9404f9b498c7aba2 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/HSIGMOID_GRAD_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/HSIGMOID_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/HSIGMOID_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..fdb642b662e6f834b7389ec84277ea7d602fcff4 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/HSIGMOID_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/HSIGMOID_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/HSIGMOID_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..94e88cd6427c5b3a7b4a30bb3df9bc55f87906dc --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/HSIGMOID_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git 
a/dnn/src/rocm/elemwise/kimpl/HSIGMOID_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/HSIGMOID_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..8e13dd5352d54d06badfdff5a441efdc821a40a1 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/HSIGMOID_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(HSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/LOGSIGMOID_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/LOGSIGMOID_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..fa85681cc7d363efc4dd575ea0eadf13c7598907 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/LOGSIGMOID_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(LOGSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/LOGSIGMOID_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/LOGSIGMOID_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..4992d81da43177d15475c3362e41b12aa1887b27 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/LOGSIGMOID_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(LOGSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/LOGSIGMOID_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/LOGSIGMOID_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..72e6e3a958a16b30705625c866f87695e0a0baac --- /dev/null +++ 
b/dnn/src/rocm/elemwise/kimpl/LOGSIGMOID_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(LOGSIGMOID, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/PRELU_GRAD_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/PRELU_GRAD_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..d24ceb4ad6f2fd07b6f9bdbfb63185ff4ab9eb38 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/PRELU_GRAD_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU_GRAD, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/PRELU_GRAD_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/PRELU_GRAD_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..4665a27755bb279048dd6cd509b2189230ce9330 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/PRELU_GRAD_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU_GRAD, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/PRELU_GRAD_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/PRELU_GRAD_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..023f6fe1af2e3a52c4027332be09037f5032bca8 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/PRELU_GRAD_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU_GRAD, cb) +#define KERN_IMPL_ARITY 3 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff 
--git a/dnn/src/rocm/elemwise/kimpl/PRELU_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..2ae7e683c775c2aacab44de03588ced34a075bb6 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/PRELU_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..1e1253caa4d24906ed02537625f7c742406ec9f0 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/PRELU_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..9d3b9676e38029ae699ec55303140f2a5f117f28 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/PRELU_dt_int16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_int16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..03846baf8a3d2da8abe530d4c3fe225e23c6139c --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_int16.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py 
+#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/PRELU_dt_int32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_int32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..41f41670806ad556eab8fb4a2f984d747bdfb692 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_int32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/PRELU_dt_int8.cpp.hip b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_int8.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..88c2bbb698ec02bd8cd4fa9ed82d2e703c5b208e --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_int8.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/PRELU_dt_uint8.cpp.hip b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_uint8.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..a9febeadddd443eb523209408545a97be637af0b --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/PRELU_dt_uint8.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(PRELU, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/RELU6_GRAD_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/RELU6_GRAD_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..f2099c6bb68cff5c4633b5649ff075cacf5d1cda --- /dev/null +++ 
b/dnn/src/rocm/elemwise/kimpl/RELU6_GRAD_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/RELU6_GRAD_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/RELU6_GRAD_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..b46f1c52df62d2a07fef279206084935d23a25c5 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/RELU6_GRAD_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/RELU6_GRAD_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/RELU6_GRAD_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..37b7d1baa0e613dc3d01b02fdeeb23e3c9bc99db --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/RELU6_GRAD_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/RELU6_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..bb39a1a8f83758894115bf5f4d2110dc4fc6b9f8 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include 
"../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/RELU6_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..f84eac713c0aa82c3d341ac6c65ef7c0ff8e0656 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/RELU6_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..fd3fe60f2f66b89082e0594e986a8b0d4a430859 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/RELU6_dt_int16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_int16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..66a41225b1810515d4b0bc5fa47787a0adf5d670 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_int16.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/RELU6_dt_int32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_int32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..1a5eed8257d459878fc8a49096470df6d95ddd60 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_int32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define 
KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/RELU6_dt_int8.cpp.hip b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_int8.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..f2ecc40acf7105edc7d341f6a781db28baa643ab --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_int8.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/RELU6_dt_uint8.cpp.hip b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_uint8.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..b13aad0d21d6a3f111954c44dcd1b56bdf014fac --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/RELU6_dt_uint8.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(RELU6, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SIGN_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..ee5373bfd14eb062305b570a0574f586f979541c --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SIGN_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..69277cd56003e5dc284bfb06fa589e61d96efef9 --- /dev/null +++ 
b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SIGN_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..718709a16157ae4fdff1f0cf1ce1510d0ee7bb10 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SIGN_dt_int16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_int16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..d1cdfbd5e604089f20afedddda535bb414b00dbe --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_int16.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SIGN_dt_int32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_int32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..2955aef0b0c79f4fbdd5c1996cdc84dabda2a73d --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_int32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SIGN_dt_int8.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_int8.cpp.hip new file mode 100644 index 
0000000000000000000000000000000000000000..e6cc08494c5354658654d3403d1ecd02dd5d6e89 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_int8.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SIGN_dt_uint8.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_uint8.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..b39727a4f782cdd847a35533a11b35e15d987821 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SIGN_dt_uint8.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SIGN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SINH_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SINH_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..7b0d47402fae6599505e936a8b9fbebfcb4c7de8 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SINH_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SINH_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SINH_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..070496fb23061a428a3b897b68998804cfddd9c4 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SINH_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff 
--git a/dnn/src/rocm/elemwise/kimpl/SINH_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SINH_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..d295a78f4b8551e050acfd5c60cfa09447f78607 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SINH_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SINH, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_GRAD_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_GRAD_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..2de73538a4b8cb79e21a508f5d2df571f646305a --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_GRAD_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_GRAD_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_GRAD_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..da7dabfd444afd92c39034900b3afe521ef29e9f --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_GRAD_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_GRAD_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_GRAD_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..f123cd7e99eac6351fca70fafb068cc7fc881974 --- /dev/null +++ 
b/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_GRAD_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS_GRAD, cb) +#define KERN_IMPL_ARITY 2 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..f86b264d9c03efb8e6d497aa43edbbf94bbfc866 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..991ba23079dd46974f7f87df14fb9b0bc7dee4cd --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..55716f22b90b8fd22cc3834aeb28b3ed46a14130 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SOFTPLUS_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SOFTPLUS, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git 
a/dnn/src/rocm/elemwise/kimpl/SQRT_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SQRT_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..9c4b92115a0969f676f7c50bdd9990f25eb1bc71 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SQRT_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQRT, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SQRT_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SQRT_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..ad05f946340f8f78d57642ba620b64730059d46e --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SQRT_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQRT, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SQRT_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SQRT_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..448f9a0caa72c1540db2c7b594963341fbba4340 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SQRT_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQRT, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..aa6d5912f8837696302bcd27ea97185e54d34059 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if 
!MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..1db3690dbe89f8fea70c13248bf87aa06ea6fdf8 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..da4072aee13f210ba74f53b577fec03eb9062bcf --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_int16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_int16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..487b9824921f6425c7aaa6c41ecd53963eaaa913 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_int16.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int16 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_int32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_int32.cpp.hip new file mode 100644 index 
0000000000000000000000000000000000000000..ae617e29fefe53c5a5a2f0ba0c2802b9cc8743a9 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_int32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int32 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_int8.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_int8.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..9ebe2f858068f43828bc0b1b58391b03f5b6b8a3 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_int8.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_int8 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_uint8.cpp.hip b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_uint8.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..b01411f101f44c8225ab1fbb80abec10455daed1 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/SQUARE_dt_uint8.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(SQUARE, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_uint8 +#include "../kern_impl.inl" diff --git a/dnn/src/rocm/elemwise/kimpl/TAN_dt_bfloat16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/TAN_dt_bfloat16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..9a274f50ad898d5bc8fd0048c10682acfeaa83ec --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/TAN_dt_bfloat16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(TAN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_bfloat16 +#include "../kern_impl.inl" +#endif diff --git 
a/dnn/src/rocm/elemwise/kimpl/TAN_dt_float16.cpp.hip b/dnn/src/rocm/elemwise/kimpl/TAN_dt_float16.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..522e02f835a7b11547191ddf7d719603b73a1174 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/TAN_dt_float16.cpp.hip @@ -0,0 +1,7 @@ +// generated by gen_elemwise_kern_impls.py +#if !MEGDNN_DISABLE_FLOAT16 +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(TAN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float16 +#include "../kern_impl.inl" +#endif diff --git a/dnn/src/rocm/elemwise/kimpl/TAN_dt_float32.cpp.hip b/dnn/src/rocm/elemwise/kimpl/TAN_dt_float32.cpp.hip new file mode 100644 index 0000000000000000000000000000000000000000..42a0c3b1971843605554f9c712fb4f91d4251337 --- /dev/null +++ b/dnn/src/rocm/elemwise/kimpl/TAN_dt_float32.cpp.hip @@ -0,0 +1,5 @@ +// generated by gen_elemwise_kern_impls.py +#define KERN_IMPL_MODE(cb) MEGDNN_ELEMWISE_MODE_ENABLE(TAN, cb) +#define KERN_IMPL_ARITY 1 +#define KERN_IMPL_CTYPE dt_float32 +#include "../kern_impl.inl" diff --git a/dnn/test/common/elemwise.cpp b/dnn/test/common/elemwise.cpp index 31f9bcf02f63af3fd8279dbfdae143c409dd91cf..2ef46962e6b757987c6489c6dd1693a9a9c416ab 100644 --- a/dnn/test/common/elemwise.cpp +++ b/dnn/test/common/elemwise.cpp @@ -744,8 +744,8 @@ DEF_TEST(all_modes) { TensorShapeArray shapes; UniformFloatRNG default_rng_f32{-100.f, 100.f}, pos_rng_f32{.1f, 1000.f}, small_pos_rng_f32{.1f, .10f}, small_rng_f32{-3.f, 3.f}, - abslt1_rng_f32{-1.f, 1.f}, uniform_0_2_rng{0.f, 2.f}, - tanh_rng_f32{-5.f, 5.f}; + abslt1_rng_f32{-0.95f, 0.95f}, uniform_0_2_rng{0.f, 2.f}, + tanh_rng_f32{-5.f, 5.f}, lt1_rng_f32{1.f, 10.f}; UniformFloatNonZeroRNG nonzero_rng_f32{.1f, 1000.f}, big_nonzero_rng_f32{100.f, 1000.f}; UniformIntRNG default_rng_i32{-100, 100}, small_rng_i32{-2, 2}, @@ -786,12 +786,14 @@ DEF_TEST(all_modes) { shapes[shapes.size() - 1] = {}; auto do_run = [&](DType dtype, float eps = 1e-3) { // limit value ranges for some 
modes - if (mode == Mode::LOG || mode == Mode::LOG1P) { + if (mode == Mode::LOG || mode == Mode::LOG1P || mode == Mode::SQRT) { checker.set_rng(0, &pos_rng_f32); - } else if (mode == Mode::POW) { + } else if (mode == Mode::POW || mode == Mode::SOFTPLUS_GRAD) { checker.set_rng(0, &small_pos_rng_f32); checker.set_rng(1, &small_rng_f32); - } else if (mode == Mode::EXP || mode == Mode::EXPM1) { + } else if ( + mode == Mode::EXP || mode == Mode::EXPM1 || mode == Mode::SINH || + mode == Mode::COSH) { checker.set_rng(0, &small_rng_f32); } else if (mode == Mode::FAST_TANH) { checker.set_rng(0, &tanh_rng_f32); @@ -807,6 +809,10 @@ DEF_TEST(all_modes) { checker.set_rng(1, &default_rng_f32); } else if (mode == Mode::ERFCINV) { checker.set_rng(0, &uniform_0_2_rng); + } else if (mode == Mode::ACOSH_GRAD || mode == Mode::ACOSH) { + checker.set_rng(0, &lt1_rng_f32); + } else if (mode == Mode::ATANH_GRAD || mode == Mode::ATANH) { + checker.set_rng(0, &abslt1_rng_f32); } else if ( mode == Mode::MOD || mode == Mode::TRUE_DIV || mode == Mode::FLOOR_DIV) { diff --git a/imperative/python/megengine/functional/elemwise.py b/imperative/python/megengine/functional/elemwise.py index e92cb2d9861893f0e236806da1f8e1ed8c838ad7..b4568c6e744fdafebe0a39ed7bf94ef76df56694 100644 --- a/imperative/python/megengine/functional/elemwise.py +++ b/imperative/python/megengine/functional/elemwise.py @@ -467,12 +467,12 @@ def log1p(x): def sqrt(x: Tensor) -> Tensor: r"""Element-wise `sqrt`.""" - return x ** 0.5 + return _elwise(x, mode=Elemwise.Mode.SQRT) def square(x: Tensor) -> Tensor: r"""Element-wise `square`.""" - return x ** 2 + return _elwise(x, mode=Elemwise.Mode.SQUARE) def round(x): @@ -515,7 +515,7 @@ def sin(x): def tan(x): r"""Element-wise `tangent`.""" - return sin(x) / cos(x) + return _elwise(x, mode=Elemwise.Mode.TAN) def acos(x): @@ -544,13 +544,12 @@ def atan2(y, x): def cosh(x): r"""Element-wise `hyperbolic cosine`.""" - return 0.5 * (exp(x) + exp(-x)) + return _elwise(x, 
mode=Elemwise.Mode.COSH) def sinh(x): r"""Element-wise `hyperbolic sine`.""" - u = expm1(x) - return 0.5 * u / (u + 1) * (u + 2) + return _elwise(x, mode=Elemwise.Mode.SINH) def tanh(x): @@ -560,17 +559,17 @@ def tanh(x): def asinh(x): r"""Element-wise `inverse hyperbolic sine`.""" - return log(x + (x ** 2 + 1) ** 0.5) + return _elwise(x, mode=Elemwise.Mode.ASINH) def acosh(x): r"""Element-wise `inverse hyperbolic cosine`.""" - return log(x + (x ** 2 - 1) ** 0.5) + return _elwise(x, mode=Elemwise.Mode.ACOSH) def atanh(x): r"""Element-wise `inverse hyperbolic tangent`.""" - return log1p(2 * x / (1 - x)) / 2 + return _elwise(x, mode=Elemwise.Mode.ATANH) # bit-twiddling functions @@ -680,7 +679,7 @@ def clip(x: Tensor, lower=None, upper=None) -> Tensor: ), "At least one of 'lower' or 'upper' must not be None" if lower is not None: if upper is not None: - return minimum(maximum(x, lower), upper) + return _elwise(x, lower, upper, mode=Elemwise.Mode.CLIP) else: return maximum(x, lower) else: diff --git a/imperative/python/megengine/functional/math.py b/imperative/python/megengine/functional/math.py index 978221ac48c180736cfcf87ea5c362e1aea7b5d2..42fa014e2213ef157f4267ccc05881ab1627974c 100644 --- a/imperative/python/megengine/functional/math.py +++ b/imperative/python/megengine/functional/math.py @@ -6,7 +6,7 @@ from typing import Iterable, Optional, Sequence, Tuple, Union from ..core._imperative_rt.core2 import Const, apply from ..core._imperative_rt.ops import SubgraphBuilder as _SubgraphBuilder from ..core.ops import builtin -from ..core.tensor.array_method import _matmul +from ..core.tensor.array_method import _elwise, _matmul from ..core.tensor.utils import _normalize_axis from ..tensor import Tensor from ..utils.deprecation import deprecated_kwargs_default @@ -86,7 +86,7 @@ def sign(inp: Tensor): >>> F.sign(x) Tensor([ 1 -1 0], dtype=int32, device=xpux:0) """ - return (inp > 0).astype(inp.dtype) - (inp < 0).astype(inp.dtype) + return _elwise(inp, 
mode=builtin.Elemwise.Mode.SIGN) def sum( diff --git a/imperative/python/megengine/functional/nn.py b/imperative/python/megengine/functional/nn.py index 58a60b4d543c68243419d8e964a24c9c909373b2..8a42e9065593ba557c18862d8a5a468daf30d875 100644 --- a/imperative/python/megengine/functional/nn.py +++ b/imperative/python/megengine/functional/nn.py @@ -753,37 +753,9 @@ def sigmoid(x): return _elwise(x, mode=Elemwise.Mode.SIGMOID) -@lru_cache(maxsize=None) -def _get_hsigmoid_op(dtype=None, device=None): - @subgraph_fn( - "Hsigmoid", - dtype=dtype, - device=device, - nr_inputs=1, - jit_fusion=True, - custom_grad=True, - ) - def hsigmoid(inputs, f, c): - (inp,) = inputs[0:1] - inp = f("+", inp, c(3)) - max_0 = f("max", inp, c(0)) - min_6 = f("min", max_0, c(6)) - oup = f("/", min_6, c(6)) - (oup_grad,) = yield (oup,) - inp_grad = f("/", oup_grad, c(6)) - inp_grad = f("cond_leq_mov", max_0, c(6), inp_grad) - inp_grad = f("cond_leq_mov", c(0), inp, inp_grad) - yield (inp_grad,) - - return hsigmoid - - def hsigmoid(x): r"""Element-wise `relu6(x + 3) / 6`.""" - hsigmoid = _get_hsigmoid_op(x.dtype, x.device) - (x,) = hsigmoid(x) - return x - # return relu6(x + 3) / 6 + return _elwise(x, mode=Elemwise.Mode.HSIGMOID) def relu(x): @@ -791,95 +763,14 @@ def relu(x): return _elwise(x, mode=Elemwise.Mode.RELU) -@lru_cache(maxsize=None) -def _get_relu6_op(dtype=None, device=None): - @subgraph_fn( - "ReLU6", - dtype=dtype, - device=device, - nr_inputs=1, - jit_fusion=True, - custom_grad=True, - ) - def relu6(inputs, f, c): - (inp,) = inputs[0:1] - max_0 = f("max", inp, c(0)) - min_6 = f("min", max_0, c(6)) - oup = min_6 - (oup_grad,) = yield (oup,) - inp_grad = f("cond_leq_mov", max_0, c(6), oup_grad) - inp_grad = f("cond_leq_mov", c(0), inp, inp_grad) - yield (inp_grad,) - - return relu6 - - def relu6(x): r"""Element-wise `min(max(x, 0), 6)`.""" - relu6 = _get_relu6_op(x.dtype, x.device) - (x,) = relu6(x) - return x - - -@lru_cache(maxsize=None) -def _get_prelu_op(dtype=None, 
device=None): - @subgraph_fn( - "PReLU", - dtype=dtype, - device=device, - nr_inputs=2, - jit_fusion=True, - custom_grad=True, - ) - def prelu(inputs, f, c): - (inp, weight) = inputs[0:2] - max_0 = f("max", inp, c(0)) - min_0 = f("min", inp, c(0)) - oup = f("fma3", min_0, weight, max_0) - (oup_grad,) = yield (oup,) - inp_grad_0 = f("cond_leq_mov", c(0), inp, oup_grad) - inp_grad_1 = f("*", oup_grad, weight) - inp_grad_1 = f("cond_leq_mov", inp, c(0), inp_grad_1) - inp_grad = f("+", inp_grad_0, inp_grad_1) - weight_grad = f("*", oup_grad, min_0) - yield (inp_grad, weight_grad) - - return prelu - - -def prelu(inp: Tensor, weight: Tensor) -> Tensor: - r"""Element-wise PReLU function. - - Refer to :class:`~.PReLU` for more information. - """ - prelu = _get_prelu_op(dtype=inp.dtype, device=inp.device) - (oup,) = prelu(inp, broadcast_to(weight, inp.shape)) - return oup + return _elwise(x, mode=Elemwise.Mode.RELU6) -@lru_cache(maxsize=None) -def _get_leaky_relu_op(negative_slope, *, dtype=None, device=None): - @subgraph_fn( - "LeakyReLU", - dtype=dtype, - device=device, - nr_inputs=1, - jit_fusion=True, - custom_grad=True, - ) - def leakyReLU(inputs, f, c): - (inp,) = inputs[0:1] - max_0 = f("max", inp, c(0)) - min_0 = f("min", inp, c(0)) - oup = f("+", max_0, f("*", min_0, c(negative_slope))) - (oup_grad,) = yield (oup,) - inp_grad_0 = f("cond_leq_mov", c(0), inp, oup_grad) - inp_grad_1 = f("*", oup_grad, c(negative_slope)) - inp_grad_1 = f("cond_leq_mov", inp, c(0), inp_grad_1) - inp_grad = f("+", inp_grad_0, inp_grad_1) - yield (inp_grad,) - - return leakyReLU +def prelu(x, y): + r"""Element-wise `max(x, 0) + y * min(x, 0)`.""" + return _elwise(x, y, mode=Elemwise.Mode.PRELU) def leaky_relu(inp: Tensor, negative_slope: float = 0.01) -> Tensor: @@ -887,9 +778,7 @@ def leaky_relu(inp: Tensor, negative_slope: float = 0.01) -> Tensor: Refer to :class:`~.LeakyReLU` for more information. 
""" - leakyReLU = _get_leaky_relu_op(negative_slope, dtype=inp.dtype, device=inp.device) - (oup,) = leakyReLU(inp) - return oup + return _elwise(inp, negative_slope, mode=Elemwise.Mode.PRELU) def silu(x): @@ -908,36 +797,6 @@ def gelu(x): return _elwise(x, mode=Elemwise.Mode.GELU) -@lru_cache(maxsize=None) -def _get_softplus_op(dtype=None, device=None): - @subgraph_fn( - "Softplus", - dtype=dtype, - device=device, - nr_inputs=1, - jit_fusion=True, - custom_grad=True, - ) - def softplus(inputs, f, c): - (inp,) = inputs[0:1] - neg_abs = f("-", f("abs", inp)) - exp = f("exp", neg_abs) - oup0 = f("log1p", exp) - oup1 = f("relu", inp) - oup = f("+", oup0, oup1) - (oup_grad,) = yield (oup,) - inp_grad_0 = f("switch_gt0", oup1, oup_grad) - inp_grad_1 = oup_grad - inp_grad_1 = f("/", oup_grad, f("+", exp, c(1))) - inp_grad_1 = f("*", inp_grad_1, exp) - inp_grad_1 = f("-", inp_grad_1) - inp_grad_1 = f("abs_grad", inp, inp_grad_1) - inp_grad = f("+", inp_grad_0, inp_grad_1) - yield (inp_grad,) - - return softplus - - def softplus(inp: Tensor) -> Tensor: r"""Applies the element-wise function: @@ -960,9 +819,7 @@ def softplus(inp: Tensor) -> Tensor: >>> y.numpy().round(decimals=4) array([0.0486, 0.1269, 0.3133, 0.6931, 1.3133, 2.1269], dtype=float32) """ - softplus = _get_softplus_op(inp.dtype, inp.device) - (oup,) = softplus(inp) - return oup + return _elwise(inp, mode=Elemwise.Mode.SOFTPLUS) def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: @@ -991,39 +848,6 @@ def logsoftmax(inp: Tensor, axis: Union[int, Sequence[int]]) -> Tensor: return inp - logsumexp(inp, axis, keepdims=True) -@lru_cache(maxsize=None) -def _get_logsigmoid_op(dtype=None, device=None): - @subgraph_fn( - "LogSigmoid", - dtype=dtype, - device=device, - nr_inputs=1, - jit_fusion=True, - custom_grad=True, - ) - def logsigmoid(inputs, f, c): - (inp,) = inputs[0:1] - neg_abs = f("-", f("abs", inp)) - exp = f("exp", neg_abs) - oup0 = f("log1p", exp) - oup1 = f("relu", f("-", inp)) - oup = 
f("+", oup0, oup1) - oup = f("-", oup) - (oup_grad,) = yield (oup,) - oup_grad = f("-", oup_grad) - inp_grad_0 = f("switch_gt0", oup1, oup_grad) - inp_grad_0 = f("-", inp_grad_0) - inp_grad_1 = oup_grad - inp_grad_1 = f("/", inp_grad_1, f("+", exp, c(1))) - inp_grad_1 = f("*", inp_grad_1, exp) - inp_grad_1 = f("-", inp_grad_1) - inp_grad_1 = f("abs_grad", inp, inp_grad_1) - inp_grad = f("+", inp_grad_0, inp_grad_1) - yield (inp_grad,) - - return logsigmoid - - def logsigmoid(inp: Tensor) -> Tensor: r"""Applies the element-wise function: @@ -1041,9 +865,7 @@ def logsigmoid(inp: Tensor) -> Tensor: array([-5.0067, -4.0182, -3.0486, -2.1269, -1.3133, -0.6931, -0.3133, -0.1269, -0.0486, -0.0181], dtype=float32) """ - logsigmoid = _get_logsigmoid_op(inp.dtype, inp.device) - (oup,) = logsigmoid(inp) - return oup + return _elwise(inp, mode=Elemwise.Mode.LOGSIGMOID) def logsumexp( diff --git a/imperative/src/impl/transformations/dtype_promote.cpp b/imperative/src/impl/transformations/dtype_promote.cpp index 58de880a7364ff5785c6c6b87f3e026d2fe9f6d0..5e7743d77c097dc42ef41cd65bcb1a7258f4e0cd 100644 --- a/imperative/src/impl/transformations/dtype_promote.cpp +++ b/imperative/src/impl/transformations/dtype_promote.cpp @@ -116,12 +116,17 @@ ValueRefList elemwise_rule(const OpDef& op, Span inputs) { } static std::unordered_set cast_case1 = { - Elemwise::Mode::TRUE_DIV, Elemwise::Mode::EXP, - Elemwise::Mode::POW, Elemwise::Mode::LOG, - Elemwise::Mode::EXPM1, Elemwise::Mode::LOG1P, - Elemwise::Mode::ACOS, Elemwise::Mode::ASIN, - Elemwise::Mode::ATAN2, Elemwise::Mode::COS, - Elemwise::Mode::SIN, Elemwise::Mode::LOG_SUM_EXP, + Elemwise::Mode::TRUE_DIV, Elemwise::Mode::EXP, + Elemwise::Mode::POW, Elemwise::Mode::LOG, + Elemwise::Mode::EXPM1, Elemwise::Mode::LOG1P, + Elemwise::Mode::ACOS, Elemwise::Mode::ASIN, + Elemwise::Mode::ATAN2, Elemwise::Mode::COS, + Elemwise::Mode::SIN, Elemwise::Mode::LOG_SUM_EXP, + Elemwise::Mode::TAN, Elemwise::Mode::ASINH, + Elemwise::Mode::ACOSH, 
Elemwise::Mode::ATANH, + Elemwise::Mode::SINH, Elemwise::Mode::COSH, + Elemwise::Mode::SOFTPLUS, Elemwise::Mode::HSIGMOID, + Elemwise::Mode::LOGSIGMOID, Elemwise::Mode::SQRT, }; static std::unordered_set cast_case2 = { diff --git a/src/jit/impl/ast_c.cpp b/src/jit/impl/ast_c.cpp index edce00074fd3a6a61e0b44709cb285d2cafb0286..eb1dc794bacb97e6a70820e8d96e951ef963f89f 100644 --- a/src/jit/impl/ast_c.cpp +++ b/src/jit/impl/ast_c.cpp @@ -133,7 +133,7 @@ const ElemGeneratorMap& ast_c::elem_opr_generator() { 0.f}) / 6.f), }; - mgb_assert(map.size() + 19 == opr::Elemwise::Param::MODE_NR_MEMBER); + mgb_assert(map.size() + 41 == opr::Elemwise::Param::MODE_NR_MEMBER); // unimplemented modes: SHL, SHR, FAST_TANH, FAST_TANH_GRAD, ROUND, RMULH, // ERFINV, ERFCINV, NOT, AND, OR, XOR, NEQ, ISNAN, ISINF return map; diff --git a/src/opr/impl/basic_arith.cpp b/src/opr/impl/basic_arith.cpp index 8111b4e0585a049ce5e3f46822709e4f052c4e67..9924f74718f6aaceeb9afafda081acbb3b4131c1 100644 --- a/src/opr/impl/basic_arith.cpp +++ b/src/opr/impl/basic_arith.cpp @@ -543,6 +543,34 @@ MGB_IMPL_OPR_GRAD(Elemwise) { RET(EL2(SILU_GRAD, i0, og)); case Mode::GELU: RET(EL2(GELU_GRAD, i0, og)); + case Mode::SINH: + RET(EL1(COSH, i0) * og); + case Mode::COSH: + RET(EL1(SINH, i0) * og); + case Mode::ASINH: + RET(EL2(ASINH_GRAD, i0, og)); + case Mode::ACOSH: + RET(EL2(ACOSH_GRAD, i0, og)); + case Mode::ATANH: + RET(EL2(ATANH_GRAD, i0, og)); + case Mode::TAN: { + auto two = i0.make_scalar_dt(2); + RET(og / (EL2(POW, EL1(COS, i0), two))); + } + case Mode::RELU6: + RET(EL2(RELU6_GRAD, i0, og)); + case Mode::SOFTPLUS: + RET(EL2(SOFTPLUS_GRAD, i0, og)); + case Mode::HSIGMOID: + RET(EL2(HSIGMOID_GRAD, i0, og)); + case Mode::LOGSIGMOID: + RET(EL2(SOFTPLUS_GRAD, EL1(NEGATE, i0), og)); + case Mode::SQRT: + RET(og / EL1(SQRT, i0) / 2); + case Mode::SQUARE: + RET(og * 2 * i0); + case Mode::SIGN: + RET(i0.make_scalar_dt(0).broadcast(i0.symshape())); // binary case Mode::ABS_GRAD: @@ -617,6 +645,11 @@ 
MGB_IMPL_OPR_GRAD(Elemwise) { case Mode::XOR: case Mode::AND: return nullptr; + case Mode::PRELU: + if (wrt_idx == 0) { + RET(EL3(PRELU_GRAD, i0, og, i1)); + } + RET(EL2(SWITCH_GT0, -i0, og * i0)); // ternary case Mode::COND_LEQ_MOV: @@ -627,6 +660,15 @@ MGB_IMPL_OPR_GRAD(Elemwise) { if (wrt_idx <= 1) return nullptr; RET(EL3(COND_LT_MOV, i0, i1, og)); + case Mode::CLIP: + if (wrt_idx == 0) { + RET(EL3(COND_LEQ_MOV, i1, i0, EL3(COND_LEQ_MOV, i0, i2, og))); + } + if (wrt_idx == 1) { + RET(EL3(COND_LEQ_MOV, i0, i1, og)); + } + RET(EL3(COND_LEQ_MOV, i2, i0, og)); + // fuse oprs case Mode::FUSE_MUL_ADD3: if (wrt_idx < 2) { diff --git a/src/opr/test/basic_arith/elemwise.cpp b/src/opr/test/basic_arith/elemwise.cpp index b4bab90d59305c16e0d5aaff1e80ed335b6692ed..89ae9ca3b60e59877cd8c04d497745c5c7257093 100644 --- a/src/opr/test/basic_arith/elemwise.cpp +++ b/src/opr/test/basic_arith/elemwise.cpp @@ -349,6 +349,99 @@ struct CheckerConfig : public CheckerConfig {}; template <> struct CheckerConfig : public NoGradCheckerConfig {}; +template <> +struct CheckerConfig : public NoGradCheckerConfig { + template + static InputGenerator get_inp_gen(size_t) { + return get_inp_gen_f32_range(-1.2, 1.2); + } +}; +template <> +struct CheckerConfig : public CheckerConfig { + template + static InputGenerator get_inp_gen(size_t) { + return get_inp_gen_f32_range(-5, 5); + } + template + static void update_opt(Opt& opt) { + opt.numdiff_eps = 1e-2; + opt.numdiff_max_err = 0.1; + } +}; +template <> +struct CheckerConfig : public CheckerConfig {}; +template <> +struct CheckerConfig : public CheckerConfig { + template + static void update_opt(Opt& opt) { + opt.numdiff_eps = 1e-2; + opt.numdiff_max_err = 0.1; + } +}; +template <> +struct CheckerConfig : public CheckerConfig { + template + static InputGenerator get_inp_gen(size_t) { + return get_inp_gen_f32_range(1.05, 5); + } +}; +template <> +struct CheckerConfig : public CheckerConfig { + template + static InputGenerator get_inp_gen(size_t) { + 
return get_inp_gen_f32_range(-0.95, 0.95); + } +}; +template <> +struct CheckerConfig : public CheckerConfig {}; +template <> +struct CheckerConfig : public CheckerConfig {}; +template <> +struct CheckerConfig : public CheckerConfig {}; +template <> +struct CheckerConfig : public CheckerConfig { + template + static InputGenerator get_inp_gen(size_t) { + return get_inp_gen_f32_range(0.05, 5); + } + template + static void update_opt(Opt& opt) { + opt.numdiff_eps = 1e-2; + opt.numdiff_max_err = 0.1; + } +}; +template <> +struct CheckerConfig : public CheckerConfig { + template + static void do_update_checker(Checker& checker) { + auto icoord = [](const typename Checker::NumInpArray& inp) { + auto p0 = inp[0]->template ptr(); + for (size_t i = 0, it = inp[0]->shape().total_nr_elems(); i < it; ++i) { + if (std::abs(p0[i]) < 1) { + p0[i] += 2; + } else if (std::abs(p0[i] - 6) < 1) { + p0[i] += 2; + } + } + }; + checker.set_input_coordinator(icoord); + } + template + static void update_checker(Checker& checker) { + using ctype = typename Checker::ctype; + return do_update_checker(checker); + } +}; +template <> +struct CheckerConfig : public CheckerConfig { + template + static InputGenerator get_inp_gen(size_t) { + return get_inp_gen_f32_range(-2.95, 2.95); + } +}; +template <> +struct CheckerConfig : public NoZeroCheckerConfig<0> {}; + /* ======================= binary config ======================= */ template struct BinaryInputMinGap : public CheckerConfig { @@ -567,13 +660,85 @@ template <> struct CheckerConfig : public NoGradCheckerConfig {}; template <> struct CheckerConfig : public NoGradCheckerConfig {}; +template <> +struct CheckerConfig : public NoZeroCheckerConfig<0> {}; +template <> +struct CheckerConfig : public NoGradCheckerConfig {}; +template <> +struct CheckerConfig : public NoGradCheckerConfig { + template + static InputGenerator get_inp_gen(size_t) { + return get_inp_gen_f32_range(1.05, 5); + } +}; +template <> +struct CheckerConfig : public 
NoGradCheckerConfig { + template + static InputGenerator get_inp_gen(size_t) { + return get_inp_gen_f32_range(-0.95, 0.95); + } +}; +template <> +struct CheckerConfig : public NoGradCheckerConfig {}; +template <> +struct CheckerConfig : public NoGradCheckerConfig {}; +template <> +struct CheckerConfig : public NoGradCheckerConfig { + template + static InputGenerator get_inp_gen(size_t) { + return get_inp_gen_f32_range(-2.95, 2.95); + } +}; /* ======================= ternary config ======================= */ template <> struct CheckerConfig : public BinaryInputMinGap {}; template <> struct CheckerConfig : public BinaryInputMinGap {}; +struct CheckerConfig : public NoGradCheckerConfig {}; +template <> +struct CheckerConfig : public CheckerConfig { + template + static void do_update_checker(Checker& checker) { + auto icoord = [](const typename Checker::NumInpArray& inp) { + auto p0 = inp[0]->template ptr(), p1 = inp[1]->template ptr(), + p2 = inp[2]->template ptr(); + for (size_t i = 0, it = inp[0]->shape().total_nr_elems(); i < it; ++i) { + if (p1[i] > p2[i]) { + std::swap(p1[i], p2[i]); + } + if (p1[i] + 1 > p2[i]) { + p2[i] = p1[i] + 1; + } + if (std::abs(p1[i] - p0[i]) < 1) { + if (p1[i] < p0[i]) + p0[i] += 1; + else + p0[i] -= 1; + } + if (std::abs(p2[i] - p0[i]) < 1) { + if (p2[i] < p0[i]) + p0[i] += 1; + else + p0[i] -= 1; + } + } + }; + checker.set_input_coordinator(icoord); + } + + template + static void update_checker(Checker& checker) { + using ctype = typename Checker::ctype; + return do_update_checker(checker); + } + template + static void update_opt(Opt& opt) { + opt.numdiff_eps = 1e-3; + opt.numdiff_max_err = 0.1; + } +}; /* ======================= test runner ======================= */ namespace detail { template diff --git a/src/opr/test/basic_arith/elemwise_binary_trait_def.inl b/src/opr/test/basic_arith/elemwise_binary_trait_def.inl index 0663986a88b6f8fe8ab369715faf34fcf137879c..1ed742db53dfdb7c5afa28a70c91de33f07a8927 100644 --- 
a/src/opr/test/basic_arith/elemwise_binary_trait_def.inl +++ b/src/opr/test/basic_arith/elemwise_binary_trait_def.inl @@ -41,6 +41,7 @@ DEF_TRAIT(SWITCH_GT0, x > 0 ? y : 0) DEF_TRAIT(TANH_GRAD, (1 - x * x) * y) DEF_TRAIT(FUSE_ADD_RELU, std::max(x + y, 0)) +DEF_TRAIT(PRELU, (x > 0) ? x : (x* y)) #undef _ALLOW_INT #define _ALLOW_INT false @@ -57,6 +58,12 @@ DEF_TRAIT( SILU_GRAD, y*(1 + std::exp(-x) + x * std::exp(-x)) / (1 + std::exp(-x)) / (1 + std::exp(-x))) DEF_TRAIT(GELU_GRAD, do_gelu_grad(x, y)) +DEF_TRAIT(ASINH_GRAD, y / std::sqrt(x * x + 1)) +DEF_TRAIT(ACOSH_GRAD, y / std::sqrt(x * x - 1)) +DEF_TRAIT(ATANH_GRAD, y / (1 - x * x)) +DEF_TRAIT(SOFTPLUS_GRAD, y* std::exp(x) / (1.f + std::exp(x))) +DEF_TRAIT(RELU6_GRAD, x <= 0.f ? 0.f : (x >= 6.f ? 0.f : y)) +DEF_TRAIT(HSIGMOID_GRAD, x <= -3.f ? 0.f : (x >= 3.f ? 0.f : (y / 6.f))) #undef _ALLOW_INT #undef _ALLOW_FLOAT diff --git a/src/opr/test/basic_arith/elemwise_ternary_trait_def.inl b/src/opr/test/basic_arith/elemwise_ternary_trait_def.inl index e9e5cd8ecb2605ba8869f55326364a7c12f56e7a..a9bf8af143e43911567cad2d68735f6d49cc87bc 100644 --- a/src/opr/test/basic_arith/elemwise_ternary_trait_def.inl +++ b/src/opr/test/basic_arith/elemwise_ternary_trait_def.inl @@ -15,6 +15,10 @@ DEF_TRAIT(COND_LEQ_MOV, x <= y ? z : 0) DEF_TRAIT(COND_LT_MOV, x < y ? z : 0) DEF_TRAIT(FUSE_MUL_ADD3, x* y + z) +DEF_TRAIT(CLIP, x < y ? y : (x < z ? x : z)) +#undef _ALLOW_INT +#define _ALLOW_INT false +DEF_TRAIT(PRELU_GRAD, x > 0 ? 
y : (y * z)) #undef _ALLOW_INT #undef _ALLOW_FLOAT diff --git a/src/opr/test/basic_arith/elemwise_unary_trait_def.inl b/src/opr/test/basic_arith/elemwise_unary_trait_def.inl index a6e64ecd3e75af5e505e0a22eea96419373dff8d..edacc03581adc5d62b6c3486afeb8e3620caa256 100644 --- a/src/opr/test/basic_arith/elemwise_unary_trait_def.inl +++ b/src/opr/test/basic_arith/elemwise_unary_trait_def.inl @@ -22,6 +22,9 @@ DEF_TRAIT(NOT, !x) DEF_TRAIT(ABS, std::abs(x)) DEF_TRAIT(NEGATE, -x) DEF_TRAIT(RELU, std::max(x, 0)) +DEF_TRAIT(RELU6, std::min(std::max(x, 0), 6)) +DEF_TRAIT(SQUARE, x* x) +DEF_TRAIT(SIGN, x < 0 ? -1 : (x > 0 ? 1 : 0)) #undef _ALLOW_INT #define _ALLOW_INT false @@ -46,6 +49,16 @@ DEF_TRAIT(ERFCINV, do_erfcinv(x)) DEF_TRAIT(H_SWISH, do_h_swish(x)) DEF_TRAIT(SILU, x / (1 + std::exp(-x))) DEF_TRAIT(GELU, x*(0.5f * (1.f + std::erf(x / std::sqrt(2.f))))) +DEF_TRAIT(SINH, std::sinh(x)) +DEF_TRAIT(COSH, std::cosh(x)) +DEF_TRAIT(ASINH, std::asinh(x)) +DEF_TRAIT(ACOSH, std::acosh(x)) +DEF_TRAIT(ATANH, std::atanh(x)) +DEF_TRAIT(TAN, std::tan(x)) +DEF_TRAIT(SOFTPLUS, std::log1p(std::exp(-std::abs(x))) + std::max(x, 0)) +DEF_TRAIT(HSIGMOID, x <= -3.f ? 0.f : (x >= 3.f ? 1.f : ((x + 3.f) / 6.f))) +DEF_TRAIT(SQRT, std::sqrt(x)) +DEF_TRAIT(LOGSIGMOID, -std::log1p(std::exp(-std::abs(x))) - std::max(-x, 0)) #undef _ALLOW_INT #undef _ALLOW_FLOAT