提交 7383eefd 编写于 作者: T tensor-tang

add softmax mix and mkl code

test=develop
上级 50945685
......@@ -12,3 +12,4 @@ USE_JITKERNEL_MORE(kLSTMC1H1, mix)
USE_JITKERNEL_MORE(kGRUH1, mix)
USE_JITKERNEL_MORE(kGRUHtPart1, mix)
USE_JITKERNEL_MORE(kGRUHtPart2, mix)
USE_JITKERNEL_MORE(kSoftmax, mix)
......@@ -48,6 +48,27 @@ void VTanh(const T* x, T* y, int n) {
compute_addbias(&b, y, y, n);
}
void Softmax(const T* x, T* y, int n, int bs) {
auto compute_hmax = Get<kHMax, XRNTuples<T>, platform::CPUPlace>(n);
auto compute_hsum = Get<kHSum, XRNTuples<T>, platform::CPUPlace>(n);
auto compute_vscal = Get<kVScal, AXYNTuples<T>, platform::CPUPlace>(n);
auto compute_vaddbias = Get<kVAddBias, AXYNTuples<T>, platform::CPUPlace>(n);
auto compute_vexp =
Get<KernelType::kVExp, XYNTuples<T>, platform::CPUPlace>(n);
for (int i = 0; i < bs; ++i) {
T scalar;
compute_hmax(x, &scalar, n);
scalar = static_cast<T>(0) - scalar;
compute_vaddbias(&scalar, x, y, n); // x - max
compute_vexp(y, y, n);
compute_hsum(y, &scalar, n);
scalar = static_cast<T>(1) / scalar;
compute_vscal(&scalar, y, y, n);
x += n;
y += n;
}
}
void (*getActFunc(KernelType type, int d))(const T*, T*, int) { // NOLINT
if (type == kVSigmoid) {
return Get<kVSigmoid, XYNTuples<T>, platform::CPUPlace>(d);
......@@ -184,6 +205,8 @@ bool VSigmoidKernel::UseMe(const int& d) const { return true; }
bool VTanhKernel::UseMe(const int& d) const { return true; }
bool SoftmaxKernel::UseMe(const int& d) const { return true; }
bool LSTMCtHtKernel::UseMe(const lstm_attr_t& attr) const { return true; }
bool LSTMC1H1Kernel::UseMe(const lstm_attr_t& attr) const { return true; }
......@@ -207,6 +230,7 @@ namespace mix = paddle::operators::jit::more::mix;
REGISTER_MORE_KERNEL(kVSigmoid, VSigmoid);
REGISTER_MORE_KERNEL(kVTanh, VTanh);
REGISTER_MORE_KERNEL(kSoftmax, Softmax);
REGISTER_MORE_KERNEL(kLSTMCtHt, LSTMCtHt);
REGISTER_MORE_KERNEL(kLSTMC1H1, LSTMC1H1);
REGISTER_MORE_KERNEL(kGRUH1, GRUH1);
......
......@@ -26,6 +26,7 @@ using T = float;
void VSigmoid(const T* x, T* y, int n);
void VTanh(const T* x, T* y, int n);
void Softmax(const T* x, T* y, int n, int bs);
void LSTMCtHt(lstm_t* step, const lstm_attr_t* attr);
void LSTMC1H1(lstm_t* step, const lstm_attr_t* attr);
......@@ -45,6 +46,9 @@ void GRUHtPart2(gru_t* step, const gru_attr_t* attr);
DECLARE_MORE_KERNEL(VSigmoid, XYNTuples);
DECLARE_MORE_KERNEL(VTanh, XYNTuples);
// XRN
DECLARE_MORE_KERNEL(Softmax, SoftmaxTuples);
DECLARE_MORE_KERNEL(LSTMCtHt, LSTMTuples);
DECLARE_MORE_KERNEL(LSTMC1H1, LSTMTuples);
......
......@@ -12,3 +12,4 @@ USE_JITKERNEL_MORE(kVSquare, mkl)
USE_JITKERNEL_MORE(kVSigmoid, mkl)
USE_JITKERNEL_MORE(kVTanh, mkl)
USE_JITKERNEL_MORE(kSeqPool, mkl)
USE_JITKERNEL_MORE(kSoftmax, mkl)
......@@ -116,6 +116,16 @@ void VAXPY<double>(double a, const double* x, double* y, int n) {
platform::dynload::cblas_daxpy(n, a, x, 1, y, 1);
}
template <>
void ASum<float>(const float* x, float* res, int n) {
res[0] = platform::dynload::cblas_sasum(n, x, 1);
}
template <>
void ASum<double>(const double* x, double* res, int n) {
res[0] = platform::dynload::cblas_dasum(n, x, 1);
}
// TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512
template <>
bool MatMulKernel<float>::UseMe(const int& d) const {
......@@ -167,6 +177,11 @@ bool SeqPoolKernel<double>::UseMe(const seq_pool_attr_t& attr) const {
return true;
}
template <>
bool SoftmaxKernel<float>::UseMe(const int& d) const {
return true;
}
#define AWALYS_USE_ME_WITH_DOUBLE(func) \
template <> \
bool func##Kernel<double>::UseMe(const int& d) const { \
......@@ -181,6 +196,7 @@ AWALYS_USE_ME_WITH_DOUBLE(VExp);
AWALYS_USE_ME_WITH_DOUBLE(VSigmoid);
AWALYS_USE_ME_WITH_DOUBLE(VTanh);
AWALYS_USE_ME_WITH_DOUBLE(VSquare);
AWALYS_USE_ME_WITH_DOUBLE(Softmax);
#undef AWALYS_USE_ME_WITH_DOUBLE
} // namespace mkl
......@@ -204,5 +220,6 @@ REGISTER_MKL_KERNEL(kVSquare, VSquare);
REGISTER_MKL_KERNEL(kVSigmoid, VSigmoid);
REGISTER_MKL_KERNEL(kVTanh, VTanh);
REGISTER_MKL_KERNEL(kSeqPool, SeqPool);
REGISTER_MKL_KERNEL(kSoftmax, Softmax);
#undef REGISTER_MKL_KERNEL
......@@ -16,6 +16,7 @@
#include <cmath>
#include <type_traits>
#include <vector>
#include "paddle/fluid/operators/jit/kernel_base.h"
namespace paddle {
......@@ -90,6 +91,30 @@ void SeqPool(const T* x, T* y, const seq_pool_attr_t* attr) {
}
}
template <typename T>
void ASum(const T* x, T* res, int n);
template <typename T>
void Softmax(const T* x, T* y, int n, int bs) {
std::vector<T> entities(bs);
for (int i = 0; i < bs; ++i) {
entities[i] = x[i * n];
for (int c = 1; c < n; ++c) {
entities[i] = x[i * n + c] > entities[i] ? x[i * n + c] : entities[i];
}
for (int c = 0; c < n; ++c) {
y[i * n + c] = x[i * n + c] - entities[i];
}
}
VExp(y, y, n * bs);
for (int i = 0; i < bs; ++i) {
T sum;
ASum(&y[i * n], &sum, n);
sum = static_cast<T>(1) / sum;
VScal(&sum, &y[i * n], &y[i * n], n);
}
}
#define DECLARE_MKL_KERNEL(name, tuples) \
template <typename T> \
class name##Kernel : public KernelMore<tuples<T>> { \
......@@ -117,6 +142,8 @@ DECLARE_MKL_KERNEL(VSquare, XYNTuples);
DECLARE_MKL_KERNEL(SeqPool, SeqPoolTuples);
DECLARE_MKL_KERNEL(Softmax, SoftmaxTuples);
#undef DECLARE_MKL_KERNEL
} // namespace mkl
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册