Commit f45aced5 authored by dengkaipeng

add jit test. develop=test

Parent 51536f7f
@@ -50,7 +50,7 @@ void VTanh(const T* x, T* y, int n) {
compute_addbias(&b, y, y, n);
}
void Softmax(const T* x, T* y, int n, int bs, int m) {
void Softmax(const T* x, T* y, int n, int bs, int remain) {
auto compute_hmax = KernelFuncs<HMaxTuple<T>, CPUPlace>::Cache().At(n);
auto compute_hsum = KernelFuncs<HSumTuple<T>, CPUPlace>::Cache().At(n);
auto compute_vscal = KernelFuncs<VScalTuple<T>, CPUPlace>::Cache().At(n);
@@ -66,15 +66,15 @@ void Softmax(const T* x, T* y, int n, int bs, int m) {
scalar = static_cast<T>(0) - scalar;
compute_vaddbias(&scalar, x, y, n); // x - max
compute_vexp(y, y, n);
if (m == 1) {
if (remain == 1) {
compute_hsum(y, &scalar, n);
scalar = static_cast<T>(1) / scalar;
compute_vscal(&scalar, y, y, n);
} else {
for (int j = 0; j < m; ++j) {
compute_stridesum(&y[j], &scalar, n, m);
for (int j = 0; j < remain; ++j) {
compute_stridesum(&y[j], &scalar, n, remain);
scalar = static_cast<T>(1) / scalar;
compute_stridescal(&scalar, &y[j], &y[j], n, m);
compute_stridescal(&scalar, &y[j], &y[j], n, remain);
}
}
x += n;
......
@@ -26,7 +26,7 @@ using T = float;
void VSigmoid(const T* x, T* y, int n);
void VTanh(const T* x, T* y, int n);
void Softmax(const T* x, T* y, int n, int bs, int m);
void Softmax(const T* x, T* y, int n, int bs, int remain);
void LSTMCtHt(lstm_t* step, const lstm_attr_t* attr);
void LSTMC1H1(lstm_t* step, const lstm_attr_t* attr);
......
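Note on the new `remain` argument: as used in these kernels, `bs` counts the rows, `n` is the length of one row, and each row holds `remain` interleaved softmax groups of `n / remain` elements spaced `remain` apart in memory (`remain == 1` is the ordinary contiguous case). Below is a minimal, illustrative sketch of how a caller might derive these three values from a tensor shape and a softmax axis; the helper name and signature are assumptions for illustration, not part of this patch.

#include <cstdint>
#include <vector>

// Illustrative helper (not in the patch): derive (bs, n, remain) for
// Softmax(x, y, n, bs, remain) from a shape and the softmax axis, assuming
// each group has n / remain elements laid out with stride `remain`.
struct SoftmaxArgs {
  int bs;      // product of dims before `axis`
  int n;       // product of dims from `axis` to the end
  int remain;  // product of dims after `axis`
};

inline SoftmaxArgs MakeSoftmaxArgs(const std::vector<int64_t>& shape, int axis) {
  SoftmaxArgs a{1, 1, 1};
  for (int i = 0; i < axis; ++i) a.bs *= static_cast<int>(shape[i]);
  for (size_t i = static_cast<size_t>(axis); i < shape.size(); ++i)
    a.n *= static_cast<int>(shape[i]);
  for (size_t i = static_cast<size_t>(axis) + 1; i < shape.size(); ++i)
    a.remain *= static_cast<int>(shape[i]);
  return a;
}

int main() {
  // e.g. shape {2, 3, 4}, axis 1 -> bs = 2, n = 12, remain = 4:
  // 2 * 4 = 8 softmax groups of 3 elements, each 4 apart in memory.
  SoftmaxArgs a = MakeSoftmaxArgs({2, 3, 4}, 1);
  return (a.bs == 2 && a.n == 12 && a.remain == 4) ? 0 : 1;
}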
@@ -81,7 +81,7 @@ void VScal<double>(const double* a, const double* x, double* y, int n) {
template <>
void StrideScal<float>(const float* a, const float* x, float* y, int n, int stride) {
if (x == y) {
platform::dynload::cblas_sscal(n, *a, y, stride);
platform::dynload::cblas_sscal(n/stride, *a, y, stride);
} else {
refer::StrideScal<float>(a, x, y, n, stride);
}
@@ -90,7 +90,7 @@ void StrideScal<float>(const float* a, const float* x, float* y, int n, int stri
template <>
void StrideScal<double>(const double* a, const double* x, double* y, int n, int stride) {
if (x == y) {
platform::dynload::cblas_dscal(n, *a, y, stride);
platform::dynload::cblas_dscal(n/stride, *a, y, stride);
} else {
refer::StrideScal<double>(a, x, y, n, stride);
}
@@ -148,12 +148,12 @@ void ASum<double>(const double* x, double* res, int n) {
template <>
void StrideASum<float>(const float* x, float* res, int n, int stride) {
res[0] = platform::dynload::cblas_sasum(n, x, stride);
res[0] = platform::dynload::cblas_sasum(n/stride, x, stride);
}
template <>
void StrideASum<double>(const double* x, double* res, int n, int stride) {
res[0] = platform::dynload::cblas_dasum(n, x, stride);
res[0] = platform::dynload::cblas_dasum(n/stride, x, stride);
}
// TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512
......
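On the count fix above: `cblas_sscal(N, alpha, X, incX)` and `cblas_sasum(N, X, incX)` take `N` as the number of strided elements to visit, not the length of the underlying buffer, so passing the full span `n` together with a non-unit `stride` would walk roughly `stride` times past the intended range. The templated Softmax in the next hunk correspondingly passes the full span `n` and lets these wrappers divide by the stride. A rough stand-in for the BLAS semantics (plain loops, not the MKL calls) to make the counting concrete:

#include <cmath>
#include <cstdio>

// Plain-loop stand-ins mirroring cblas_sscal / cblas_sasum semantics:
// N is the number of elements visited, incX the spacing between them.
static void scal_like(int N, float alpha, float* X, int incX) {
  for (int i = 0; i < N; ++i) X[i * incX] *= alpha;
}
static float asum_like(int N, const float* X, int incX) {
  float s = 0.f;
  for (int i = 0; i < N; ++i) s += std::fabs(X[i * incX]);
  return s;
}

int main() {
  float buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  const int n = 8, stride = 2;
  // Correct: visit n / stride = 4 elements (indices 0, 2, 4, 6) -> sum 16.
  std::printf("%f\n", asum_like(n / stride, buf, stride));
  // Passing n instead would index up to buf[(n - 1) * stride] = buf[14],
  // far outside the 8-element buffer.
  scal_like(n / stride, 0.5f, buf, stride);  // buf: {0.5, 2, 1.5, 4, 2.5, 6, 3.5, 8}
  return 0;
}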
@@ -135,7 +135,7 @@ template <typename T>
void StrideScal(const T* a, const T* x, T* y, int n, int stride);
template <typename T>
void Softmax(const T* x, T* y, int n, int bs, int m=1) {
void Softmax(const T* x, T* y, int n, int bs, int remain=1) {
std::vector<T> entities(bs);
for (int i = 0; i < bs; ++i) {
entities[i] = x[i * n];
@@ -149,15 +149,15 @@ void Softmax(const T* x, T* y, int n, int bs, int m=1) {
VExp(y, y, n * bs);
for (int i = 0; i < bs; ++i) {
T sum;
if (m == 1) {
if (remain == 1) {
ASum(&y[i * n], &sum, n);
sum = static_cast<T>(1) / sum;
VScal(&sum, &y[i * n], &y[i * n], n);
} else {
for (int j = 0; j < m; ++j) {
StrideASum(&y[i * n + j], &sum, n/m, m);
for (int j = 0; j < remain; ++j) {
StrideASum(&y[i * n + j], &sum, n, remain);
sum = static_cast<T>(1) / sum;
StrideScal(&sum, &y[i * n + j], &y[i * n + j], n/m, m);
StrideScal(&sum, &y[i * n + j], &y[i * n + j], n, remain);
}
}
}
......
@@ -421,30 +421,34 @@ void StrideASum(const T* x, T* res, int n, int stride) {
template <typename T>
void StrideScal(const T* a, const T* x, T* y, int n , int stride) {
for (int i = 0; i < n; i+=stride) {
for (int i = 0; i < n; ++i) {
if (i % stride == 0) {
y[i] = x[i] * a[0];
} else {
y[i] = x[i];
}
}
}
// y = e^(x - max(x))
// y = y / sum(y)
template <typename T>
void Softmax(const T* x, T* y, int n, int bs = 1, int m = 1) {
void Softmax(const T* x, T* y, int n, int bs = 1, int remain = 1) {
for (int i = 0; i < bs; ++i) {
T scalar;
HMax(x, &scalar, n);
scalar = static_cast<T>(0) - scalar;
VAddBias(&scalar, x, y, n); // x - max
VExp(y, y, n);
if (m == 1) {
if (remain == 1) {
HSum(y, &scalar, n);
scalar = static_cast<T>(1) / scalar;
VScal(&scalar, y, y, n);
} else {
for (int j = 0; j < m; j++) {
StrideASum(&y[j], &scalar, n, m);
for (int j = 0; j < remain; j++) {
StrideASum(&y[j], &scalar, n, remain);
scalar = static_cast<T>(1) / scalar;
StrideScal(&scalar, &y[j], &y[j], n, m);
StrideScal(&scalar, &y[j], &y[j], n, remain);
}
}
x += n;
......
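Two points worth noting in the reference implementations above: `StrideScal` now walks the whole span and copies the elements between stride positions, so `y` is fully written even when `x != y`; and for `remain > 1` the softmax runs one strided sum and one strided scale per offset `j`. A small self-contained sketch of that `remain > 1` path on a toy row (standalone code, not the Paddle functions):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Toy version of the remain > 1 softmax path on a single row of length n:
// the row holds `remain` interleaved groups of n / remain elements.
int main() {
  const int n = 6, remain = 2;  // two groups of 3 elements each
  std::vector<float> x = {1, 10, 2, 20, 3, 30}, y(n);

  float mx = *std::max_element(x.begin(), x.end());
  for (int i = 0; i < n; ++i) y[i] = std::exp(x[i] - mx);  // y = e^(x - max(x))

  for (int j = 0; j < remain; ++j) {                  // one pass per offset
    float sum = 0.f;
    for (int i = j; i < n; i += remain) sum += y[i];  // StrideASum
    for (int i = j; i < n; i += remain) y[i] /= sum;  // StrideScal, in place
  }
  // y[0] + y[2] + y[4] == 1 and y[1] + y[3] + y[5] == 1: the even and odd
  // offsets form independent softmax groups.
  for (float v : y) std::printf("%f ", v);
  std::printf("\n");
  return 0;
}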
@@ -723,11 +723,10 @@ void TestKernelSoftmax() {
VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
for (int bs : {1, 2, 10}) {
for (int n : TestSizes()) {
for (int m : {1, 2}) {
for (int m : {1, 2, 3}) { // remain
if (m > n || n % m != 0) {
continue;
}
VLOG(10) << "Softmax: " << bs << ", " << n << ", " << m;
auto ref = jit::GetReferFunc<KernelTuple>();
EXPECT_TRUE(ref != nullptr);
std::vector<T> x(bs * n), y(bs * n);
@@ -766,6 +765,86 @@ void TestKernelSoftmax() {
}
}
template <typename KernelTuple, typename PlaceType>
void TestKernelStrideASum() {
using T = typename KernelTuple::data_type;
VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
for (int d : TestSizes()) {
for (int m : {1, 2, 3}) { // stride
if (m > d || d % m != 0) {
continue;
}
auto ref = jit::GetReferFunc<KernelTuple>();
EXPECT_TRUE(ref != nullptr);
std::vector<T> x(d);
RandomVec<T>(d, x.data());
T ref_res;
ref(x.data(), &ref_res, d, m);
auto verifier = [](const typename KernelTuple::func_type tgt,
const std::vector<T>& x, const T ref_res,
const int m) {
EXPECT_TRUE(tgt != nullptr);
T tgt_res;
tgt(x.data(), &tgt_res, x.size(), m);
ExpectEQ<T>(&tgt_res, &ref_res, 1);
};
TestAllImpls<KernelTuple, PlaceType>(d, verifier, x, ref_res, m);
}
}
}
template <typename KernelTuple, typename PlaceType>
void TestKernelStrideScal() {
using T = typename KernelTuple::data_type;
VLOG(10) << "Test JITKernel: " << jit::to_string(KernelTuple::kernel_type);
// for (int d : TestSizes()) {
// for (int m : {1, 2, 3}) { // stride
for (int d : {4}) {
for (int m : {2}) { // stride
if (m > d || d % m != 0) {
continue;
}
auto ref = jit::GetReferFunc<KernelTuple>();
EXPECT_TRUE(ref != nullptr);
const T a = static_cast<T>(3);
std::vector<T> x(d), yref(d);
std::vector<T> xinp(d); // inplace test
RandomVec<T>(d, x.data());
std::copy(x.begin(), x.end(), xinp.begin());
const T* x_data = x.data();
T* yref_data = yref.data();
T* xinp_data = xinp.data();
// test refer code inplace
ref(&a, x_data, yref_data, d, m);
ref(&a, xinp_data, xinp_data, d, m);
ExpectEQ<T>(xinp_data, yref_data, d);
auto verifier = [](const typename KernelTuple::func_type tgt, const T a,
const std::vector<T>& x, const std::vector<T>& yref,
const int m) {
EXPECT_TRUE(tgt != nullptr);
EXPECT_EQ(yref.size(), x.size());
const T* x_data = x.data();
const T* yref_data = yref.data();
const int d = yref.size();
std::vector<T> ytgt(d);
T* ytgt_data = ytgt.data();
// test normal
tgt(&a, x_data, ytgt_data, d, m);
ExpectEQ<T>(ytgt_data, yref_data, d);
// test inplace x
std::copy(x.begin(), x.end(), ytgt.begin());
tgt(&a, ytgt_data, ytgt_data, d, m);
ExpectEQ<T>(ytgt_data, yref_data, d);
};
TestAllImpls<KernelTuple, PlaceType>(d, verifier, a, x, yref, m);
}
}
}
template <typename KernelTuple, typename PlaceType>
void TestKernelSgd() {
using T = typename KernelTuple::data_type;
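As a quick sanity check of what the new refer `StrideScal` is expected to produce in the in-place case exercised by `TestKernelStrideScal` (d = 4, m = 2, a = 3; the test uses random input, fixed values here for readability), here is a standalone sketch mirroring the reference semantics rather than calling the Paddle kernels:

#include <cassert>
#include <vector>

// Mirrors the refer StrideScal semantics: scale every `stride`-th element,
// copy the rest unchanged. Standalone sketch, not the Paddle implementation.
static void stride_scal_ref(float a, const float* x, float* y, int n, int stride) {
  for (int i = 0; i < n; ++i) y[i] = (i % stride == 0) ? x[i] * a : x[i];
}

int main() {
  const int d = 4, m = 2;
  const float a = 3.f;
  std::vector<float> x = {1, 2, 3, 4}, y(d);
  stride_scal_ref(a, x.data(), y.data(), d, m);
  // Only indices 0 and 2 are scaled: expected {3, 2, 9, 4}.
  assert(y[0] == 3.f && y[1] == 2.f && y[2] == 9.f && y[3] == 4.f);
  return 0;
}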
@@ -918,7 +997,7 @@ TEST(JITKernel_pool, more) {
EXPECT_EQ(kers.size(), 10UL);
#else
#ifdef PADDLE_WITH_MKLML
EXPECT_EQ(kers.size(), 21UL);
EXPECT_EQ(kers.size(), 22UL);
#else
EXPECT_EQ(kers.size(), 8UL);
#endif
@@ -927,7 +1006,7 @@ TEST(JITKernel_pool, more) {
TEST(JITKernel_pool, refer) {
const auto& kers = jit::ReferKernelPool::Instance().AllKernels();
EXPECT_EQ(kers.size(), 29UL);
EXPECT_EQ(kers.size(), 31UL);
}
// test helper
@@ -1298,3 +1377,6 @@ TEST_CPU_KERNEL(MatMul);
TEST_CPU_KERNEL(Softmax);
TEST_CPU_KERNEL(Sgd);
TEST_CPU_KERNEL(VBroadcast);
TEST_CPU_KERNEL(StrideASum);
TEST_CPU_KERNEL(StrideScal);