Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
923ad5dc
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
923ad5dc
编写于
11月 28, 2022
作者:
P
PuQing
提交者:
GitHub
11月 28, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add cpu_info.h (#48403)
上级
fe617f95
变更
8
隐藏空白更改
内联
并排
Showing
8 changed files
with
239 additions
and
201 deletions
+239
-201
paddle/fluid/platform/cpu_info.h
paddle/fluid/platform/cpu_info.h
+3
-12
paddle/phi/backends/cpu/cpu_info.h
paddle/phi/backends/cpu/cpu_info.h
+56
-0
paddle/phi/kernels/funcs/cpu_vec.h
paddle/phi/kernels/funcs/cpu_vec.h
+118
-131
paddle/phi/kernels/funcs/detail/activation_functions.h
paddle/phi/kernels/funcs/detail/activation_functions.h
+1
-1
paddle/phi/kernels/funcs/detail/avx_mathfun.h
paddle/phi/kernels/funcs/detail/avx_mathfun.h
+1
-1
paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc
paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc
+4
-6
paddle/phi/kernels/sparse/cpu/softmax_kernel.cc
paddle/phi/kernels/sparse/cpu/softmax_kernel.cc
+4
-6
paddle/phi/tests/kernels/test_cpu_vec.cc
paddle/phi/tests/kernels/test_cpu_vec.cc
+52
-44
未找到文件。
paddle/fluid/platform/cpu_info.h
浏览文件 @
923ad5dc
...
...
@@ -50,6 +50,8 @@ inline void cpuid(int reg[4], int x) {
#endif
#endif
#include "paddle/phi/backends/cpu/cpu_info.h"
namespace
paddle
{
namespace
platform
{
...
...
@@ -82,18 +84,7 @@ size_t NPUPinnedMinChunkSize();
//! Get the maximum chunk size for buddy allocator.
size_t
NPUPinnedMaxChunkSize
();
typedef
enum
{
isa_any
,
sse42
,
avx
,
avx2
,
avx512f
,
avx512_core
,
avx512_core_vnni
,
avx512_mic
,
avx512_mic_4ops
,
avx512_bf16
,
}
cpu_isa_t
;
// Instruction set architecture
using
namespace
phi
::
backends
::
cpu
;
// NOLINT
// May I use some instruction
bool
MayIUse
(
const
cpu_isa_t
cpu_isa
);
...
...
paddle/phi/backends/cpu/cpu_info.h
0 → 100644
浏览文件 @
923ad5dc
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stddef.h>
#ifdef _WIN32
#if defined(__AVX2__)
#include <immintrin.h> // avx2
#elif defined(__AVX__)
#include <intrin.h> // avx
#endif // AVX
#else // WIN32
#ifdef __AVX__
#include <immintrin.h>
#endif
#endif // WIN32
#if defined(_WIN32)
#define ALIGN32_BEG __declspec(align(32))
#define ALIGN32_END
#else
#define ALIGN32_BEG
#define ALIGN32_END __attribute__((aligned(32)))
#endif // _WIN32
namespace phi {
namespace backends {
namespace cpu {

// Instruction set architecture tags.
//
// The numeric values below are exactly the ones the compiler would assign
// implicitly (0, 1, 2, ...); they are written out so the ordering of the
// enumerators is visible at a glance.
typedef enum {
  isa_any = 0,
  sse42 = 1,
  avx = 2,
  avx2 = 3,
  avx512f = 4,
  avx512_core = 5,
  avx512_core_vnni = 6,
  avx512_mic = 7,
  avx512_mic_4ops = 8,
  avx512_bf16 = 9,
} cpu_isa_t;

}  // namespace cpu
}  // namespace backends
}  // namespace phi
paddle/phi/kernels/funcs/cpu_vec.h
浏览文件 @
923ad5dc
...
...
@@ -17,7 +17,7 @@ limitations under the License. */
#include <functional>
#include <string>
#include "paddle/
fluid/platform
/cpu_info.h"
#include "paddle/
phi/backends/cpu
/cpu_info.h"
#include "paddle/phi/core/enforce.h"
#ifdef PADDLE_WITH_MKLML
...
...
@@ -81,8 +81,7 @@ inline void vec_scal<double>(const int n, const double a, double* x) {
#endif
// MKL scal only support inplace, choose this if src and dst are not equal
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
inline
void
vec_scal
(
const
int
n
,
const
T
a
,
const
T
*
x
,
T
*
y
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
a
*
x
[
i
];
...
...
@@ -90,14 +89,14 @@ inline void vec_scal(const int n, const T a, const T* x, T* y) {
}
template
<
>
inline
void
vec_scal
<
float
,
paddle
::
platform
::
avx
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_scal
<
float
,
backends
::
cpu
::
avx
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
#ifdef __AVX__
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_scal
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_scal
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
a
,
x
,
y
);
return
;
}
const
int
rest
=
n
%
block
;
...
...
@@ -121,29 +120,28 @@ inline void vec_scal<float, paddle::platform::avx>(const int n,
y
[
i
]
=
a
*
x
[
i
];
}
#else
vec_scal
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_scal
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
a
,
x
,
y
);
#endif
}
template
<
>
inline
void
vec_scal
<
float
,
paddle
::
platform
::
avx2
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
vec_scal
<
float
,
paddle
::
platform
::
avx
>
(
n
,
a
,
x
,
y
);
inline
void
vec_scal
<
float
,
backends
::
cpu
::
avx2
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
vec_scal
<
float
,
backends
::
cpu
::
avx
>
(
n
,
a
,
x
,
y
);
}
template
<
>
inline
void
vec_scal
<
float
,
paddle
::
platform
::
avx512f
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_scal
<
float
,
backends
::
cpu
::
avx512f
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
// TODO(TJ): enable me
vec_scal
<
float
,
paddle
::
platform
::
avx2
>
(
n
,
a
,
x
,
y
);
vec_scal
<
float
,
backends
::
cpu
::
avx2
>
(
n
,
a
,
x
,
y
);
}
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
inline
void
vec_sum
(
const
size_t
n
,
const
T
*
x
,
T
*
s
)
{
s
[
0
]
=
x
[
0
];
for
(
size_t
i
=
1
;
i
<
n
;
++
i
)
{
...
...
@@ -152,13 +150,13 @@ inline void vec_sum(const size_t n, const T* x, T* s) {
}
template
<
>
inline
void
vec_sum
<
float
,
paddle
::
platform
::
avx
>
(
const
size_t
n
,
const
float
*
x
,
float
*
s
)
{
inline
void
vec_sum
<
float
,
backends
::
cpu
::
avx
>
(
const
size_t
n
,
const
float
*
x
,
float
*
s
)
{
#ifdef __AVX__
constexpr
unsigned
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_sum
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
x
,
s
);
vec_sum
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
x
,
s
);
return
;
}
...
...
@@ -182,12 +180,11 @@ inline void vec_sum<float, paddle::platform::avx>(const size_t n,
s
[
0
]
+=
x
[
i
];
}
#else
vec_sum
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
x
,
s
);
vec_sum
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
x
,
s
);
#endif
}
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
inline
void
vec_mul
(
const
size_t
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
{
for
(
size_t
i
=
0
;
i
<
n
;
++
i
)
{
z
[
i
]
=
x
[
i
]
*
y
[
i
];
...
...
@@ -195,14 +192,14 @@ inline void vec_mul(const size_t n, const T* x, const T* y, T* z) {
}
template
<
>
inline
void
vec_mul
<
float
,
paddle
::
platform
::
avx
>
(
const
size_t
n
,
const
float
*
x
,
const
float
*
y
,
float
*
z
)
{
inline
void
vec_mul
<
float
,
backends
::
cpu
::
avx
>
(
const
size_t
n
,
const
float
*
x
,
const
float
*
y
,
float
*
z
)
{
#ifdef __AVX__
constexpr
unsigned
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_mul
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
x
,
y
,
z
);
vec_mul
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
x
,
y
,
z
);
return
;
}
...
...
@@ -217,12 +214,11 @@ inline void vec_mul<float, paddle::platform::avx>(const size_t n,
z
[
i
]
=
x
[
i
]
*
y
[
i
];
}
#else
vec_mul
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
x
,
y
,
z
);
vec_mul
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
x
,
y
,
z
);
#endif
}
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
inline
void
vec_mul_reduce
(
const
size_t
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
{
z
[
0
]
=
x
[
0
]
*
y
[
0
];
for
(
size_t
i
=
1
;
i
<
n
;
++
i
)
{
...
...
@@ -231,14 +227,14 @@ inline void vec_mul_reduce(const size_t n, const T* x, const T* y, T* z) {
}
template
<
>
inline
void
vec_mul_reduce
<
float
,
paddle
::
platform
::
avx
>
(
const
size_t
n
,
const
float
*
x
,
const
float
*
y
,
float
*
z
)
{
inline
void
vec_mul_reduce
<
float
,
backends
::
cpu
::
avx
>
(
const
size_t
n
,
const
float
*
x
,
const
float
*
y
,
float
*
z
)
{
#ifdef __AVX__
constexpr
unsigned
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_mul_reduce
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
x
,
y
,
z
);
vec_mul_reduce
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
x
,
y
,
z
);
return
;
}
...
...
@@ -262,12 +258,11 @@ inline void vec_mul_reduce<float, paddle::platform::avx>(const size_t n,
z
[
0
]
+=
x
[
i
]
*
y
[
i
];
}
#else
vec_mul_reduce
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
x
,
y
,
z
);
vec_mul_reduce
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
x
,
y
,
z
);
#endif
}
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
inline
void
vec_bias_sub
(
const
int
n
,
const
T
a
,
const
T
*
x
,
T
*
y
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
a
-
x
[
i
];
...
...
@@ -275,14 +270,14 @@ inline void vec_bias_sub(const int n, const T a, const T* x, T* y) {
}
template
<
>
inline
void
vec_bias_sub
<
float
,
paddle
::
platform
::
avx
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_bias_sub
<
float
,
backends
::
cpu
::
avx
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
#ifdef __AVX__
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_bias_sub
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_bias_sub
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
a
,
x
,
y
);
return
;
}
const
int
rest
=
n
%
block
;
...
...
@@ -306,30 +301,29 @@ inline void vec_bias_sub<float, paddle::platform::avx>(const int n,
y
[
i
]
=
a
-
x
[
i
];
}
#else
vec_bias_sub
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_bias_sub
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
a
,
x
,
y
);
#endif
}
template
<
>
inline
void
vec_bias_sub
<
float
,
paddle
::
platform
::
avx2
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
vec_bias_sub
<
float
,
paddle
::
platform
::
avx
>
(
n
,
a
,
x
,
y
);
inline
void
vec_bias_sub
<
float
,
backends
::
cpu
::
avx2
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
vec_bias_sub
<
float
,
backends
::
cpu
::
avx
>
(
n
,
a
,
x
,
y
);
}
template
<
>
inline
void
vec_bias_sub
<
float
,
paddle
::
platform
::
avx512f
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_bias_sub
<
float
,
backends
::
cpu
::
avx512f
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
// TODO(TJ): enable me
vec_bias_sub
<
float
,
paddle
::
platform
::
avx2
>
(
n
,
a
,
x
,
y
);
vec_bias_sub
<
float
,
backends
::
cpu
::
avx2
>
(
n
,
a
,
x
,
y
);
}
// out = x*y + (1-x)*z
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
inline
void
vec_cross
(
const
int
n
,
const
T
*
x
,
const
T
*
y
,
const
T
*
z
,
T
*
out
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
out
[
i
]
=
x
[
i
]
*
y
[
i
]
+
(
static_cast
<
T
>
(
1
)
-
x
[
i
])
*
z
[
i
];
...
...
@@ -337,12 +331,12 @@ inline void vec_cross(const int n, const T* x, const T* y, const T* z, T* out) {
}
template
<
>
inline
void
vec_cross
<
float
,
paddle
::
platform
::
avx
>
(
inline
void
vec_cross
<
float
,
backends
::
cpu
::
avx
>
(
const
int
n
,
const
float
*
x
,
const
float
*
y
,
const
float
*
z
,
float
*
out
)
{
#ifdef __AVX__
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_cross
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
x
,
y
,
z
,
out
);
vec_cross
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
x
,
y
,
z
,
out
);
return
;
}
const
int
rest
=
n
%
block
;
...
...
@@ -368,25 +362,24 @@ inline void vec_cross<float, paddle::platform::avx>(
out
[
i
]
=
x
[
i
]
*
y
[
i
]
+
(
1.
f
-
x
[
i
])
*
z
[
i
];
}
#else
vec_cross
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
x
,
y
,
z
,
out
);
vec_cross
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
x
,
y
,
z
,
out
);
#endif
}
template
<
>
inline
void
vec_cross
<
float
,
paddle
::
platform
::
avx2
>
(
inline
void
vec_cross
<
float
,
backends
::
cpu
::
avx2
>
(
const
int
n
,
const
float
*
x
,
const
float
*
y
,
const
float
*
z
,
float
*
out
)
{
vec_cross
<
float
,
paddle
::
platform
::
avx
>
(
n
,
x
,
y
,
z
,
out
);
vec_cross
<
float
,
backends
::
cpu
::
avx
>
(
n
,
x
,
y
,
z
,
out
);
}
template
<
>
inline
void
vec_cross
<
float
,
paddle
::
platform
::
avx512f
>
(
inline
void
vec_cross
<
float
,
backends
::
cpu
::
avx512f
>
(
const
int
n
,
const
float
*
x
,
const
float
*
y
,
const
float
*
z
,
float
*
out
)
{
// TODO(TJ): enable me
vec_cross
<
float
,
paddle
::
platform
::
avx
>
(
n
,
x
,
y
,
z
,
out
);
vec_cross
<
float
,
backends
::
cpu
::
avx
>
(
n
,
x
,
y
,
z
,
out
);
}
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
inline
void
vec_clip
(
const
size_t
n
,
const
T
a
,
const
T
*
x
,
T
*
y
)
{
for
(
size_t
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
x
[
i
]
<
a
?
a
:
x
[
i
];
...
...
@@ -394,14 +387,14 @@ inline void vec_clip(const size_t n, const T a, const T* x, T* y) {
}
template
<
>
inline
void
vec_clip
<
float
,
paddle
::
platform
::
avx
>
(
const
size_t
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_clip
<
float
,
backends
::
cpu
::
avx
>
(
const
size_t
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
#ifdef __AVX__
constexpr
unsigned
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_clip
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_clip
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
a
,
x
,
y
);
return
;
}
...
...
@@ -417,12 +410,11 @@ inline void vec_clip<float, paddle::platform::avx>(const size_t n,
y
[
i
]
=
x
[
i
]
<
a
?
a
:
x
[
i
];
}
#else
vec_clip
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_clip
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
a
,
x
,
y
);
#endif
}
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
inline
void
vec_add_bias
(
const
int
n
,
const
T
a
,
const
T
*
x
,
T
*
y
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
x
[
i
]
+
a
;
...
...
@@ -430,14 +422,14 @@ inline void vec_add_bias(const int n, const T a, const T* x, T* y) {
}
template
<
>
inline
void
vec_add_bias
<
float
,
paddle
::
platform
::
avx
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_add_bias
<
float
,
backends
::
cpu
::
avx
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
#ifdef __AVX__
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_add_bias
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_add_bias
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
a
,
x
,
y
);
return
;
}
const
int
rest
=
n
%
block
;
...
...
@@ -461,36 +453,34 @@ inline void vec_add_bias<float, paddle::platform::avx>(const int n,
y
[
i
]
=
x
[
i
]
+
a
;
}
#else
vec_add_bias
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_add_bias
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
a
,
x
,
y
);
#endif
}
template
<
>
inline
void
vec_add_bias
<
float
,
paddle
::
platform
::
avx2
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
vec_add_bias
<
float
,
paddle
::
platform
::
avx
>
(
n
,
a
,
x
,
y
);
inline
void
vec_add_bias
<
float
,
backends
::
cpu
::
avx2
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
vec_add_bias
<
float
,
backends
::
cpu
::
avx
>
(
n
,
a
,
x
,
y
);
}
template
<
>
inline
void
vec_add_bias
<
float
,
paddle
::
platform
::
avx512f
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_add_bias
<
float
,
backends
::
cpu
::
avx512f
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
// TODO(TJ): enable me
vec_add_bias
<
float
,
paddle
::
platform
::
avx2
>
(
n
,
a
,
x
,
y
);
vec_add_bias
<
float
,
backends
::
cpu
::
avx2
>
(
n
,
a
,
x
,
y
);
}
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
inline
void
vec_identity
(
const
int
n
,
const
T
*
x
,
T
*
y
)
{
// do nothing
return
;
}
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
inline
void
vec_sigmoid
(
const
int
n
,
const
T
*
x
,
T
*
y
)
{
const
T
min
=
SIGMOID_THRESHOLD_MIN
;
const
T
max
=
SIGMOID_THRESHOLD_MAX
;
...
...
@@ -505,13 +495,13 @@ inline void vec_sigmoid(const int n, const T* x, T* y) {
}
template
<
>
inline
void
vec_sigmoid
<
float
,
paddle
::
platform
::
avx
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_sigmoid
<
float
,
backends
::
cpu
::
avx
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
#ifdef __AVX__
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_sigmoid
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
x
,
y
);
vec_sigmoid
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
x
,
y
);
return
;
}
const
int
rest
=
n
%
block
;
...
...
@@ -560,27 +550,26 @@ inline void vec_sigmoid<float, paddle::platform::avx>(const int n,
y
[
i
]
=
1.
f
/
(
1.
f
+
y
[
i
]);
}
#else
vec_sigmoid
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
x
,
y
);
vec_sigmoid
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
x
,
y
);
#endif
}
template
<
>
inline
void
vec_sigmoid
<
float
,
paddle
::
platform
::
avx2
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
vec_sigmoid
<
float
,
paddle
::
platform
::
avx
>
(
n
,
x
,
y
);
inline
void
vec_sigmoid
<
float
,
backends
::
cpu
::
avx2
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
vec_sigmoid
<
float
,
backends
::
cpu
::
avx
>
(
n
,
x
,
y
);
}
template
<
>
inline
void
vec_sigmoid
<
float
,
paddle
::
platform
::
avx512f
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_sigmoid
<
float
,
backends
::
cpu
::
avx512f
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
// TODO(TJ): enable me
vec_sigmoid
<
float
,
paddle
::
platform
::
avx2
>
(
n
,
x
,
y
);
vec_sigmoid
<
float
,
backends
::
cpu
::
avx2
>
(
n
,
x
,
y
);
}
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
inline
void
vec_tanh
(
const
int
n
,
const
T
*
x
,
T
*
y
)
{
vec_scal
<
T
,
isa
>
(
n
,
static_cast
<
T
>
(
2
),
x
,
y
);
vec_sigmoid
<
T
,
isa
>
(
n
,
y
,
y
);
...
...
@@ -589,8 +578,7 @@ inline void vec_tanh(const int n, const T* x, T* y) {
}
// TODO(TJ): make relu clip
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
inline
void
vec_relu
(
const
int
n
,
const
T
*
x
,
T
*
y
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
x
[
i
]
>
0
?
x
[
i
]
:
0
;
...
...
@@ -598,13 +586,13 @@ inline void vec_relu(const int n, const T* x, T* y) {
}
template
<
>
inline
void
vec_relu
<
float
,
paddle
::
platform
::
avx
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_relu
<
float
,
backends
::
cpu
::
avx
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
#ifdef __AVX__
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
*
4
)
{
vec_relu
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
x
,
y
);
vec_relu
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
x
,
y
);
return
;
}
...
...
@@ -628,29 +616,28 @@ inline void vec_relu<float, paddle::platform::avx>(const int n,
#undef MOVE_ONE_STEP
#else
vec_relu
<
float
,
paddle
::
platform
::
isa_any
>
(
n
,
x
,
y
);
vec_relu
<
float
,
backends
::
cpu
::
isa_any
>
(
n
,
x
,
y
);
#endif
}
template
<
>
inline
void
vec_relu
<
float
,
paddle
::
platform
::
avx2
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
vec_relu
<
float
,
paddle
::
platform
::
avx
>
(
n
,
x
,
y
);
inline
void
vec_relu
<
float
,
backends
::
cpu
::
avx2
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
vec_relu
<
float
,
backends
::
cpu
::
avx
>
(
n
,
x
,
y
);
}
template
<
>
inline
void
vec_relu
<
float
,
paddle
::
platform
::
avx512f
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_relu
<
float
,
backends
::
cpu
::
avx512f
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
// TODO(TJ): enable me
vec_relu
<
float
,
paddle
::
platform
::
avx2
>
(
n
,
x
,
y
);
vec_relu
<
float
,
backends
::
cpu
::
avx2
>
(
n
,
x
,
y
);
}
// TODO(TJ): optimize double of sigmoid, tanh and relu if necessary
template
<
typename
T
,
paddle
::
platform
::
cpu_isa_t
isa
=
paddle
::
platform
::
isa_any
>
template
<
typename
T
,
backends
::
cpu
::
cpu_isa_t
isa
=
backends
::
cpu
::
isa_any
>
class
VecActivations
{
public:
std
::
function
<
void
(
const
int
,
const
T
*
,
T
*
)
>
operator
()(
...
...
paddle/phi/kernels/funcs/detail/activation_functions.h
浏览文件 @
923ad5dc
...
...
@@ -18,7 +18,7 @@ limitations under the License. */
#include <stdexcept>
#include <string>
#include "paddle/
fluid/platform
/cpu_info.h"
#include "paddle/
phi/backends/cpu
/cpu_info.h"
#include "paddle/phi/core/hostdevice.h"
namespace
phi
{
...
...
paddle/phi/kernels/funcs/detail/avx_mathfun.h
浏览文件 @
923ad5dc
...
...
@@ -42,7 +42,7 @@
(this is the zlib license)
*/
#pragma once
#include "paddle/
fluid/platform
/cpu_info.h"
#include "paddle/
phi/backends/cpu
/cpu_info.h"
/* __m128 is ugly to write */
typedef
__m256
v8sf
;
// vector of 8 float (avx)
...
...
paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc
浏览文件 @
923ad5dc
...
...
@@ -14,15 +14,13 @@ limitations under the License. */
#include "paddle/phi/kernels/sparse/softmax_grad_kernel.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/cpu/cpu_info.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/visit_type.h"
#include "paddle/phi/kernels/funcs/cpu_vec.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
namespace
plt
=
paddle
::
platform
;
namespace
phi
{
namespace
sparse
{
...
...
@@ -72,11 +70,11 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx,
out_crows_data
[
crow_idx
]);
T
sum
=
0
;
phi
::
funcs
::
vec_mul_reduce
<
T
,
plt
::
avx
>
(
phi
::
funcs
::
vec_mul_reduce
<
T
,
backends
::
cpu
::
avx
>
(
row_nnz
,
dout_data
,
out_data
,
&
sum
);
phi
::
funcs
::
vec_add_bias
<
T
,
plt
::
avx
>
(
phi
::
funcs
::
vec_add_bias
<
T
,
backends
::
cpu
::
avx
>
(
row_nnz
,
static_cast
<
T
>
(
-
1
)
*
sum
,
dout_data
,
dx_data
);
phi
::
funcs
::
vec_mul
<
T
,
plt
::
avx
>
(
phi
::
funcs
::
vec_mul
<
T
,
backends
::
cpu
::
avx
>
(
row_nnz
,
dx_data
,
out_data
,
dx_data
);
out_data
=
out_data
+
row_nnz
;
...
...
paddle/phi/kernels/sparse/cpu/softmax_kernel.cc
浏览文件 @
923ad5dc
...
...
@@ -14,15 +14,13 @@ limitations under the License. */
#include "paddle/phi/kernels/sparse/softmax_kernel.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/cpu/cpu_info.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/visit_type.h"
#include "paddle/phi/kernels/funcs/cpu_vec.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
namespace
plt
=
paddle
::
platform
;
namespace
phi
{
namespace
sparse
{
...
...
@@ -70,14 +68,14 @@ void SoftmaxCsrKernel(const Context& dev_ctx,
x_crows_data
[
crow_idx
]);
row_max_val
=
*
std
::
max_element
(
x_data
,
x_data
+
row_nnz
);
phi
::
funcs
::
vec_add_bias
<
T
,
plt
::
avx
>
(
phi
::
funcs
::
vec_add_bias
<
T
,
backends
::
cpu
::
avx
>
(
row_nnz
,
static_cast
<
T
>
(
-
1
)
*
row_max_val
,
x_data
,
out_data
);
phi
::
funcs
::
vec_exp
<
T
>
(
row_nnz
,
out_data
,
out_data
);
T
sum
=
0
;
phi
::
funcs
::
vec_sum
<
T
,
plt
::
avx
>
(
row_nnz
,
out_data
,
&
sum
);
phi
::
funcs
::
vec_scal
<
T
,
plt
::
avx
>
(
phi
::
funcs
::
vec_sum
<
T
,
backends
::
cpu
::
avx
>
(
row_nnz
,
out_data
,
&
sum
);
phi
::
funcs
::
vec_scal
<
T
,
backends
::
cpu
::
avx
>
(
row_nnz
,
static_cast
<
T
>
(
1
)
/
sum
,
out_data
,
out_data
);
x_data
=
x_data
+
row_nnz
;
...
...
paddle/phi/tests/kernels/test_cpu_vec.cc
浏览文件 @
923ad5dc
...
...
@@ -106,42 +106,43 @@ void TestAndBench(const int n,
}
TEST
(
CpuVecTest
,
sigmoid
)
{
namespace
platform
=
paddle
::
platform
;
using
namespace
phi
::
funcs
;
// NOLINT
for
(
auto
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
TestAndBench
<
float
>
(
sz
,
vec_sigmoid
<
float
>
,
ref_sigmoid
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_sigmoid
<
float
,
platform
::
avx
>
,
ref_sigmoid
<
float
>
);
sz
,
vec_sigmoid
<
float
,
backends
::
cpu
::
avx
>
,
ref_sigmoid
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_sigmoid
<
float
,
platform
::
avx2
>
,
ref_sigmoid
<
float
>
);
sz
,
vec_sigmoid
<
float
,
backends
::
cpu
::
avx2
>
,
ref_sigmoid
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_sigmoid
<
float
,
platform
::
avx512f
>
,
ref_sigmoid
<
float
>
);
sz
,
vec_sigmoid
<
float
,
backends
::
cpu
::
avx512f
>
,
ref_sigmoid
<
float
>
);
}
TestAndBench
<
double
>
(
30
,
vec_sigmoid
<
double
>
,
ref_sigmoid
<
double
>
);
}
TEST
(
CpuVecTest
,
tanh
)
{
namespace
platform
=
paddle
::
platform
;
using
namespace
phi
::
funcs
;
// NOLINT
for
(
auto
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
>
,
ref_tanh
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
,
platform
::
avx
>
,
ref_tanh
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
,
platform
::
avx2
>
,
ref_tanh
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
,
platform
::
avx512f
>
,
ref_tanh
<
float
>
);
sz
,
vec_tanh
<
float
,
backends
::
cpu
::
avx
>
,
ref_tanh
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
,
backends
::
cpu
::
avx2
>
,
ref_tanh
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
,
backends
::
cpu
::
avx512f
>
,
ref_tanh
<
float
>
);
}
TestAndBench
<
double
>
(
30
,
vec_tanh
<
double
>
,
ref_tanh
<
double
>
);
}
TEST
(
CpuVecTest
,
relu
)
{
namespace
platform
=
paddle
::
platform
;
using
namespace
phi
::
funcs
;
// NOLINT
for
(
auto
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
>
,
ref_relu
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
,
platform
::
avx
>
,
ref_relu
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
,
platform
::
avx2
>
,
ref_relu
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
,
platform
::
avx512f
>
,
ref_relu
<
float
>
);
sz
,
vec_relu
<
float
,
backends
::
cpu
::
avx
>
,
ref_relu
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
,
backends
::
cpu
::
avx2
>
,
ref_relu
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
,
backends
::
cpu
::
avx512f
>
,
ref_relu
<
float
>
);
}
TestAndBench
<
double
>
(
30
,
vec_relu
<
double
>
,
ref_relu
<
double
>
);
}
...
...
@@ -161,14 +162,16 @@ void compare_sum(size_t n,
}
TEST
(
CpuVecTest
,
vec_sum
)
{
namespace
platform
=
paddle
::
platform
;
using
namespace
phi
::
funcs
;
// NOLINT
for
(
size_t
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
compare_sum
<
float
>
(
sz
,
vec_sum
<
float
>
,
vec_sum
<
float
,
platform
::
isa_any
>
);
compare_sum
<
float
>
(
sz
,
vec_sum
<
float
,
platform
::
avx
>
,
vec_sum
<
float
,
platform
::
isa_any
>
);
sz
,
vec_sum
<
float
>
,
vec_sum
<
float
,
backends
::
cpu
::
isa_any
>
);
compare_sum
<
float
>
(
sz
,
vec_sum
<
float
,
backends
::
cpu
::
avx
>
,
vec_sum
<
float
,
backends
::
cpu
::
isa_any
>
);
}
compare_sum
<
double
>
(
30U
,
vec_sum
<
double
>
,
vec_sum
<
double
,
platform
::
isa_any
>
);
compare_sum
<
double
>
(
30U
,
vec_sum
<
double
>
,
vec_sum
<
double
,
backends
::
cpu
::
isa_any
>
);
}
template
<
typename
T
>
...
...
@@ -192,18 +195,17 @@ void compare_clip(
}
TEST
(
CpuVecTest
,
vec_clip
)
{
namespace
platform
=
paddle
::
platform
;
using
namespace
phi
::
funcs
;
// NOLINT
for
(
size_t
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
compare_clip
<
float
>
(
sz
,
-
4.
f
,
vec_clip
<
float
>
,
vec_clip
<
float
,
platform
::
isa_any
>
);
sz
,
-
4.
f
,
vec_clip
<
float
>
,
vec_clip
<
float
,
backends
::
cpu
::
isa_any
>
);
compare_clip
<
float
>
(
sz
,
-
1.1
f
,
vec_clip
<
float
,
platform
::
avx
>
,
vec_clip
<
float
,
platform
::
isa_any
>
);
vec_clip
<
float
,
backends
::
cpu
::
avx
>
,
vec_clip
<
float
,
backends
::
cpu
::
isa_any
>
);
}
compare_clip
<
double
>
(
30U
,
1.0
,
vec_clip
<
double
>
,
vec_clip
<
double
,
platform
::
isa_any
>
);
30U
,
1.0
,
vec_clip
<
double
>
,
vec_clip
<
double
,
backends
::
cpu
::
isa_any
>
);
}
template
<
typename
T
>
...
...
@@ -230,14 +232,16 @@ void compare_mul(
}
TEST
(
CpuVecTest
,
vec_mul
)
{
namespace
platform
=
paddle
::
platform
;
using
namespace
phi
::
funcs
;
// NOLINT
for
(
size_t
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
compare_mul
<
float
>
(
sz
,
vec_mul
<
float
>
,
vec_mul
<
float
,
platform
::
isa_any
>
);
compare_mul
<
float
>
(
sz
,
vec_mul
<
float
,
platform
::
avx
>
,
vec_mul
<
float
,
platform
::
isa_any
>
);
sz
,
vec_mul
<
float
>
,
vec_mul
<
float
,
backends
::
cpu
::
isa_any
>
);
compare_mul
<
float
>
(
sz
,
vec_mul
<
float
,
backends
::
cpu
::
avx
>
,
vec_mul
<
float
,
backends
::
cpu
::
isa_any
>
);
}
compare_mul
<
double
>
(
30U
,
vec_mul
<
double
>
,
vec_mul
<
double
,
platform
::
isa_any
>
);
compare_mul
<
double
>
(
30U
,
vec_mul
<
double
>
,
vec_mul
<
double
,
backends
::
cpu
::
isa_any
>
);
}
template
<
typename
T
>
...
...
@@ -260,17 +264,18 @@ void compare_mul_reduce(
}
TEST
(
CpuVecTest
,
vec_mul_reduce
)
{
namespace
platform
=
paddle
::
platform
;
using
namespace
phi
::
funcs
;
// NOLINT
for
(
size_t
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
compare_mul_reduce
<
float
>
(
sz
,
vec_mul_reduce
<
float
>
,
vec_mul_reduce
<
float
,
platform
::
isa_any
>
);
compare_mul_reduce
<
float
>
(
sz
,
vec_mul_reduce
<
float
,
platform
::
avx
>
,
vec_mul_reduce
<
float
,
platform
::
isa_any
>
);
vec_mul_reduce
<
float
>
,
vec_mul_reduce
<
float
,
backends
::
cpu
::
isa_any
>
);
compare_mul_reduce
<
float
>
(
sz
,
vec_mul_reduce
<
float
,
backends
::
cpu
::
avx
>
,
vec_mul_reduce
<
float
,
backends
::
cpu
::
isa_any
>
);
}
compare_mul_reduce
<
double
>
(
30U
,
vec_mul_reduce
<
double
>
,
vec_mul_reduce
<
double
,
platform
::
isa_any
>
);
compare_mul_reduce
<
double
>
(
30U
,
vec_mul_reduce
<
double
>
,
vec_mul_reduce
<
double
,
backends
::
cpu
::
isa_any
>
);
}
template
<
typename
T
>
...
...
@@ -296,40 +301,43 @@ void TestInplace(const int n,
}
TEST
(
CpuVecTest
,
inplace_sigmoid
)
{
namespace
platform
=
paddle
::
platform
;
using
namespace
phi
::
funcs
;
// NOLINT
for
(
auto
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
TestInplace
<
float
>
(
sz
,
vec_sigmoid
<
float
>
,
ref_sigmoid
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_sigmoid
<
float
,
platform
::
avx
>
,
ref_sigmoid
<
float
>
);
sz
,
vec_sigmoid
<
float
,
backends
::
cpu
::
avx
>
,
ref_sigmoid
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_sigmoid
<
float
,
platform
::
avx2
>
,
ref_sigmoid
<
float
>
);
sz
,
vec_sigmoid
<
float
,
backends
::
cpu
::
avx2
>
,
ref_sigmoid
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_sigmoid
<
float
,
platform
::
avx512f
>
,
ref_sigmoid
<
float
>
);
sz
,
vec_sigmoid
<
float
,
backends
::
cpu
::
avx512f
>
,
ref_sigmoid
<
float
>
);
}
TestInplace
<
double
>
(
30
,
vec_sigmoid
<
double
>
,
ref_sigmoid
<
double
>
);
}
TEST
(
CpuVecTest
,
inplace_tanh
)
{
namespace
platform
=
paddle
::
platform
;
using
namespace
phi
::
funcs
;
// NOLINT
for
(
auto
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
>
,
ref_tanh
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
,
platform
::
avx
>
,
ref_tanh
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
,
platform
::
avx2
>
,
ref_tanh
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
,
platform
::
avx512f
>
,
ref_tanh
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
,
backends
::
cpu
::
avx
>
,
ref_tanh
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
,
backends
::
cpu
::
avx2
>
,
ref_tanh
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
,
backends
::
cpu
::
avx512f
>
,
ref_tanh
<
float
>
);
}
TestInplace
<
double
>
(
30
,
vec_tanh
<
double
>
,
ref_tanh
<
double
>
);
}
TEST
(
CpuVecTest
,
inplace_relu
)
{
namespace
platform
=
paddle
::
platform
;
using
namespace
phi
::
funcs
;
// NOLINT
for
(
auto
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
>
,
ref_relu
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
,
platform
::
avx
>
,
ref_relu
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
,
platform
::
avx2
>
,
ref_relu
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
,
platform
::
avx512f
>
,
ref_relu
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
,
backends
::
cpu
::
avx
>
,
ref_relu
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
,
backends
::
cpu
::
avx2
>
,
ref_relu
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
,
backends
::
cpu
::
avx512f
>
,
ref_relu
<
float
>
);
}
TestInplace
<
double
>
(
30
,
vec_relu
<
double
>
,
ref_relu
<
double
>
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录