BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle, in sync with the fork source)
Commit 923ad5dc (unverified)
add cpu_info.h (#48403)
Authored by PuQing on Nov 28, 2022; committed via GitHub on Nov 28, 2022.
Parent: fe617f95
Showing 8 changed files with 239 additions and 201 deletions (+239 −201).
paddle/fluid/platform/cpu_info.h                          +3   −12
paddle/phi/backends/cpu/cpu_info.h                        +56  −0
paddle/phi/kernels/funcs/cpu_vec.h                        +118 −131
paddle/phi/kernels/funcs/detail/activation_functions.h    +1   −1
paddle/phi/kernels/funcs/detail/avx_mathfun.h             +1   −1
paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc      +4   −6
paddle/phi/kernels/sparse/cpu/softmax_kernel.cc           +4   −6
paddle/phi/tests/kernels/test_cpu_vec.cc                  +52  −44
paddle/fluid/platform/cpu_info.h
@@ -50,6 +50,8 @@ inline void cpuid(int reg[4], int x) {
 #endif
 #endif
 
+#include "paddle/phi/backends/cpu/cpu_info.h"
+
 namespace paddle {
 namespace platform {

@@ -82,18 +84,7 @@ size_t NPUPinnedMinChunkSize();
 //! Get the maximum chunk size for buddy allocator.
 size_t NPUPinnedMaxChunkSize();
 
-typedef enum {
-  isa_any,
-  sse42,
-  avx,
-  avx2,
-  avx512f,
-  avx512_core,
-  avx512_core_vnni,
-  avx512_mic,
-  avx512_mic_4ops,
-  avx512_bf16,
-} cpu_isa_t;  // Instruction set architecture
+using namespace phi::backends::cpu;  // NOLINT
 
 // May I use some instruction
 bool MayIUse(const cpu_isa_t cpu_isa);
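Because the fluid header now only re-exports the phi definitions through the using-directive above, existing callers of MayIUse keep compiling under either spelling. A minimal illustrative sketch (not part of this commit), assuming only the declarations shown above:

// Illustrative only: after this change both qualified spellings resolve to the
// same enumerator, so a runtime ISA check can be written either way.
#include "paddle/fluid/platform/cpu_info.h"

bool HasAvx2Support() {
  return paddle::platform::MayIUse(phi::backends::cpu::avx2);
}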
paddle/phi/backends/cpu/cpu_info.h
new file mode 100644

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <stddef.h>

#ifdef _WIN32
#if defined(__AVX2__)
#include <immintrin.h>  // avx2
#elif defined(__AVX__)
#include <intrin.h>  // avx
#endif  // AVX
#else   // WIN32
#ifdef __AVX__
#include <immintrin.h>
#endif
#endif  // WIN32

#if defined(_WIN32)
#define ALIGN32_BEG __declspec(align(32))
#define ALIGN32_END
#else
#define ALIGN32_BEG
#define ALIGN32_END __attribute__((aligned(32)))
#endif  // _WIN32

namespace phi {
namespace backends {
namespace cpu {

typedef enum {
  isa_any,
  sse42,
  avx,
  avx2,
  avx512f,
  avx512_core,
  avx512_core_vnni,
  avx512_mic,
  avx512_mic_4ops,
  avx512_bf16,
} cpu_isa_t;  // Instruction set architecture

}  // namespace cpu
}  // namespace backends
}  // namespace phi
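For reference, the ALIGN32_BEG / ALIGN32_END pair defined above brackets declarations that need 32-byte alignment for AVX loads. A small illustrative sketch (hypothetical constant; the macro placement follows the usual pattern for aligned static tables):

// Hypothetical 32-byte-aligned constant table, suitable for _mm256_load_ps.
static const ALIGN32_BEG float kOnes8[8] ALIGN32_END = {
    1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f};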
paddle/phi/kernels/funcs/cpu_vec.h
(This diff is collapsed in the web view; its contents are not shown here.)
paddle/phi/kernels/funcs/detail/activation_functions.h
@@ -18,7 +18,7 @@ limitations under the License. */
 #include <stdexcept>
 #include <string>
 
-#include "paddle/fluid/platform/cpu_info.h"
+#include "paddle/phi/backends/cpu/cpu_info.h"
 #include "paddle/phi/core/hostdevice.h"
 
 namespace phi {
paddle/phi/kernels/funcs/detail/avx_mathfun.h
@@ -42,7 +42,7 @@
    (this is the zlib license)
 */
 #pragma once
-#include "paddle/fluid/platform/cpu_info.h"
+#include "paddle/phi/backends/cpu/cpu_info.h"
 
 /* __m128 is ugly to write */
 typedef __m256 v8sf;  // vector of 8 float (avx)
paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc
@@ -14,15 +14,13 @@ limitations under the License. */
 
 #include "paddle/phi/kernels/sparse/softmax_grad_kernel.h"
 
-#include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/backends/cpu/cpu_info.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/visit_type.h"
 #include "paddle/phi/kernels/funcs/cpu_vec.h"
 #include "paddle/phi/kernels/sparse/empty_kernel.h"
 
-namespace plt = paddle::platform;
-
 namespace phi {
 namespace sparse {

@@ -72,11 +70,11 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx,
                                         out_crows_data[crow_idx]);
 
       T sum = 0;
-      phi::funcs::vec_mul_reduce<T, plt::avx>(
+      phi::funcs::vec_mul_reduce<T, backends::cpu::avx>(
           row_nnz, dout_data, out_data, &sum);
-      phi::funcs::vec_add_bias<T, plt::avx>(
+      phi::funcs::vec_add_bias<T, backends::cpu::avx>(
           row_nnz, static_cast<T>(-1) * sum, dout_data, dx_data);
-      phi::funcs::vec_mul<T, plt::avx>(
+      phi::funcs::vec_mul<T, backends::cpu::avx>(
           row_nnz, dx_data, out_data, dx_data);
 
       out_data = out_data + row_nnz;
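For orientation, the three vec calls above implement the standard softmax backward for one CSR row, dx_i = out_i * (dout_i - sum_j dout_j * out_j). A scalar sketch of the same computation (illustrative only, not the Paddle API; softmax_row_grad is a made-up name):

#include <cstddef>

// Scalar equivalent of the vec_mul_reduce / vec_add_bias / vec_mul sequence
// above: dx[i] = out[i] * (dout[i] - sum_j(dout[j] * out[j])) for one CSR row.
void softmax_row_grad(std::size_t row_nnz,
                      const float* dout,
                      const float* out,
                      float* dx) {
  float sum = 0.f;
  for (std::size_t j = 0; j < row_nnz; ++j) {
    sum += dout[j] * out[j];  // vec_mul_reduce
  }
  for (std::size_t i = 0; i < row_nnz; ++i) {
    dx[i] = (dout[i] - sum) * out[i];  // vec_add_bias followed by vec_mul
  }
}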
paddle/phi/kernels/sparse/cpu/softmax_kernel.cc
@@ -14,15 +14,13 @@ limitations under the License. */
 
 #include "paddle/phi/kernels/sparse/softmax_kernel.h"
 
-#include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/backends/cpu/cpu_info.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/core/visit_type.h"
 #include "paddle/phi/kernels/funcs/cpu_vec.h"
 #include "paddle/phi/kernels/sparse/empty_kernel.h"
 
-namespace plt = paddle::platform;
-
 namespace phi {
 namespace sparse {

@@ -70,14 +68,14 @@ void SoftmaxCsrKernel(const Context& dev_ctx,
                                         x_crows_data[crow_idx]);
 
       row_max_val = *std::max_element(x_data, x_data + row_nnz);
-      phi::funcs::vec_add_bias<T, plt::avx>(
+      phi::funcs::vec_add_bias<T, backends::cpu::avx>(
           row_nnz, static_cast<T>(-1) * row_max_val, x_data, out_data);
 
       phi::funcs::vec_exp<T>(row_nnz, out_data, out_data);
 
       T sum = 0;
-      phi::funcs::vec_sum<T, plt::avx>(row_nnz, out_data, &sum);
-      phi::funcs::vec_scal<T, plt::avx>(
+      phi::funcs::vec_sum<T, backends::cpu::avx>(row_nnz, out_data, &sum);
+      phi::funcs::vec_scal<T, backends::cpu::avx>(
           row_nnz, static_cast<T>(1) / sum, out_data, out_data);
 
       x_data = x_data + row_nnz;
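Similarly, the forward kernel above is the usual max-shifted softmax over one CSR row. A standalone scalar sketch (illustrative only; softmax_row is a made-up name):

#include <algorithm>
#include <cmath>

// Scalar equivalent of the vec_add_bias / vec_exp / vec_sum / vec_scal
// sequence above: out = softmax(x) over one CSR row of length row_nnz.
void softmax_row(int row_nnz, const float* x, float* out) {
  float row_max_val = *std::max_element(x, x + row_nnz);
  float sum = 0.f;
  for (int i = 0; i < row_nnz; ++i) {
    out[i] = std::exp(x[i] - row_max_val);  // vec_add_bias + vec_exp
    sum += out[i];                          // vec_sum
  }
  for (int i = 0; i < row_nnz; ++i) {
    out[i] /= sum;  // vec_scal with 1 / sum
  }
}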
paddle/phi/tests/kernels/test_cpu_vec.cc
@@ -106,42 +106,43 @@ void TestAndBench(const int n,
 }
 
 TEST(CpuVecTest, sigmoid) {
-  namespace platform = paddle::platform;
   using namespace phi::funcs;  // NOLINT
   for (auto sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     TestAndBench<float>(sz, vec_sigmoid<float>, ref_sigmoid<float>);
     TestAndBench<float>(
-        sz, vec_sigmoid<float, platform::avx>, ref_sigmoid<float>);
+        sz, vec_sigmoid<float, backends::cpu::avx>, ref_sigmoid<float>);
     TestAndBench<float>(
-        sz, vec_sigmoid<float, platform::avx2>, ref_sigmoid<float>);
+        sz, vec_sigmoid<float, backends::cpu::avx2>, ref_sigmoid<float>);
     TestAndBench<float>(
-        sz, vec_sigmoid<float, platform::avx512f>, ref_sigmoid<float>);
+        sz, vec_sigmoid<float, backends::cpu::avx512f>, ref_sigmoid<float>);
   }
   TestAndBench<double>(30, vec_sigmoid<double>, ref_sigmoid<double>);
 }
 
 TEST(CpuVecTest, tanh) {
-  namespace platform = paddle::platform;
   using namespace phi::funcs;  // NOLINT
   for (auto sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     TestAndBench<float>(sz, vec_tanh<float>, ref_tanh<float>);
-    TestAndBench<float>(sz, vec_tanh<float, platform::avx>, ref_tanh<float>);
-    TestAndBench<float>(sz, vec_tanh<float, platform::avx2>, ref_tanh<float>);
-    TestAndBench<float>(
-        sz, vec_tanh<float, platform::avx512f>, ref_tanh<float>);
+    TestAndBench<float>(
+        sz, vec_tanh<float, backends::cpu::avx>, ref_tanh<float>);
+    TestAndBench<float>(
+        sz, vec_tanh<float, backends::cpu::avx2>, ref_tanh<float>);
+    TestAndBench<float>(
+        sz, vec_tanh<float, backends::cpu::avx512f>, ref_tanh<float>);
   }
   TestAndBench<double>(30, vec_tanh<double>, ref_tanh<double>);
 }
 
 TEST(CpuVecTest, relu) {
-  namespace platform = paddle::platform;
   using namespace phi::funcs;  // NOLINT
   for (auto sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     TestAndBench<float>(sz, vec_relu<float>, ref_relu<float>);
-    TestAndBench<float>(sz, vec_relu<float, platform::avx>, ref_relu<float>);
-    TestAndBench<float>(sz, vec_relu<float, platform::avx2>, ref_relu<float>);
-    TestAndBench<float>(
-        sz, vec_relu<float, platform::avx512f>, ref_relu<float>);
+    TestAndBench<float>(
+        sz, vec_relu<float, backends::cpu::avx>, ref_relu<float>);
+    TestAndBench<float>(
+        sz, vec_relu<float, backends::cpu::avx2>, ref_relu<float>);
+    TestAndBench<float>(
+        sz, vec_relu<float, backends::cpu::avx512f>, ref_relu<float>);
   }
   TestAndBench<double>(30, vec_relu<double>, ref_relu<double>);
 }

@@ -161,14 +162,16 @@ void compare_sum(size_t n,
 }
 
 TEST(CpuVecTest, vec_sum) {
-  namespace platform = paddle::platform;
   using namespace phi::funcs;  // NOLINT
   for (size_t sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
-    compare_sum<float>(sz, vec_sum<float>, vec_sum<float, platform::isa_any>);
     compare_sum<float>(
-        sz, vec_sum<float, platform::avx>, vec_sum<float, platform::isa_any>);
+        sz, vec_sum<float>, vec_sum<float, backends::cpu::isa_any>);
+    compare_sum<float>(sz,
+                       vec_sum<float, backends::cpu::avx>,
+                       vec_sum<float, backends::cpu::isa_any>);
   }
-  compare_sum<double>(30U, vec_sum<double>, vec_sum<double, platform::isa_any>);
+  compare_sum<double>(
+      30U, vec_sum<double>, vec_sum<double, backends::cpu::isa_any>);
 }
 
 template <typename T>

@@ -192,18 +195,17 @@ void compare_clip(
 }
 
 TEST(CpuVecTest, vec_clip) {
-  namespace platform = paddle::platform;
   using namespace phi::funcs;  // NOLINT
   for (size_t sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     compare_clip<float>(
-        sz, -4.f, vec_clip<float>, vec_clip<float, platform::isa_any>);
+        sz, -4.f, vec_clip<float>, vec_clip<float, backends::cpu::isa_any>);
     compare_clip<float>(sz,
                         -1.1f,
-                        vec_clip<float, platform::avx>,
-                        vec_clip<float, platform::isa_any>);
+                        vec_clip<float, backends::cpu::avx>,
+                        vec_clip<float, backends::cpu::isa_any>);
   }
   compare_clip<double>(
-      30U, 1.0, vec_clip<double>, vec_clip<double, platform::isa_any>);
+      30U, 1.0, vec_clip<double>, vec_clip<double, backends::cpu::isa_any>);
 }
 
 template <typename T>

@@ -230,14 +232,16 @@ void compare_mul(
 }
 
 TEST(CpuVecTest, vec_mul) {
-  namespace platform = paddle::platform;
   using namespace phi::funcs;  // NOLINT
   for (size_t sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
-    compare_mul<float>(sz, vec_mul<float>, vec_mul<float, platform::isa_any>);
     compare_mul<float>(
-        sz, vec_mul<float, platform::avx>, vec_mul<float, platform::isa_any>);
+        sz, vec_mul<float>, vec_mul<float, backends::cpu::isa_any>);
+    compare_mul<float>(sz,
+                       vec_mul<float, backends::cpu::avx>,
+                       vec_mul<float, backends::cpu::isa_any>);
   }
-  compare_mul<double>(30U, vec_mul<double>, vec_mul<double, platform::isa_any>);
+  compare_mul<double>(
+      30U, vec_mul<double>, vec_mul<double, backends::cpu::isa_any>);
 }
 
 template <typename T>

@@ -260,17 +264,18 @@ void compare_mul_reduce(
 }
 
 TEST(CpuVecTest, vec_mul_reduce) {
-  namespace platform = paddle::platform;
   using namespace phi::funcs;  // NOLINT
   for (size_t sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     compare_mul_reduce<float>(
-        sz, vec_mul_reduce<float>, vec_mul_reduce<float, platform::isa_any>);
+        sz,
+        vec_mul_reduce<float>,
+        vec_mul_reduce<float, backends::cpu::isa_any>);
     compare_mul_reduce<float>(
         sz,
-        vec_mul_reduce<float, platform::avx>,
-        vec_mul_reduce<float, platform::isa_any>);
+        vec_mul_reduce<float, backends::cpu::avx>,
+        vec_mul_reduce<float, backends::cpu::isa_any>);
   }
   compare_mul_reduce<double>(
-      30U, vec_mul_reduce<double>, vec_mul_reduce<double, platform::isa_any>);
+      30U,
+      vec_mul_reduce<double>,
+      vec_mul_reduce<double, backends::cpu::isa_any>);
 }
 
 template <typename T>

@@ -296,40 +301,43 @@ void TestInplace(const int n,
 }
 
 TEST(CpuVecTest, inplace_sigmoid) {
-  namespace platform = paddle::platform;
   using namespace phi::funcs;  // NOLINT
   for (auto sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     TestInplace<float>(sz, vec_sigmoid<float>, ref_sigmoid<float>);
     TestInplace<float>(
-        sz, vec_sigmoid<float, platform::avx>, ref_sigmoid<float>);
+        sz, vec_sigmoid<float, backends::cpu::avx>, ref_sigmoid<float>);
     TestInplace<float>(
-        sz, vec_sigmoid<float, platform::avx2>, ref_sigmoid<float>);
+        sz, vec_sigmoid<float, backends::cpu::avx2>, ref_sigmoid<float>);
     TestInplace<float>(
-        sz, vec_sigmoid<float, platform::avx512f>, ref_sigmoid<float>);
+        sz, vec_sigmoid<float, backends::cpu::avx512f>, ref_sigmoid<float>);
   }
   TestInplace<double>(30, vec_sigmoid<double>, ref_sigmoid<double>);
 }
 
 TEST(CpuVecTest, inplace_tanh) {
-  namespace platform = paddle::platform;
   using namespace phi::funcs;  // NOLINT
   for (auto sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     TestInplace<float>(sz, vec_tanh<float>, ref_tanh<float>);
-    TestInplace<float>(sz, vec_tanh<float, platform::avx>, ref_tanh<float>);
-    TestInplace<float>(sz, vec_tanh<float, platform::avx2>, ref_tanh<float>);
-    TestInplace<float>(sz, vec_tanh<float, platform::avx512f>, ref_tanh<float>);
+    TestInplace<float>(
+        sz, vec_tanh<float, backends::cpu::avx>, ref_tanh<float>);
+    TestInplace<float>(
+        sz, vec_tanh<float, backends::cpu::avx2>, ref_tanh<float>);
+    TestInplace<float>(
+        sz, vec_tanh<float, backends::cpu::avx512f>, ref_tanh<float>);
   }
   TestInplace<double>(30, vec_tanh<double>, ref_tanh<double>);
 }
 
 TEST(CpuVecTest, inplace_relu) {
-  namespace platform = paddle::platform;
   using namespace phi::funcs;  // NOLINT
   for (auto sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     TestInplace<float>(sz, vec_relu<float>, ref_relu<float>);
-    TestInplace<float>(sz, vec_relu<float, platform::avx>, ref_relu<float>);
-    TestInplace<float>(sz, vec_relu<float, platform::avx2>, ref_relu<float>);
-    TestInplace<float>(sz, vec_relu<float, platform::avx512f>, ref_relu<float>);
+    TestInplace<float>(
+        sz, vec_relu<float, backends::cpu::avx>, ref_relu<float>);
+    TestInplace<float>(
+        sz, vec_relu<float, backends::cpu::avx2>, ref_relu<float>);
+    TestInplace<float>(
+        sz, vec_relu<float, backends::cpu::avx512f>, ref_relu<float>);
   }
   TestInplace<double>(30, vec_relu<double>, ref_relu<double>);
 }