Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
09096aeb
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
09096aeb
编写于
7月 01, 2022
作者:
L
Leo Chen
提交者:
GitHub
7月 02, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
unify cpu context (#43989)
* unify cpu context * fix init() * delete test_device_context * fix test_scalar
上级
8d9f00a8
变更
72
显示空白变更内容
内联
并排
Showing
72 changed file
with
61 addition
and
2121 deletion
+61
-2121
paddle/fluid/operators/elementwise/elementwise_add_op.cc
paddle/fluid/operators/elementwise/elementwise_add_op.cc
+0
-6
paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
...le/fluid/operators/elementwise/elementwise_floordiv_op.cc
+0
-3
paddle/fluid/operators/elementwise/elementwise_max_op.cc
paddle/fluid/operators/elementwise/elementwise_max_op.cc
+0
-3
paddle/fluid/operators/elementwise/elementwise_min_op.cc
paddle/fluid/operators/elementwise/elementwise_min_op.cc
+0
-3
paddle/fluid/operators/elementwise/elementwise_mod_op.cc
paddle/fluid/operators/elementwise/elementwise_mod_op.cc
+0
-3
paddle/fluid/operators/elementwise/elementwise_pow_op.cc
paddle/fluid/operators/elementwise/elementwise_pow_op.cc
+0
-3
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
+0
-3
paddle/fluid/operators/increment_op.cc
paddle/fluid/operators/increment_op.cc
+0
-3
paddle/fluid/operators/isfinite_op.cc
paddle/fluid/operators/isfinite_op.cc
+0
-3
paddle/fluid/operators/isfinite_v2_op.cc
paddle/fluid/operators/isfinite_v2_op.cc
+0
-3
paddle/fluid/operators/label_smooth_op.cc
paddle/fluid/operators/label_smooth_op.cc
+0
-3
paddle/fluid/operators/math/beam_search.cc
paddle/fluid/operators/math/beam_search.cc
+7
-14
paddle/fluid/operators/math/concat_and_split.cc
paddle/fluid/operators/math/concat_and_split.cc
+1
-7
paddle/fluid/operators/math/context_project.cc
paddle/fluid/operators/math/context_project.cc
+3
-8
paddle/fluid/operators/math/cos_sim_functor.cc
paddle/fluid/operators/math/cos_sim_functor.cc
+0
-7
paddle/fluid/operators/math/cross_entropy.cc
paddle/fluid/operators/math/cross_entropy.cc
+0
-9
paddle/fluid/operators/math/gru_compute.cc
paddle/fluid/operators/math/gru_compute.cc
+0
-6
paddle/fluid/operators/math/im2col.cc
paddle/fluid/operators/math/im2col.cc
+0
-30
paddle/fluid/operators/math/math_function.cc
paddle/fluid/operators/math/math_function.cc
+0
-335
paddle/fluid/operators/math/maxouting.cc
paddle/fluid/operators/math/maxouting.cc
+0
-5
paddle/fluid/operators/math/sample_prob.cc
paddle/fluid/operators/math/sample_prob.cc
+1
-12
paddle/fluid/operators/math/selected_rows_functor.cc
paddle/fluid/operators/math/selected_rows_functor.cc
+2
-84
paddle/fluid/operators/math/sequence_padding.cc
paddle/fluid/operators/math/sequence_padding.cc
+0
-113
paddle/fluid/operators/math/sequence_scale.cc
paddle/fluid/operators/math/sequence_scale.cc
+0
-26
paddle/fluid/operators/math/softmax.cc
paddle/fluid/operators/math/softmax.cc
+0
-7
paddle/fluid/operators/math/vol2col.cc
paddle/fluid/operators/math/vol2col.cc
+0
-250
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
+0
-1
paddle/fluid/operators/rank_loss_op.cc
paddle/fluid/operators/rank_loss_op.cc
+0
-3
paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc
paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc
+0
-3
paddle/fluid/operators/reduce_ops/reduce_all_op.cc
paddle/fluid/operators/reduce_ops/reduce_all_op.cc
+0
-3
paddle/fluid/operators/reduce_ops/reduce_any_op.cc
paddle/fluid/operators/reduce_ops/reduce_any_op.cc
+0
-3
paddle/fluid/operators/reduce_ops/reduce_prod_op.cc
paddle/fluid/operators/reduce_ops/reduce_prod_op.cc
+0
-3
paddle/fluid/operators/reduce_ops/reduce_sum_op.cc
paddle/fluid/operators/reduce_ops/reduce_sum_op.cc
+0
-3
paddle/fluid/operators/set_value_op.cc
paddle/fluid/operators/set_value_op.cc
+0
-3
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+0
-8
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+1
-8
paddle/fluid/platform/transform.h
paddle/fluid/platform/transform.h
+0
-24
paddle/infrt/kernel/phi/context_kernels.cc
paddle/infrt/kernel/phi/context_kernels.cc
+0
-1
paddle/infrt/kernel/phi/infershaped/infershape_launchers_test.cc
...infrt/kernel/phi/infershaped/infershape_launchers_test.cc
+0
-1
paddle/phi/backends/cpu/cpu_context.cc
paddle/phi/backends/cpu/cpu_context.cc
+6
-4
paddle/phi/backends/cpu/cpu_context.h
paddle/phi/backends/cpu/cpu_context.h
+0
-6
paddle/phi/kernels/funcs/blas/blas_impl.h
paddle/phi/kernels/funcs/blas/blas_impl.h
+0
-616
paddle/phi/kernels/funcs/fc_functor.cc
paddle/phi/kernels/funcs/fc_functor.cc
+0
-2
paddle/phi/kernels/funcs/for_range.h
paddle/phi/kernels/funcs/for_range.h
+0
-16
paddle/phi/kernels/funcs/gru_compute.cc
paddle/phi/kernels/funcs/gru_compute.cc
+0
-185
paddle/phi/kernels/funcs/lstm_compute.cc
paddle/phi/kernels/funcs/lstm_compute.cc
+0
-80
paddle/phi/kernels/funcs/math_function.cc
paddle/phi/kernels/funcs/math_function.cc
+40
-94
paddle/phi/kernels/funcs/matrix_inverse.cc
paddle/phi/kernels/funcs/matrix_inverse.cc
+0
-4
paddle/phi/tests/api/test_sparse_utils_api.cc
paddle/phi/tests/api/test_sparse_utils_api.cc
+0
-1
paddle/phi/tests/common/test_scalar.cu
paddle/phi/tests/common/test_scalar.cu
+0
-7
paddle/phi/tests/core/CMakeLists.txt
paddle/phi/tests/core/CMakeLists.txt
+0
-4
paddle/phi/tests/core/test_device_context.cc
paddle/phi/tests/core/test_device_context.cc
+0
-54
paddle/phi/tests/kernels/test_cast_dev_api.cc
paddle/phi/tests/kernels/test_cast_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_concat_dev_api.cc
paddle/phi/tests/kernels/test_concat_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_conj_dev_api.cc
paddle/phi/tests/kernels/test_conj_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_copy_dev_api.cc
paddle/phi/tests/kernels/test_copy_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_creation_dev_api.cc
paddle/phi/tests/kernels/test_creation_dev_api.cc
+0
-4
paddle/phi/tests/kernels/test_dot_dev_api.cc
paddle/phi/tests/kernels/test_dot_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_elementwise_dev_api.cc
paddle/phi/tests/kernels/test_elementwise_dev_api.cc
+0
-4
paddle/phi/tests/kernels/test_flatten_dev_api.cc
paddle/phi/tests/kernels/test_flatten_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_math_function.cc
paddle/phi/tests/kernels/test_math_function.cc
+0
-1
paddle/phi/tests/kernels/test_matmul_dev_api.cc
paddle/phi/tests/kernels/test_matmul_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_mean_dev_api.cc
paddle/phi/tests/kernels/test_mean_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_reshape_dev_api.cc
paddle/phi/tests/kernels/test_reshape_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_scale_dev_api.cc
paddle/phi/tests/kernels/test_scale_dev_api.cc
+0
-2
paddle/phi/tests/kernels/test_sparse_activation_dev_api.cc
paddle/phi/tests/kernels/test_sparse_activation_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc
paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_sparse_elementwise_dev_api.cc
paddle/phi/tests/kernels/test_sparse_elementwise_dev_api.cc
+0
-4
paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc
paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_sparse_utils_dev_api.cc
paddle/phi/tests/kernels/test_sparse_utils_dev_api.cc
+0
-6
paddle/phi/tests/kernels/test_split_dev_api.cc
paddle/phi/tests/kernels/test_split_dev_api.cc
+0
-1
paddle/phi/tests/kernels/test_sum_dev_api.cc
paddle/phi/tests/kernels/test_sum_dev_api.cc
+0
-1
未找到文件。
paddle/fluid/operators/elementwise/elementwise_add_op.cc
浏览文件 @
09096aeb
...
@@ -20,12 +20,6 @@ namespace paddle {
...
@@ -20,12 +20,6 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
class
OpDesc
;
class
OpDesc
;
}
// namespace framework
}
// namespace framework
namespace
imperative
{
class
OpBase
;
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
浏览文件 @
09096aeb
...
@@ -25,9 +25,6 @@ class EmptyGradOpMaker;
...
@@ -25,9 +25,6 @@ class EmptyGradOpMaker;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/elementwise/elementwise_max_op.cc
浏览文件 @
09096aeb
...
@@ -23,9 +23,6 @@ class OpDesc;
...
@@ -23,9 +23,6 @@ class OpDesc;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/elementwise/elementwise_min_op.cc
浏览文件 @
09096aeb
...
@@ -23,9 +23,6 @@ class OpDesc;
...
@@ -23,9 +23,6 @@ class OpDesc;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/elementwise/elementwise_mod_op.cc
浏览文件 @
09096aeb
...
@@ -25,9 +25,6 @@ class EmptyGradOpMaker;
...
@@ -25,9 +25,6 @@ class EmptyGradOpMaker;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/elementwise/elementwise_pow_op.cc
浏览文件 @
09096aeb
...
@@ -20,9 +20,6 @@ class OpDesc;
...
@@ -20,9 +20,6 @@ class OpDesc;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
浏览文件 @
09096aeb
...
@@ -23,9 +23,6 @@ class OpDesc;
...
@@ -23,9 +23,6 @@ class OpDesc;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/increment_op.cc
浏览文件 @
09096aeb
...
@@ -25,9 +25,6 @@ class OpDesc;
...
@@ -25,9 +25,6 @@ class OpDesc;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/isfinite_op.cc
浏览文件 @
09096aeb
...
@@ -26,9 +26,6 @@ class EmptyGradOpMaker;
...
@@ -26,9 +26,6 @@ class EmptyGradOpMaker;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/isfinite_v2_op.cc
浏览文件 @
09096aeb
...
@@ -34,9 +34,6 @@ namespace operators {
...
@@ -34,9 +34,6 @@ namespace operators {
template
<
typename
DeviceContext
,
typename
T
,
typename
Functor
>
template
<
typename
DeviceContext
,
typename
T
,
typename
Functor
>
class
OverflowKernel
;
class
OverflowKernel
;
}
// namespace operators
}
// namespace operators
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
plat
=
paddle
::
platform
;
namespace
plat
=
paddle
::
platform
;
...
...
paddle/fluid/operators/label_smooth_op.cc
浏览文件 @
09096aeb
...
@@ -24,9 +24,6 @@ class OpDesc;
...
@@ -24,9 +24,6 @@ class OpDesc;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/math/beam_search.cc
浏览文件 @
09096aeb
...
@@ -13,26 +13,19 @@ See the License for the specific language governing permissions and
...
@@ -13,26 +13,19 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/math/beam_search.h"
#include "paddle/fluid/operators/math/beam_search.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
namespace
phi
{
namespace
phi
{
class
DenseTensor
;
class
DenseTensor
;
}
// namespace phi
}
// namespace phi
namespace
paddle
{
namespace
framework
{}
// namespace framework
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
template
<
typename
T
>
template
<
typename
T
>
class
BeamSearchFunctor
<
p
latform
::
CPUDevice
Context
,
T
>
{
class
BeamSearchFunctor
<
p
hi
::
CPU
Context
,
T
>
{
public:
public:
void
operator
()(
const
p
latform
::
CPUDevice
Context
&
context
,
void
operator
()(
const
p
hi
::
CPU
Context
&
context
,
const
framework
::
LoDTensor
*
pre_ids
,
const
framework
::
LoDTensor
*
pre_ids
,
const
framework
::
LoDTensor
*
pre_scores
,
const
framework
::
LoDTensor
*
pre_scores
,
const
framework
::
LoDTensor
*
ids
,
const
framework
::
LoDTensor
*
ids
,
...
@@ -308,10 +301,10 @@ class BeamSearchFunctor<platform::CPUDeviceContext, T> {
...
@@ -308,10 +301,10 @@ class BeamSearchFunctor<platform::CPUDeviceContext, T> {
}
}
};
};
template
class
BeamSearchFunctor
<
p
latform
::
CPUDevice
Context
,
int
>;
template
class
BeamSearchFunctor
<
p
hi
::
CPU
Context
,
int
>;
template
class
BeamSearchFunctor
<
p
latform
::
CPUDevice
Context
,
int64_t
>;
template
class
BeamSearchFunctor
<
p
hi
::
CPU
Context
,
int64_t
>;
template
class
BeamSearchFunctor
<
p
latform
::
CPUDevice
Context
,
float
>;
template
class
BeamSearchFunctor
<
p
hi
::
CPU
Context
,
float
>;
template
class
BeamSearchFunctor
<
p
latform
::
CPUDevice
Context
,
double
>;
template
class
BeamSearchFunctor
<
p
hi
::
CPU
Context
,
double
>;
}
// namespace math
}
// namespace math
}
// namespace operators
}
// namespace operators
...
...
paddle/fluid/operators/math/concat_and_split.cc
浏览文件 @
09096aeb
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
#ifdef PADDLE_WITH_ASCEND_CL
#ifdef PADDLE_WITH_ASCEND_CL
...
@@ -28,13 +29,6 @@ namespace phi {
...
@@ -28,13 +29,6 @@ namespace phi {
class
DenseTensor
;
class
DenseTensor
;
}
// namespace phi
}
// namespace phi
namespace
paddle
{
namespace
framework
{}
// namespace framework
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
...
...
paddle/fluid/operators/math/context_project.cc
浏览文件 @
09096aeb
...
@@ -13,19 +13,14 @@ See the License for the specific language governing permissions and
...
@@ -13,19 +13,14 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/math/context_project.h"
#include "paddle/fluid/operators/math/context_project.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
namespace
paddle
{
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
template
class
ContextProjectFunctor
<
p
latform
::
CPUDevice
Context
,
float
>;
template
class
ContextProjectFunctor
<
p
hi
::
CPU
Context
,
float
>;
template
class
ContextProjectFunctor
<
p
latform
::
CPUDevice
Context
,
double
>;
template
class
ContextProjectFunctor
<
p
hi
::
CPU
Context
,
double
>;
}
// namespace math
}
// namespace math
}
// namespace operators
}
// namespace operators
...
...
paddle/fluid/operators/math/cos_sim_functor.cc
浏览文件 @
09096aeb
...
@@ -14,16 +14,9 @@ limitations under the License. */
...
@@ -14,16 +14,9 @@ limitations under the License. */
#include "paddle/fluid/operators/math/cos_sim_functor.h"
#include "paddle/fluid/operators/math/cos_sim_functor.h"
namespace
paddle
{
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
template
<
typename
T
>
template
<
typename
T
>
struct
CosSimDyFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
struct
CosSimDyFunctor
<
platform
::
CPUDeviceContext
,
T
>
{
void
operator
()(
const
platform
::
CPUDeviceContext
&
ctx
,
void
operator
()(
const
platform
::
CPUDeviceContext
&
ctx
,
...
...
paddle/fluid/operators/math/cross_entropy.cc
浏览文件 @
09096aeb
...
@@ -17,12 +17,6 @@ limitations under the License. */
...
@@ -17,12 +17,6 @@ limitations under the License. */
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
namespace
paddle
{
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
...
@@ -129,9 +123,6 @@ void CrossEntropyFunctor<DeviceContext, T>::operator()(
...
@@ -129,9 +123,6 @@ void CrossEntropyFunctor<DeviceContext, T>::operator()(
}
}
}
}
template
class
CrossEntropyFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
CrossEntropyFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
class
CrossEntropyFunctor
<
phi
::
CPUContext
,
float
>;
template
class
CrossEntropyFunctor
<
phi
::
CPUContext
,
float
>;
template
class
CrossEntropyFunctor
<
phi
::
CPUContext
,
double
>;
template
class
CrossEntropyFunctor
<
phi
::
CPUContext
,
double
>;
}
// namespace math
}
// namespace math
...
...
paddle/fluid/operators/math/gru_compute.cc
浏览文件 @
09096aeb
...
@@ -15,12 +15,6 @@ limitations under the License. */
...
@@ -15,12 +15,6 @@ limitations under the License. */
#include "paddle/fluid/operators/math/detail/gru_kernel.h"
#include "paddle/fluid/operators/math/detail/gru_kernel.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
namespace
paddle
{
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
...
...
paddle/fluid/operators/math/im2col.cc
浏览文件 @
09096aeb
...
@@ -16,12 +16,6 @@ limitations under the License. */
...
@@ -16,12 +16,6 @@ limitations under the License. */
#include "paddle/fluid/operators/math/im2col_cfo_cpu.h"
#include "paddle/fluid/operators/math/im2col_cfo_cpu.h"
namespace
paddle
{
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
namespace
phi
{
namespace
phi
{
class
CPUContext
;
class
CPUContext
;
}
// namespace phi
}
// namespace phi
...
@@ -166,24 +160,12 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
...
@@ -166,24 +160,12 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
}
}
};
};
template
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
CPUDeviceContext
,
float
>;
template
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
CPUDeviceContext
,
double
>;
template
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
template
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
phi
::
CPUContext
,
phi
::
CPUContext
,
float
>;
float
>;
template
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
template
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
phi
::
CPUContext
,
phi
::
CPUContext
,
double
>;
double
>;
template
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
CPUDeviceContext
,
float
>;
template
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
CPUDeviceContext
,
double
>;
template
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
template
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
phi
::
CPUContext
,
phi
::
CPUContext
,
float
>;
float
>;
...
@@ -353,24 +335,12 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
...
@@ -353,24 +335,12 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
}
}
};
};
template
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
platform
::
CPUDeviceContext
,
float
>;
template
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
platform
::
CPUDeviceContext
,
double
>;
template
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
template
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
phi
::
CPUContext
,
phi
::
CPUContext
,
float
>;
float
>;
template
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
template
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
phi
::
CPUContext
,
phi
::
CPUContext
,
double
>;
double
>;
template
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
platform
::
CPUDeviceContext
,
float
>;
template
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
platform
::
CPUDeviceContext
,
double
>;
template
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
template
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
phi
::
CPUContext
,
phi
::
CPUContext
,
float
>;
float
>;
...
...
paddle/fluid/operators/math/math_function.cc
已删除
100644 → 0
浏览文件 @
8d9f00a8
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/math_function.h"
#ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h"
#endif
#ifdef PADDLE_USE_OPENBLAS
#include <cblas.h>
#endif
#include <memory>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/operators/math/math_function_impl.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "unsupported/Eigen/CXX11/Tensor"
namespace
paddle
{
namespace
operators
{
namespace
math
{
using
float16
=
paddle
::
platform
::
float16
;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
platform
::
float16
>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
platform
::
bfloat16
>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
float
>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
double
>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
int16_t
>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
int
>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
int64_t
>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
bool
>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
uint8_t
>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
platform
::
complex
<
float
>
>
;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
platform
::
complex
<
double
>
>
;
template
struct
SetConstant
<
phi
::
CPUContext
,
platform
::
float16
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
platform
::
bfloat16
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
float
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
double
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
int16_t
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
int
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
int64_t
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
bool
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
uint8_t
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
platform
::
complex
<
float
>
>
;
template
struct
SetConstant
<
phi
::
CPUContext
,
platform
::
complex
<
double
>
>
;
#ifdef PADDLE_WITH_XPU
template
struct
SetConstant
<
platform
::
XPUDeviceContext
,
platform
::
float16
>;
template
struct
SetConstant
<
platform
::
XPUDeviceContext
,
platform
::
bfloat16
>;
template
struct
SetConstant
<
platform
::
XPUDeviceContext
,
float
>;
template
struct
SetConstant
<
platform
::
XPUDeviceContext
,
double
>;
template
struct
SetConstant
<
platform
::
XPUDeviceContext
,
uint8_t
>;
template
struct
SetConstant
<
platform
::
XPUDeviceContext
,
int16_t
>;
template
struct
SetConstant
<
platform
::
XPUDeviceContext
,
int
>;
template
struct
SetConstant
<
platform
::
XPUDeviceContext
,
int64_t
>;
template
struct
SetConstant
<
platform
::
XPUDeviceContext
,
bool
>;
template
struct
SetConstant
<
platform
::
XPUDeviceContext
,
platform
::
complex
<
float
>
>
;
template
struct
SetConstant
<
platform
::
XPUDeviceContext
,
platform
::
complex
<
double
>
>
;
#endif
#define DEFINE_CPU_TRANS(RANK) \
template struct Transpose<platform::CPUDeviceContext, \
platform::float16, \
RANK>; \
template struct Transpose<platform::CPUDeviceContext, \
platform::bfloat16, \
RANK>; \
template struct Transpose<platform::CPUDeviceContext, float, RANK>; \
template struct Transpose<platform::CPUDeviceContext, double, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int64_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, bool, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int16_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, uint8_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, int8_t, RANK>; \
template struct Transpose<platform::CPUDeviceContext, \
platform::complex<float>, \
RANK>; \
template struct Transpose<platform::CPUDeviceContext, \
platform::complex<double>, \
RANK>;
DEFINE_CPU_TRANS
(
1
);
DEFINE_CPU_TRANS
(
2
);
DEFINE_CPU_TRANS
(
3
);
DEFINE_CPU_TRANS
(
4
);
DEFINE_CPU_TRANS
(
5
);
DEFINE_CPU_TRANS
(
6
);
template
<
typename
T
>
struct
TransposeNormal
<
platform
::
CPUDeviceContext
,
T
>
{
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
const
framework
::
Tensor
&
in
,
framework
::
Tensor
*
out
,
const
std
::
vector
<
int
>&
axis
)
{
const
int
rank
=
axis
.
size
();
auto
in_stride
=
phi
::
stride
(
in
.
dims
());
auto
out_stride
=
phi
::
stride
(
out
->
dims
());
const
T
*
in_ptr
=
in
.
data
<
T
>
();
T
*
out_ptr
=
out
->
data
<
T
>
();
auto
transpose_helper
=
[
&
](
int64_t
beg
,
int64_t
end
)
{
for
(
int64_t
out_idx
=
beg
;
out_idx
<
end
;
++
out_idx
)
{
int64_t
in_idx
=
0
;
int64_t
tmp_idx
=
out_idx
;
// calculate the input index
for
(
int
i
=
0
;
i
<
rank
;
++
i
)
{
const
int64_t
coordinate
=
tmp_idx
/
out_stride
[
i
];
tmp_idx
-=
coordinate
*
out_stride
[
i
];
in_idx
+=
coordinate
*
in_stride
[
axis
[
i
]];
}
out_ptr
[
out_idx
]
=
in_ptr
[
in_idx
];
}
};
transpose_helper
(
0
,
out
->
numel
());
}
};
// define transpose normal
#define DEFINE_CPU_TRANS_NORMAL(TYPE) \
template struct TransposeNormal<platform::CPUDeviceContext, TYPE>
DEFINE_CPU_TRANS_NORMAL
(
platform
::
float16
);
DEFINE_CPU_TRANS_NORMAL
(
platform
::
bfloat16
);
DEFINE_CPU_TRANS_NORMAL
(
float
);
DEFINE_CPU_TRANS_NORMAL
(
double
);
DEFINE_CPU_TRANS_NORMAL
(
int
);
DEFINE_CPU_TRANS_NORMAL
(
int64_t
);
DEFINE_CPU_TRANS_NORMAL
(
bool
);
DEFINE_CPU_TRANS_NORMAL
(
int16_t
);
DEFINE_CPU_TRANS_NORMAL
(
uint8_t
);
DEFINE_CPU_TRANS_NORMAL
(
int8_t
);
DEFINE_CPU_TRANS_NORMAL
(
platform
::
complex
<
float
>
);
DEFINE_CPU_TRANS_NORMAL
(
platform
::
complex
<
double
>
);
struct
TensorSetConstantCPU
{
TensorSetConstantCPU
(
framework
::
Tensor
*
tensor
,
float
value
)
:
tensor_
(
tensor
),
value_
(
value
)
{}
template
<
typename
T
>
void
apply
()
const
{
auto
cpu
=
platform
::
CPUPlace
();
auto
*
begin
=
tensor_
->
mutable_data
<
T
>
(
cpu
);
std
::
fill
(
begin
,
begin
+
tensor_
->
numel
(),
static_cast
<
T
>
(
value_
));
}
framework
::
Tensor
*
tensor_
;
float
value_
;
};
template
<
>
void
set_constant_with_place
<
platform
::
XPUPlace
>
(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
*
tensor
,
float
value
)
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"XPUPlace is not supported"
));
}
template
<
>
void
set_constant_with_place
<
platform
::
NPUPlace
>
(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
*
tensor
,
float
value
)
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"NPUPlace is not supported"
));
}
template
<
>
void
set_constant_with_place
<
platform
::
NPUPinnedPlace
>
(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
*
tensor
,
float
value
)
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"NPUPinnedPlace is not supported"
));
}
template
<
>
void
set_constant_with_place
<
platform
::
IPUPlace
>
(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
*
tensor
,
float
value
)
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"IPUPlace is not supported"
));
}
template
<
>
void
set_constant_with_place
<
platform
::
CPUPlace
>
(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
*
tensor
,
float
value
)
{
framework
::
VisitDataType
(
tensor
->
type
(),
TensorSetConstantCPU
(
tensor
,
value
));
}
template
<
>
void
set_constant_with_place
<
platform
::
MLUPlace
>
(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
*
tensor
,
float
value
)
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"MLUPlace is not supported"
));
}
template
<
>
void
set_constant_with_place
<
platform
::
CustomPlace
>
(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
*
tensor
,
float
value
)
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"CustomPlace is not supported"
));
}
// Specialization of set_constant_with_place for platform::CUDAPinnedPlace.
// Uses the same path as the CPUPlace specialization: a TensorSetConstantCPU
// functor built from (tensor, value) is dispatched on the tensor's runtime
// data type through framework::VisitDataType. `context` is not read.
template <>
void set_constant_with_place<platform::CUDAPinnedPlace>(
    const platform::DeviceContext& context,
    framework::Tensor* tensor,
    float value) {
  TensorSetConstantCPU fill_functor(tensor, value);
  framework::VisitDataType(tensor->type(), fill_functor);
}
// Place visitor that fills a tensor with a constant.
//
// The visitor captures the device context, the destination tensor and the
// fill value. When invoked with a concrete place object it forwards to the
// set_constant_with_place<Place> specialization selected by the static type
// of that object; the place value itself is not used.
struct TensorSetConstantWithPlace : public boost::static_visitor<void> {
  TensorSetConstantWithPlace(const platform::DeviceContext& context,
                             framework::Tensor* tensor,
                             float value)
      : context_(context), tensor_(tensor), value_(value) {}

  // Only the type `Place` matters here; it picks the specialization.
  template <typename Place>
  void operator()(Place place) const {
    set_constant_with_place<Place>(context_, tensor_, value_);
  }

  // Device context handed through to set_constant_with_place.
  const platform::DeviceContext& context_;
  // Destination tensor (not owned by the visitor).
  framework::Tensor* tensor_;
  // Constant to write into the tensor.
  float value_;
};
// Fills `tensor` with `value`.
//
// A TensorSetConstantWithPlace visitor is built from the arguments. In
// CUDA/HIP builds it is dispatched on the place reported by the tensor via
// platform::VisitPlace; in all other builds it is invoked directly with a
// CPUPlace, regardless of what tensor->place() reports.
void set_constant(const platform::DeviceContext& context,
                  framework::Tensor* tensor,
                  float value) {
  TensorSetConstantWithPlace place_visitor(context, tensor, value);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  // Select the specialization that matches the tensor's actual place.
  paddle::platform::VisitPlace(tensor->place(), place_visitor);
#else
  // No CUDA/HIP support compiled in: always take the CPU path.
  place_visitor(platform::CPUPlace());
#endif
}
// RowwiseAdd specialization for platform::CPUDeviceContext.
//
// Adds `vector` to every row of `input` and writes the result to `output`:
// for each index i along dimension 0, output.chip(i, 0) = input.chip(i, 0)
// + vector.
//
// Parameters:
//   context - CPU device context (not read by this implementation).
//   input   - source tensor, viewed as a matrix via EigenMatrix<T>::From.
//   vector  - tensor whose numel() must equal input.numel() / input.dims()[0].
//   output  - destination tensor; its dims must equal the dims of `input`.
//
// Raises InvalidArgument (through PADDLE_ENFORCE_EQ) when the vector size or
// the output shape does not match.
template <typename T>
struct RowwiseAdd<platform::CPUDeviceContext, T> {
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::Tensor& input,
                  const framework::Tensor& vector,
                  framework::Tensor* output) {
    auto in_dims = input.dims();
    auto out_dims = output->dims();
    auto size = input.numel() / in_dims[0];
    PADDLE_ENFORCE_EQ(
        vector.numel(),
        size,
        platform::errors::InvalidArgument(
            "The input vector size"
            " should be equal to the size of each row of input tensor."
            " Expected vector size=%d, but received %d",
            size,
            vector.numel()));

    // Keep the formatted shapes alive in named objects. Taking c_str() of the
    // temporary returned by to_str() would leave a dangling pointer, because
    // the temporary string is destroyed at the end of that statement and the
    // pointer is only consumed later, when the error message is built.
    const auto in_dims_str = in_dims.to_str();
    const auto out_dims_str = out_dims.to_str();
    PADDLE_ENFORCE_EQ(
        out_dims,
        in_dims,
        platform::errors::InvalidArgument(
            "The output tensor shape should be same as the input"
            " tensor shape. Expected output tensor shape: %s,"
            " but received %s",
            in_dims_str.c_str(),
            out_dims_str.c_str()));

    auto in = framework::EigenMatrix<T>::From(input);
    auto vec = framework::EigenVector<T>::Flatten(vector);
    auto out = framework::EigenMatrix<T>::From(*output);

    // Row by row: out[i, :] = in[i, :] + vec.
    for (int64_t i = 0; i < in_dims[0]; ++i) {
      out.chip(i, 0) = in.chip(i, 0) + vec;
    }
  }
};
// Explicit instantiations for platform::CPUDeviceContext.

// RowwiseAdd: float, double.
template struct RowwiseAdd<platform::CPUDeviceContext, float>;
template struct RowwiseAdd<platform::CPUDeviceContext, double>;

// ColwiseSum: float, double, int, int64_t.
template struct ColwiseSum<platform::CPUDeviceContext, float>;
template struct ColwiseSum<platform::CPUDeviceContext, double>;
template struct ColwiseSum<platform::CPUDeviceContext, int>;
template struct ColwiseSum<platform::CPUDeviceContext, int64_t>;

// RowwiseSum: float, double.
template struct RowwiseSum<platform::CPUDeviceContext, float>;
template struct RowwiseSum<platform::CPUDeviceContext, double>;

// RowwiseMean: float, double.
template struct RowwiseMean<platform::CPUDeviceContext, float>;
template struct RowwiseMean<platform::CPUDeviceContext, double>;
// ElementwiseAddTo specialization for platform::CPUDeviceContext.
//
// Accumulates `src` into `dst` in place: both tensors are flattened to Eigen
// vectors and the sum dst + src is assigned back to dst, evaluated on the
// Eigen device obtained from `ctx`.
template <typename T>
struct ElementwiseAddTo<platform::CPUDeviceContext, T> {
  void operator()(platform::CPUDeviceContext* ctx,
                  const framework::Tensor& src,
                  framework::Tensor* dst) {
    auto src_vec = framework::EigenVector<T>::Flatten(src);
    auto dst_vec = framework::EigenVector<T>::Flatten(*dst);
    auto& eigen_dev = *(ctx->eigen_device());
    dst_vec.device(eigen_dev) = dst_vec + src_vec;
  }
};

// Explicit instantiation for half-precision floats.
template struct ElementwiseAddTo<platform::CPUDeviceContext,
                                 platform::float16>;
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/math/maxouting.cc
浏览文件 @
09096aeb
...
@@ -109,11 +109,6 @@ void MaxOutGradFunctor<DeviceContext, T>::operator()(
...
@@ -109,11 +109,6 @@ void MaxOutGradFunctor<DeviceContext, T>::operator()(
}
}
}
}
template
class
MaxOutGradFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
MaxOutGradFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
class
MaxOutFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
MaxOutFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
class
MaxOutGradFunctor
<
phi
::
CPUContext
,
float
>;
template
class
MaxOutGradFunctor
<
phi
::
CPUContext
,
float
>;
template
class
MaxOutGradFunctor
<
phi
::
CPUContext
,
double
>;
template
class
MaxOutGradFunctor
<
phi
::
CPUContext
,
double
>;
template
class
MaxOutFunctor
<
phi
::
CPUContext
,
float
>;
template
class
MaxOutFunctor
<
phi
::
CPUContext
,
float
>;
...
...
paddle/fluid/operators/math/sample_prob.cc
浏览文件 @
09096aeb
...
@@ -14,19 +14,8 @@ limitations under the License. */
...
@@ -14,19 +14,8 @@ limitations under the License. */
#include "paddle/fluid/operators/math/sample_prob.h"
#include "paddle/fluid/operators/math/sample_prob.h"
namespace
paddle
{
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{}
// namespace math
template
class
SampleWithProb
<
platform
::
CPUDeviceContext
,
float
>;
template
class
SampleWithProb
<
platform
::
CPUDeviceContext
,
double
>;
}
// namespace math
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
paddle/fluid/operators/math/selected_rows_functor.cc
浏览文件 @
09096aeb
...
@@ -276,51 +276,6 @@ struct SelectedRowsSumTo<platform::CPUDeviceContext, T> {
...
@@ -276,51 +276,6 @@ struct SelectedRowsSumTo<platform::CPUDeviceContext, T> {
template
struct
SelectedRowsSumTo
<
platform
::
CPUDeviceContext
,
float
>;
template
struct
SelectedRowsSumTo
<
platform
::
CPUDeviceContext
,
float
>;
template
struct
SelectedRowsSumTo
<
platform
::
CPUDeviceContext
,
double
>;
template
struct
SelectedRowsSumTo
<
platform
::
CPUDeviceContext
,
double
>;
// SelectedRowsAddToTensor specialization for platform::CPUDeviceContext.
//
// Adds every row stored in the SelectedRows `input1` onto the row of the
// dense tensor `input2` named by the matching entry of input1.rows().
// `input2` is updated in place.
//
// Behaviour visible in this block:
//   * an empty input1.rows() logs a warning and returns without touching
//     input2;
//   * input1.height() must equal input2->dims()[0];
//   * the per-row element count of input1 must equal
//     input2->numel() / input1.height();
//   * violations raise InvalidArgument through PADDLE_ENFORCE_EQ.
// `context` is not read.
template <typename T>
struct SelectedRowsAddToTensor<platform::CPUDeviceContext, T> {
  void operator()(const platform::CPUDeviceContext& context,
                  const phi::SelectedRows& input1,
                  framework::Tensor* input2) {
    if (UNLIKELY(input1.rows().size() == 0)) {
      LOG(WARNING) << "input selected rows is empty!";
      return;
    }

    auto in1_height = input1.height();
    const auto& in2_dims = input2->dims();
    PADDLE_ENFORCE_EQ(in1_height,
                      in2_dims[0],
                      platform::errors::InvalidArgument(
                          "The two inputs height must be equal."
                          "But received first input height = "
                          "[%d], second input height = [%d]",
                          in1_height,
                          in2_dims[0]));

    auto& in1_value = input1.value();
    auto& in1_rows = input1.rows();

    // Number of elements stored per selected row.
    int64_t in1_row_numel = in1_value.numel() / in1_rows.size();
    PADDLE_ENFORCE_EQ(
        in1_row_numel,
        input2->numel() / in1_height,
        platform::errors::InvalidArgument(
            "The two inputs width must be equal."
            "But received first input width = [%d], second input width = [%d]",
            in1_row_numel,
            input2->numel() / in1_height));

    auto* in1_data = in1_value.data<T>();
    auto* input2_data = input2->data<T>();

    // Accumulate selected row `row` into dense row in1_rows[row].
    for (size_t row = 0; row < in1_rows.size(); row++) {
      auto* dst_row = input2_data + in1_rows[row] * in1_row_numel;
      auto* src_row = in1_data + row * in1_row_numel;
      for (int64_t col = 0; col < in1_row_numel; col++) {
        dst_row[col] += src_row[col];
      }
    }
  }
};
template
<
typename
T
>
template
<
typename
T
>
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
T
>
{
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
T
>
{
void
operator
()(
const
phi
::
CPUContext
&
context
,
void
operator
()(
const
phi
::
CPUContext
&
context
,
...
@@ -366,13 +321,6 @@ struct SelectedRowsAddToTensor<phi::CPUContext, T> {
...
@@ -366,13 +321,6 @@ struct SelectedRowsAddToTensor<phi::CPUContext, T> {
}
}
};
};
template
struct
SelectedRowsAddToTensor
<
platform
::
CPUDeviceContext
,
float
>;
template
struct
SelectedRowsAddToTensor
<
platform
::
CPUDeviceContext
,
double
>;
template
struct
SelectedRowsAddToTensor
<
platform
::
CPUDeviceContext
,
int
>;
template
struct
SelectedRowsAddToTensor
<
platform
::
CPUDeviceContext
,
int64_t
>;
template
struct
SelectedRowsAddToTensor
<
platform
::
CPUDeviceContext
,
platform
::
bfloat16
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
double
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
double
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
int
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
int
>;
...
@@ -582,34 +530,6 @@ struct MergeAddImpl {
...
@@ -582,34 +530,6 @@ struct MergeAddImpl {
}
}
};
};
// MergeAdd specialization for platform::CPUDeviceContext.
//
// Each overload only forwards its arguments to
// MergeAddImpl<platform::CPUDeviceContext, T>; the merge itself (adding
// duplicated rows of the input SelectedRows object(s)) lives there.
template <typename T>
struct MergeAdd<platform::CPUDeviceContext, T> {
  // Unary form: merges `input` and returns the result by value.
  phi::SelectedRows operator()(const platform::CPUDeviceContext& context,
                               const phi::SelectedRows& input,
                               const bool sorted_result) {
    MergeAddImpl<platform::CPUDeviceContext, T> impl;
    return impl(context, input, sorted_result);
  }

  // Merges `input` and writes the result to `output`.
  void operator()(const platform::CPUDeviceContext& context,
                  const phi::SelectedRows& input,
                  phi::SelectedRows* output,
                  const bool sorted_result) {
    MergeAddImpl<platform::CPUDeviceContext, T> impl;
    impl(context, input, output, sorted_result);
  }

  // Merges several inputs and writes the result to `output`.
  void operator()(const platform::CPUDeviceContext& context,
                  const std::vector<const phi::SelectedRows*>& inputs,
                  phi::SelectedRows* output,
                  const bool sorted_result) {
    MergeAddImpl<platform::CPUDeviceContext, T> impl;
    impl(context, inputs, output, sorted_result);
  }
};
template
<
typename
T
>
template
<
typename
T
>
struct
MergeAdd
<
phi
::
CPUContext
,
T
>
{
struct
MergeAdd
<
phi
::
CPUContext
,
T
>
{
// unary functor, merge by adding duplicated rows in
// unary functor, merge by adding duplicated rows in
...
@@ -636,9 +556,7 @@ struct MergeAdd<phi::CPUContext, T> {
...
@@ -636,9 +556,7 @@ struct MergeAdd<phi::CPUContext, T> {
};
};
#define TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(dtype) \
#define TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(dtype) \
template struct MergeAddImpl<platform::CPUDeviceContext, dtype>; \
template struct MergeAddImpl<phi::CPUContext, dtype>; \
template struct MergeAddImpl<phi::CPUContext, dtype>; \
template struct MergeAdd<platform::CPUDeviceContext, dtype>; \
template struct MergeAdd<phi::CPUContext, dtype>;
template struct MergeAdd<phi::CPUContext, dtype>;
TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU
(
float
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU
(
float
)
...
...
paddle/fluid/operators/math/sequence_padding.cc
浏览文件 @
09096aeb
...
@@ -20,13 +20,6 @@ namespace phi {
...
@@ -20,13 +20,6 @@ namespace phi {
class
DenseTensor
;
class
DenseTensor
;
}
// namespace phi
}
// namespace phi
namespace
paddle
{
namespace
framework
{}
// namespace framework
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
...
@@ -101,66 +94,6 @@ static void fast_mem_init(void* dest,
...
@@ -101,66 +94,6 @@ static void fast_mem_init(void* dest,
}
}
}
}
// PaddingLoDTensorFunctor specialization for platform::CPUDeviceContext.
//
// Steps performed by operator():
//   1. Take the absolute offsets of `seq_tensor` at `lod_level`.
//   2. If pad_seq_len is -1, replace it with
//      MaximumSequenceLength(offsets).
//   3. Compute step_width = seq_tensor.numel() / seq_tensor.dims()[0] and
//      validate shapes with CheckDims.
//   4. Require pad_value.numel() to be 1 or step_width (InvalidArgument
//      otherwise).
//   5. Pre-fill all of `pad_tensor` with the pad value(s).
//   6. Copy the sequence data over the pre-filled buffer via
//      CopyValidData<T>(..., kSeqToPad, layout).
// `context` is not read.
template <typename T>
class PaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& seq_tensor,
                  framework::LoDTensor* pad_tensor,
                  const framework::LoDTensor& pad_value,
                  int pad_seq_len = -1,
                  int lod_level = 0,
                  bool norm_by_times = false,
                  const PadLayout layout = kBatchLengthWidth) {
    auto lod_copy = seq_tensor.lod();
    const auto offsets = framework::ToAbsOffset(lod_copy)[lod_level];
    const auto& seq_dims = seq_tensor.dims();
    const auto& pad_dims = pad_tensor->dims();

    // -1 means "derive the padded length from the sequences themselves".
    if (pad_seq_len == -1) {
      pad_seq_len = MaximumSequenceLength(offsets);
    }

    int step_width = seq_tensor.numel() / seq_dims[0];

    CheckDims(seq_dims, pad_dims, offsets, pad_seq_len, step_width, layout);

    PADDLE_ENFORCE_EQ(
        pad_value.numel() == 1 || pad_value.numel() == step_width,
        true,
        platform::errors::InvalidArgument(
            "The numel of 'pad_value' can only be 1 or be equal to the "
            "'step_width', but got %ld != 1 and %ld. Please check the input "
            "value.",
            pad_value.numel(),
            step_width));

    // Pre-fill the whole padded buffer with the padding value(s).
    T* pad_data = pad_tensor->data<T>();
    const T* pad_value_data = pad_value.data<T>();
    if (pad_value.numel() == 1) {
      // Single scalar: replicate it over every element.
      fast_mem_init<T>(
          pad_data, pad_tensor->numel(), pad_value_data, sizeof(T));
    } else {
      // One value per step column: copy the pattern step by step.
      int offset = 0;
      while (offset < pad_tensor->numel()) {
        memcpy(pad_data + offset, pad_value_data, step_width * sizeof(T));
        offset += step_width;
      }
    }

    CopyValidData<T>(pad_tensor,
                     &seq_tensor,
                     offsets,
                     pad_seq_len,
                     step_width,
                     norm_by_times,
                     kSeqToPad,
                     layout);
  }
};
template
<
typename
T
>
template
<
typename
T
>
class
PaddingLoDTensorFunctor
<
phi
::
CPUContext
,
T
>
{
class
PaddingLoDTensorFunctor
<
phi
::
CPUContext
,
T
>
{
public:
public:
...
@@ -221,42 +154,6 @@ class PaddingLoDTensorFunctor<phi::CPUContext, T> {
...
@@ -221,42 +154,6 @@ class PaddingLoDTensorFunctor<phi::CPUContext, T> {
}
}
};
};
// UnpaddingLoDTensorFunctor specialization for platform::CPUDeviceContext.
//
// Reverse direction of the padding functor: after validating shapes with
// CheckDims, data is copied from `pad_tensor` into `seq_tensor` through
// CopyValidData<T>(..., kPadToSeq, layout). The offsets come from the LoD of
// `seq_tensor` at `lod_level`; a pad_seq_len of -1 is replaced by
// MaximumSequenceLength(offsets). `context` is not read.
template <typename T>
class UnpaddingLoDTensorFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::LoDTensor& pad_tensor,
                  framework::LoDTensor* seq_tensor,
                  int pad_seq_len = -1,
                  int lod_level = 0,
                  bool norm_by_times = false,
                  const PadLayout layout = kBatchLengthWidth) {
    const auto offsets = framework::ToAbsOffset(seq_tensor->lod())[lod_level];
    const auto& seq_dims = seq_tensor->dims();
    const auto& pad_dims = pad_tensor.dims();

    // -1 means "derive the padded length from the sequences themselves".
    if (pad_seq_len == -1) {
      pad_seq_len = MaximumSequenceLength(offsets);
    }

    int step_width = seq_tensor->numel() / seq_dims[0];

    CheckDims(seq_dims, pad_dims, offsets, pad_seq_len, step_width, layout);

    CopyValidData<T>(seq_tensor,
                     &pad_tensor,
                     offsets,
                     pad_seq_len,
                     step_width,
                     norm_by_times,
                     kPadToSeq,
                     layout);
  }
};
template
<
typename
T
>
template
<
typename
T
>
class
UnpaddingLoDTensorFunctor
<
phi
::
CPUContext
,
T
>
{
class
UnpaddingLoDTensorFunctor
<
phi
::
CPUContext
,
T
>
{
public:
public:
...
@@ -293,16 +190,6 @@ class UnpaddingLoDTensorFunctor<phi::CPUContext, T> {
...
@@ -293,16 +190,6 @@ class UnpaddingLoDTensorFunctor<phi::CPUContext, T> {
}
}
};
};
template
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
int
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
int64_t
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
PaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
int
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
int64_t
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
UnpaddingLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
class
PaddingLoDTensorFunctor
<
phi
::
CPUContext
,
int
>;
template
class
PaddingLoDTensorFunctor
<
phi
::
CPUContext
,
int
>;
template
class
PaddingLoDTensorFunctor
<
phi
::
CPUContext
,
int64_t
>;
template
class
PaddingLoDTensorFunctor
<
phi
::
CPUContext
,
int64_t
>;
template
class
PaddingLoDTensorFunctor
<
phi
::
CPUContext
,
float
>;
template
class
PaddingLoDTensorFunctor
<
phi
::
CPUContext
,
float
>;
...
...
paddle/fluid/operators/math/sequence_scale.cc
浏览文件 @
09096aeb
...
@@ -24,29 +24,6 @@ namespace paddle {
...
@@ -24,29 +24,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
// ScaleLoDTensorFunctor specialization for platform::CPUDeviceContext.
//
// Multiplies the data of `seq` in place: for sequence i (delimited by
// consecutive entries of lod[0]), every element in
// [lod[0][i] * width, lod[0][i + 1] * width) is multiplied by scales[i],
// where width = seq->dims()[1].
template <typename T>
class ScaleLoDTensorFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const T* scales,
                  framework::LoDTensor* seq) {
    const size_t level = 0;
    auto lod = seq->lod();
    const size_t num_seq = lod[level].size() - 1;
    size_t seq_width = seq->dims()[1];

    // NOTE(review): abs_offset_lod is computed but the loop below indexes
    // `lod` directly - confirm whether the absolute offsets were intended.
    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);

    T* seq_data = seq->mutable_data<T>(context.GetPlace());
    for (size_t seq_idx = 0; seq_idx < num_seq; ++seq_idx) {
      const size_t first = lod[level][seq_idx] * seq_width;
      const size_t last = lod[level][seq_idx + 1] * seq_width;
      for (size_t pos = first; pos < last; ++pos) {
        seq_data[pos] *= scales[seq_idx];
      }
    }
  }
};
template
<
typename
T
>
template
<
typename
T
>
class
ScaleLoDTensorFunctor
<
phi
::
CPUContext
,
T
>
{
class
ScaleLoDTensorFunctor
<
phi
::
CPUContext
,
T
>
{
public:
public:
...
@@ -70,9 +47,6 @@ class ScaleLoDTensorFunctor<phi::CPUContext, T> {
...
@@ -70,9 +47,6 @@ class ScaleLoDTensorFunctor<phi::CPUContext, T> {
}
}
};
};
template
class
ScaleLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
ScaleLoDTensorFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
class
ScaleLoDTensorFunctor
<
phi
::
CPUContext
,
float
>;
template
class
ScaleLoDTensorFunctor
<
phi
::
CPUContext
,
float
>;
template
class
ScaleLoDTensorFunctor
<
phi
::
CPUContext
,
double
>;
template
class
ScaleLoDTensorFunctor
<
phi
::
CPUContext
,
double
>;
...
...
paddle/fluid/operators/math/softmax.cc
浏览文件 @
09096aeb
...
@@ -21,13 +21,6 @@ namespace paddle {
...
@@ -21,13 +21,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
template
class
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
float
,
true
>;
template
class
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
float
,
false
>;
template
class
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
double
,
true
>;
template
class
SoftmaxFunctor
<
platform
::
CPUDeviceContext
,
double
,
false
>;
template
class
SoftmaxGradFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
SoftmaxGradFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
class
SoftmaxFunctor
<
phi
::
CPUContext
,
float
,
true
>;
template
class
SoftmaxFunctor
<
phi
::
CPUContext
,
float
,
true
>;
template
class
SoftmaxFunctor
<
phi
::
CPUContext
,
float
,
false
>;
template
class
SoftmaxFunctor
<
phi
::
CPUContext
,
float
,
false
>;
template
class
SoftmaxFunctor
<
phi
::
CPUContext
,
double
,
true
>;
template
class
SoftmaxFunctor
<
phi
::
CPUContext
,
double
,
true
>;
...
...
paddle/fluid/operators/math/vol2col.cc
浏览文件 @
09096aeb
...
@@ -16,12 +16,6 @@ limitations under the License. */
...
@@ -16,12 +16,6 @@ limitations under the License. */
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
namespace
paddle
{
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
...
@@ -32,126 +26,6 @@ namespace math {
...
@@ -32,126 +26,6 @@ namespace math {
* [input_channels, filter_depth, filter_height, filter_width,
* [input_channels, filter_depth, filter_height, filter_width,
* output_depth, output_height, output_width]
* output_depth, output_height, output_width]
*/
*/
// Vol2ColFunctor specialization for platform::CPUDeviceContext.
//
// Expands the 4-D tensor `vol` into the 7-D tensor `col`. The channel axis of
// `vol` is index 0 unless data_layout is DataLayout::kNHWC, in which case it
// is index 3 and depth/height/width are indices 0/1/2. `col` dims 1..3 are
// read as filter depth/height/width and dims 4..6 as output
// depth/height/width.
//
// `paddings` is read either as six values (front/back, up/down, left/right)
// or, for any other size, as three symmetric values. Positions that fall
// outside the input volume are written as zero.
//
// Raises InvalidArgument when the ranks are not 4 / 7 or when the output
// extents computed from input size, padding, dilation and stride differ from
// those stored in `col`. `context` is not read.
template <class T>
class Vol2ColFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::Tensor& vol,
                  const std::vector<int>& dilations,
                  const std::vector<int>& strides,
                  const std::vector<int>& paddings,
                  framework::Tensor* col,
                  const DataLayout data_layout) const {
    PADDLE_ENFORCE_EQ(vol.dims().size(),
                      4,
                      platform::errors::InvalidArgument(
                          "The dimension of vol should be 4, but received %d.",
                          vol.dims().size()));
    PADDLE_ENFORCE_EQ(col->dims().size(),
                      7,
                      platform::errors::InvalidArgument(
                          "The dimension of col should be 7, but received %d.",
                          col->dims().size()));

    const bool channel_first = (data_layout != DataLayout::kNHWC);
    const int input_channels = channel_first ? vol.dims()[0] : vol.dims()[3];
    const int input_depth = channel_first ? vol.dims()[1] : vol.dims()[0];
    const int input_height = channel_first ? vol.dims()[2] : vol.dims()[1];
    const int input_width = channel_first ? vol.dims()[3] : vol.dims()[2];

    const int filter_depth = col->dims()[1];
    const int filter_height = col->dims()[2];
    const int filter_width = col->dims()[3];
    const int output_depth = col->dims()[4];
    const int output_height = col->dims()[5];
    const int output_width = col->dims()[6];
    const int channels_col =
        input_channels * filter_depth * filter_height * filter_width;

    // Six paddings: one value per side. Otherwise: one value per axis.
    const bool six_pads = (paddings.size() == 6);
    const int pad_d_forth = paddings[0];
    const int pad_d_back = six_pads ? paddings[1] : paddings[0];
    const int pad_h_up = six_pads ? paddings[2] : paddings[1];
    const int pad_h_down = six_pads ? paddings[3] : paddings[1];
    const int pad_w_left = six_pads ? paddings[4] : paddings[2];
    const int pad_w_right = six_pads ? paddings[5] : paddings[2];

    // Output extent implied by input size, padding, dilation and stride.
    const auto expected_extent = [](int input,
                                    int pad_before,
                                    int pad_after,
                                    int dilation,
                                    int filter,
                                    int stride) {
      return (input + pad_before + pad_after - ((dilation * (filter - 1) + 1))) /
                 stride +
             1;
    };

    const auto expected_depth = expected_extent(input_depth,
                                                pad_d_forth,
                                                pad_d_back,
                                                dilations[0],
                                                filter_depth,
                                                strides[0]);
    PADDLE_ENFORCE_EQ(
        expected_depth,
        output_depth,
        platform::errors::InvalidArgument(
            "input_depth(%d) and output_depth(%d) are mismatching.",
            expected_depth,
            output_depth));

    const auto expected_height = expected_extent(input_height,
                                                 pad_h_up,
                                                 pad_h_down,
                                                 dilations[1],
                                                 filter_height,
                                                 strides[1]);
    PADDLE_ENFORCE_EQ(
        expected_height,
        output_height,
        platform::errors::InvalidArgument(
            "input_height(%d) and output_height(%d) are mismatching.",
            expected_height,
            output_height));

    const auto expected_width = expected_extent(input_width,
                                                pad_w_left,
                                                pad_w_right,
                                                dilations[2],
                                                filter_width,
                                                strides[2]);
    PADDLE_ENFORCE_EQ(
        expected_width,
        output_width,
        platform::errors::InvalidArgument(
            "input_width(%d) and output_width(%d) are mismatching.",
            expected_width,
            output_width));

    const T* vol_data = vol.data<T>();
    T* col_data = col->data<T>();

    for (int c = 0; c < channels_col; ++c) {
      // Decompose the flat column channel into filter offsets + input channel.
      const int w_offset = c % filter_width;
      const int h_offset = (c / filter_width) % filter_height;
      const int d_offset = (c / filter_width / filter_height) % filter_depth;
      const int c_in = c / filter_width / filter_height / filter_depth;

      for (int d = 0; d < output_depth; ++d) {
        const int d_pad = d * strides[0] - pad_d_forth + d_offset * dilations[0];
        for (int h = 0; h < output_height; ++h) {
          const int h_pad = h * strides[1] - pad_h_up + h_offset * dilations[1];
          for (int w = 0; w < output_width; ++w) {
            const int w_pad =
                w * strides[2] - pad_w_left + w_offset * dilations[2];

            const int col_idx =
                ((c * output_depth + d) * output_height + h) * output_width + w;

            const bool inside = h_pad >= 0 && h_pad < input_height &&
                                w_pad >= 0 && w_pad < input_width &&
                                d_pad >= 0 && d_pad < input_depth;
            if (!inside) {
              // Padding region: contributes zero.
              col_data[col_idx] = static_cast<T>(0);
              continue;
            }

            int vol_idx;
            if (channel_first) {
              vol_idx = ((c_in * input_depth + d_pad) * input_height + h_pad) *
                            input_width +
                        w_pad;
            } else {
              vol_idx = ((d_pad * input_height + h_pad) * input_width + w_pad) *
                            input_channels +
                        c_in;
            }
            col_data[col_idx] = vol_data[vol_idx];
          }
        }
      }
    }
  }
};
template
<
class
T
>
template
<
class
T
>
class
Vol2ColFunctor
<
phi
::
CPUContext
,
T
>
{
class
Vol2ColFunctor
<
phi
::
CPUContext
,
T
>
{
public:
public:
...
@@ -278,126 +152,6 @@ class Vol2ColFunctor<phi::CPUContext, T> {
...
@@ -278,126 +152,6 @@ class Vol2ColFunctor<phi::CPUContext, T> {
* [input_channels, filter_depth, filter_height, filter_width,
* [input_channels, filter_depth, filter_height, filter_width,
* output_depth, output_height, output_width]
* output_depth, output_height, output_width]
*/
*/
// Col2VolFunctor specialization for platform::CPUDeviceContext.
//
// Accumulates the 7-D tensor `col` back into the 4-D tensor `vol`: every
// column element whose source position lies inside the volume is added (+=)
// to the matching volume element; positions in the padding region are
// skipped. `vol` is only accumulated into here, never cleared.
//
// The channel axis of `vol` is index 0 unless data_layout is
// DataLayout::kNHWC, in which case it is index 3 and depth/height/width are
// indices 0/1/2. `paddings` is read either as six per-side values or, for any
// other size, as three symmetric values.
//
// Raises InvalidArgument when the ranks are not 4 / 7 or when the output
// extents computed from input size, padding, dilation and stride differ from
// those stored in `col`. `context` is not read.
template <class T>
class Col2VolFunctor<platform::CPUDeviceContext, T> {
 public:
  void operator()(const platform::CPUDeviceContext& context,
                  const framework::Tensor& col,
                  const std::vector<int>& dilations,
                  const std::vector<int>& strides,
                  const std::vector<int>& paddings,
                  framework::Tensor* vol,
                  const DataLayout data_layout) const {
    PADDLE_ENFORCE_EQ(vol->dims().size(),
                      4,
                      platform::errors::InvalidArgument(
                          "The dimension of vol should be 4, but received %d.",
                          vol->dims().size()));
    PADDLE_ENFORCE_EQ(col.dims().size(),
                      7,
                      platform::errors::InvalidArgument(
                          "The dimension of col should be 7, but received %d.",
                          col.dims().size()));

    const bool channel_first = (data_layout != DataLayout::kNHWC);
    const int input_channels = channel_first ? vol->dims()[0] : vol->dims()[3];
    const int input_depth = channel_first ? vol->dims()[1] : vol->dims()[0];
    const int input_height = channel_first ? vol->dims()[2] : vol->dims()[1];
    const int input_width = channel_first ? vol->dims()[3] : vol->dims()[2];

    const int filter_depth = col.dims()[1];
    const int filter_height = col.dims()[2];
    const int filter_width = col.dims()[3];
    const int output_depth = col.dims()[4];
    const int output_height = col.dims()[5];
    const int output_width = col.dims()[6];
    const int channels_col =
        input_channels * filter_depth * filter_height * filter_width;

    // Six paddings: one value per side. Otherwise: one value per axis.
    const bool six_pads = (paddings.size() == 6);
    const int pad_d_forth = paddings[0];
    const int pad_d_back = six_pads ? paddings[1] : paddings[0];
    const int pad_h_up = six_pads ? paddings[2] : paddings[1];
    const int pad_h_down = six_pads ? paddings[3] : paddings[1];
    const int pad_w_left = six_pads ? paddings[4] : paddings[2];
    const int pad_w_right = six_pads ? paddings[5] : paddings[2];

    // Output extent implied by input size, padding, dilation and stride.
    const auto expected_extent = [](int input,
                                    int pad_before,
                                    int pad_after,
                                    int dilation,
                                    int filter,
                                    int stride) {
      return (input + pad_before + pad_after - ((dilation * (filter - 1) + 1))) /
                 stride +
             1;
    };

    const auto expected_depth = expected_extent(input_depth,
                                                pad_d_forth,
                                                pad_d_back,
                                                dilations[0],
                                                filter_depth,
                                                strides[0]);
    PADDLE_ENFORCE_EQ(
        expected_depth,
        output_depth,
        platform::errors::InvalidArgument(
            "input_depth(%d) and output_depth(%d) are mismatching.",
            expected_depth,
            output_depth));

    const auto expected_height = expected_extent(input_height,
                                                 pad_h_up,
                                                 pad_h_down,
                                                 dilations[1],
                                                 filter_height,
                                                 strides[1]);
    PADDLE_ENFORCE_EQ(
        expected_height,
        output_height,
        platform::errors::InvalidArgument(
            "input_height(%d) and output_height(%d) are mismatching.",
            expected_height,
            output_height));

    const auto expected_width = expected_extent(input_width,
                                                pad_w_left,
                                                pad_w_right,
                                                dilations[2],
                                                filter_width,
                                                strides[2]);
    PADDLE_ENFORCE_EQ(
        expected_width,
        output_width,
        platform::errors::InvalidArgument(
            "input_width(%d) and output_width(%d) are mismatching.",
            expected_width,
            output_width));

    T* vol_data = vol->data<T>();
    const T* col_data = col.data<T>();

    for (int c = 0; c < channels_col; ++c) {
      // Decompose the flat column channel into filter offsets + input channel.
      const int w_offset = c % filter_width;
      const int h_offset = (c / filter_width) % filter_height;
      const int d_offset = (c / filter_width / filter_height) % filter_depth;
      const int c_im = c / filter_width / filter_height / filter_depth;

      for (int d = 0; d < output_depth; ++d) {
        const int d_pad = d * strides[0] - pad_d_forth + d_offset * dilations[0];
        for (int h = 0; h < output_height; ++h) {
          const int h_pad = h * strides[1] - pad_h_up + h_offset * dilations[1];
          for (int w = 0; w < output_width; ++w) {
            const int w_pad =
                w * strides[2] - pad_w_left + w_offset * dilations[2];

            const bool inside = h_pad >= 0 && h_pad < input_height &&
                                w_pad >= 0 && w_pad < input_width &&
                                d_pad >= 0 && d_pad < input_depth;
            if (!inside) {
              // Padding region: nothing to accumulate.
              continue;
            }

            int vol_idx;
            if (channel_first) {
              vol_idx = ((c_im * input_depth + d_pad) * input_height + h_pad) *
                            input_width +
                        w_pad;
            } else {
              vol_idx = ((d_pad * input_height + h_pad) * input_width + w_pad) *
                            input_channels +
                        c_im;
            }

            const int col_idx =
                ((c * output_depth + d) * output_height + h) * output_width + w;
            vol_data[vol_idx] += col_data[col_idx];
          }
        }
      }
    }
  }
};
template
<
class
T
>
template
<
class
T
>
class
Col2VolFunctor
<
phi
::
CPUContext
,
T
>
{
class
Col2VolFunctor
<
phi
::
CPUContext
,
T
>
{
public:
public:
...
@@ -518,13 +272,9 @@ class Col2VolFunctor<phi::CPUContext, T> {
...
@@ -518,13 +272,9 @@ class Col2VolFunctor<phi::CPUContext, T> {
}
}
};
};
template
class
Vol2ColFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
Vol2ColFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
class
Vol2ColFunctor
<
phi
::
CPUContext
,
float
>;
template
class
Vol2ColFunctor
<
phi
::
CPUContext
,
float
>;
template
class
Vol2ColFunctor
<
phi
::
CPUContext
,
double
>;
template
class
Vol2ColFunctor
<
phi
::
CPUContext
,
double
>;
template
class
Col2VolFunctor
<
platform
::
CPUDeviceContext
,
float
>;
template
class
Col2VolFunctor
<
platform
::
CPUDeviceContext
,
double
>;
template
class
Col2VolFunctor
<
phi
::
CPUContext
,
float
>;
template
class
Col2VolFunctor
<
phi
::
CPUContext
,
float
>;
template
class
Col2VolFunctor
<
phi
::
CPUContext
,
double
>;
template
class
Col2VolFunctor
<
phi
::
CPUContext
,
double
>;
...
...
paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
浏览文件 @
09096aeb
...
@@ -34,7 +34,6 @@ class DenseTensor;
...
@@ -34,7 +34,6 @@ class DenseTensor;
namespace
paddle
{
namespace
paddle
{
namespace
framework
{}
// namespace framework
namespace
framework
{}
// namespace framework
namespace
platform
{
namespace
platform
{
class
CPUDeviceContext
;
class
MKLDNNDeviceContext
;
class
MKLDNNDeviceContext
;
}
// namespace platform
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
...
...
paddle/fluid/operators/rank_loss_op.cc
浏览文件 @
09096aeb
...
@@ -24,9 +24,6 @@ class OpDesc;
...
@@ -24,9 +24,6 @@ class OpDesc;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc
浏览文件 @
09096aeb
...
@@ -27,9 +27,6 @@ class OpDesc;
...
@@ -27,9 +27,6 @@ class OpDesc;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/reduce_ops/reduce_all_op.cc
浏览文件 @
09096aeb
...
@@ -27,9 +27,6 @@ class EmptyGradOpMaker;
...
@@ -27,9 +27,6 @@ class EmptyGradOpMaker;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
DECLARE_INFER_SHAPE_FUNCTOR
(
reduce_all
,
DECLARE_INFER_SHAPE_FUNCTOR
(
reduce_all
,
...
...
paddle/fluid/operators/reduce_ops/reduce_any_op.cc
浏览文件 @
09096aeb
...
@@ -26,9 +26,6 @@ class EmptyGradOpMaker;
...
@@ -26,9 +26,6 @@ class EmptyGradOpMaker;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
DECLARE_INFER_SHAPE_FUNCTOR
(
reduce_any
,
DECLARE_INFER_SHAPE_FUNCTOR
(
reduce_any
,
...
...
paddle/fluid/operators/reduce_ops/reduce_prod_op.cc
浏览文件 @
09096aeb
...
@@ -25,9 +25,6 @@ class OpDesc;
...
@@ -25,9 +25,6 @@ class OpDesc;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/reduce_ops/reduce_sum_op.cc
浏览文件 @
09096aeb
...
@@ -27,9 +27,6 @@ class OpDesc;
...
@@ -27,9 +27,6 @@ class OpDesc;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/operators/set_value_op.cc
浏览文件 @
09096aeb
...
@@ -31,9 +31,6 @@ class EmptyGradOpMaker;
...
@@ -31,9 +31,6 @@ class EmptyGradOpMaker;
namespace
imperative
{
namespace
imperative
{
class
OpBase
;
class
OpBase
;
}
// namespace imperative
}
// namespace imperative
namespace
platform
{
class
CPUDeviceContext
;
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
namespace
paddle
{
namespace
paddle
{
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
09096aeb
...
@@ -367,14 +367,6 @@ DeviceContextPool::DeviceContextPool(
...
@@ -367,14 +367,6 @@ DeviceContextPool::DeviceContextPool(
/*disable_setting_default_stream_for_allocator=*/
false
);
/*disable_setting_default_stream_for_allocator=*/
false
);
}
}
CPUDeviceContext
::
CPUDeviceContext
()
:
phi
::
CPUContext
()
{
phi
::
CPUContext
::
Init
();
}
CPUDeviceContext
::
CPUDeviceContext
(
CPUPlace
place
)
:
phi
::
CPUContext
(
place
)
{
phi
::
CPUContext
::
Init
();
}
#ifdef PADDLE_WITH_IPU
#ifdef PADDLE_WITH_IPU
IPUDeviceContext
::
IPUDeviceContext
(
IPUPlace
place
)
:
place_
(
place
)
{}
IPUDeviceContext
::
IPUDeviceContext
(
IPUPlace
place
)
:
place_
(
place
)
{}
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
09096aeb
...
@@ -134,14 +134,7 @@ constexpr DeviceType kMLU = DeviceType::MLU;
...
@@ -134,14 +134,7 @@ constexpr DeviceType kMLU = DeviceType::MLU;
using
DeviceContext
=
phi
::
DeviceContext
;
using
DeviceContext
=
phi
::
DeviceContext
;
// using CPUDeviceContext = phi::CPUContext;
using
CPUDeviceContext
=
phi
::
CPUContext
;
// TODO(wilber): The place constructor is used in many places, it is more
// difficult to use CPUDeviceContext = phi::CPUContext directly.
class
CPUDeviceContext
:
public
phi
::
CPUContext
{
public:
CPUDeviceContext
();
explicit
CPUDeviceContext
(
CPUPlace
place
);
};
template
<
typename
Place
>
template
<
typename
Place
>
struct
DefaultDeviceContextType
;
struct
DefaultDeviceContextType
;
...
...
paddle/fluid/platform/transform.h
浏览文件 @
09096aeb
...
@@ -69,30 +69,6 @@ struct Transform {
...
@@ -69,30 +69,6 @@ struct Transform {
};
};
// NOTE: After the phi kernel is migrated, it needs to be deleted.
// NOTE: After the phi kernel is migrated, it needs to be deleted.
template
<
>
struct
Transform
<
platform
::
CPUDeviceContext
>
{
template
<
typename
InputIter
,
typename
OutputIter
,
typename
UnaryOperation
>
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
InputIter
first
,
InputIter
last
,
OutputIter
result
,
UnaryOperation
op
)
{
std
::
transform
(
first
,
last
,
result
,
op
);
}
template
<
typename
InputIter1
,
typename
InputIter2
,
typename
OutputIter
,
typename
BinaryOperation
>
void
operator
()(
const
platform
::
CPUDeviceContext
&
context
,
InputIter1
first1
,
InputIter1
last1
,
InputIter2
first2
,
OutputIter
result
,
BinaryOperation
op
)
{
std
::
transform
(
first1
,
last1
,
first2
,
result
,
op
);
}
};
template
<
>
template
<
>
struct
Transform
<
phi
::
CPUContext
>
{
struct
Transform
<
phi
::
CPUContext
>
{
...
...
paddle/infrt/kernel/phi/context_kernels.cc
浏览文件 @
09096aeb
...
@@ -20,7 +20,6 @@ namespace phi {
...
@@ -20,7 +20,6 @@ namespace phi {
::
phi
::
CPUContext
CreateCPUContext
()
{
::
phi
::
CPUContext
CreateCPUContext
()
{
::
phi
::
CPUContext
ctx
{};
::
phi
::
CPUContext
ctx
{};
ctx
.
Init
();
auto
allocator
=
new
backends
::
CpuPhiAllocator
{};
auto
allocator
=
new
backends
::
CpuPhiAllocator
{};
ctx
.
SetAllocator
(
allocator
);
ctx
.
SetAllocator
(
allocator
);
ctx
.
SetHostAllocator
(
allocator
);
ctx
.
SetHostAllocator
(
allocator
);
...
...
paddle/infrt/kernel/phi/infershaped/infershape_launchers_test.cc
浏览文件 @
09096aeb
...
@@ -81,7 +81,6 @@ TEST(ElementwiseAdd, launcher_registry) {
...
@@ -81,7 +81,6 @@ TEST(ElementwiseAdd, launcher_registry) {
::
phi
::
CPUContext
context
;
::
phi
::
CPUContext
context
;
context
.
SetAllocator
(
alloc
);
context
.
SetAllocator
(
alloc
);
context
.
Init
();
host_context
::
KernelFrameBuilder
kernel_frame_builder
;
host_context
::
KernelFrameBuilder
kernel_frame_builder
;
kernel_frame_builder
.
AddArgument
(
new
host_context
::
Value
(
std
::
move
(
context
)));
kernel_frame_builder
.
AddArgument
(
new
host_context
::
Value
(
std
::
move
(
context
)));
...
...
paddle/phi/backends/cpu/cpu_context.cc
浏览文件 @
09096aeb
...
@@ -51,10 +51,14 @@ struct CPUContext::Impl {
...
@@ -51,10 +51,14 @@ struct CPUContext::Impl {
};
};
CPUContext
::
CPUContext
()
CPUContext
::
CPUContext
()
:
DeviceContext
(),
impl_
(
std
::
make_unique
<
CPUContext
::
Impl
>
())
{}
:
DeviceContext
(),
impl_
(
std
::
make_unique
<
CPUContext
::
Impl
>
())
{
impl_
->
Init
();
}
CPUContext
::
CPUContext
(
const
Place
&
place
)
CPUContext
::
CPUContext
(
const
Place
&
place
)
:
DeviceContext
(),
impl_
(
std
::
make_unique
<
CPUContext
::
Impl
>
(
place
))
{}
:
DeviceContext
(),
impl_
(
std
::
make_unique
<
CPUContext
::
Impl
>
(
place
))
{
impl_
->
Init
();
}
CPUContext
::~
CPUContext
()
=
default
;
CPUContext
::~
CPUContext
()
=
default
;
...
@@ -62,8 +66,6 @@ CPUContext::CPUContext(CPUContext&&) = default;
...
@@ -62,8 +66,6 @@ CPUContext::CPUContext(CPUContext&&) = default;
CPUContext
&
CPUContext
::
operator
=
(
CPUContext
&&
)
=
default
;
CPUContext
&
CPUContext
::
operator
=
(
CPUContext
&&
)
=
default
;
void
CPUContext
::
Init
()
{
impl_
->
Init
();
}
Eigen
::
DefaultDevice
*
CPUContext
::
eigen_device
()
const
{
Eigen
::
DefaultDevice
*
CPUContext
::
eigen_device
()
const
{
return
impl_
->
GetEigenDevice
();
return
impl_
->
GetEigenDevice
();
}
}
...
...
paddle/phi/backends/cpu/cpu_context.h
浏览文件 @
09096aeb
...
@@ -34,12 +34,6 @@ class PADDLE_API CPUContext : public DeviceContext {
...
@@ -34,12 +34,6 @@ class PADDLE_API CPUContext : public DeviceContext {
Eigen
::
DefaultDevice
*
eigen_device
()
const
;
Eigen
::
DefaultDevice
*
eigen_device
()
const
;
const
Place
&
GetPlace
()
const
override
;
const
Place
&
GetPlace
()
const
override
;
public:
// NOTE: DeviceContext hold resources. Used in training scenarios.
// The interface used by the training scene, DeviceContext will initialize
// all resources and delete them when destructing.
void
Init
();
protected:
protected:
// NOTE: External users manage resources. Used in inference scenarios.
// NOTE: External users manage resources. Used in inference scenarios.
// The Set interface is for inference only, DeviceContext will mark the
// The Set interface is for inference only, DeviceContext will mark the
...
...
paddle/phi/kernels/funcs/blas/blas_impl.h
浏览文件 @
09096aeb
...
@@ -1003,12 +1003,6 @@ struct CBlas<phi::dtype::float16> {
...
@@ -1003,12 +1003,6 @@ struct CBlas<phi::dtype::float16> {
#ifdef PADDLE_WITH_MKLML
#ifdef PADDLE_WITH_MKLML
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
T
*
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
GEMM_ALLOC
(
const
CBLAS_IDENTIFIER
id
,
const
int
M
,
const
int
N
,
const
int
K
)
const
{
return
CBlas
<
T
>::
GEMM_ALLOC
(
id
,
M
,
N
,
K
);
}
template
<
>
template
<
typename
T
>
T
*
Blas
<
phi
::
CPUContext
>::
GEMM_ALLOC
(
const
CBLAS_IDENTIFIER
id
,
T
*
Blas
<
phi
::
CPUContext
>::
GEMM_ALLOC
(
const
CBLAS_IDENTIFIER
id
,
const
int
M
,
const
int
M
,
const
int
N
,
const
int
N
,
...
@@ -1016,20 +1010,6 @@ T *Blas<phi::CPUContext>::GEMM_ALLOC(const CBLAS_IDENTIFIER id,
...
@@ -1016,20 +1010,6 @@ T *Blas<phi::CPUContext>::GEMM_ALLOC(const CBLAS_IDENTIFIER id,
return
CBlas
<
T
>::
GEMM_ALLOC
(
id
,
M
,
N
,
K
);
return
CBlas
<
T
>::
GEMM_ALLOC
(
id
,
M
,
N
,
K
);
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
GEMM_PACK
(
const
CBLAS_IDENTIFIER
id
,
const
CBLAS_TRANSPOSE
trans
,
int
M
,
int
N
,
int
K
,
const
T
alpha
,
const
T
*
src
,
const
int
ld
,
T
*
dst
)
const
{
CBlas
<
T
>::
GEMM_PACK
(
CblasRowMajor
,
id
,
trans
,
M
,
N
,
K
,
alpha
,
src
,
ld
,
dst
);
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
GEMM_PACK
(
const
CBLAS_IDENTIFIER
id
,
void
Blas
<
phi
::
CPUContext
>::
GEMM_PACK
(
const
CBLAS_IDENTIFIER
id
,
...
@@ -1044,24 +1024,6 @@ void Blas<phi::CPUContext>::GEMM_PACK(const CBLAS_IDENTIFIER id,
...
@@ -1044,24 +1024,6 @@ void Blas<phi::CPUContext>::GEMM_PACK(const CBLAS_IDENTIFIER id,
CBlas
<
T
>::
GEMM_PACK
(
CblasRowMajor
,
id
,
trans
,
M
,
N
,
K
,
alpha
,
src
,
ld
,
dst
);
CBlas
<
T
>::
GEMM_PACK
(
CblasRowMajor
,
id
,
trans
,
M
,
N
,
K
,
alpha
,
src
,
ld
,
dst
);
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
GEMM_COMPUTE
(
int
transA
,
int
transB
,
int
M
,
int
N
,
int
K
,
const
T
*
A
,
const
int
lda
,
const
T
*
B
,
const
int
ldb
,
T
beta
,
T
*
C
,
const
int
ldc
)
const
{
CBlas
<
T
>::
GEMM_COMPUTE
(
CblasRowMajor
,
transA
,
transB
,
M
,
N
,
K
,
A
,
lda
,
B
,
ldb
,
beta
,
C
,
ldc
);
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
GEMM_COMPUTE
(
int
transA
,
void
Blas
<
phi
::
CPUContext
>::
GEMM_COMPUTE
(
int
transA
,
...
@@ -1080,11 +1042,6 @@ void Blas<phi::CPUContext>::GEMM_COMPUTE(int transA,
...
@@ -1080,11 +1042,6 @@ void Blas<phi::CPUContext>::GEMM_COMPUTE(int transA,
CblasRowMajor
,
transA
,
transB
,
M
,
N
,
K
,
A
,
lda
,
B
,
ldb
,
beta
,
C
,
ldc
);
CblasRowMajor
,
transA
,
transB
,
M
,
N
,
K
,
A
,
lda
,
B
,
ldb
,
beta
,
C
,
ldc
);
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
GEMM_FREE
(
T
*
data
)
const
{
CBlas
<
T
>::
GEMM_FREE
(
data
);
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
GEMM_FREE
(
T
*
data
)
const
{
void
Blas
<
phi
::
CPUContext
>::
GEMM_FREE
(
T
*
data
)
const
{
...
@@ -1092,36 +1049,6 @@ void Blas<phi::CPUContext>::GEMM_FREE(T *data) const {
...
@@ -1092,36 +1049,6 @@ void Blas<phi::CPUContext>::GEMM_FREE(T *data) const {
}
}
#endif
#endif
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
GEMM
(
CBLAS_TRANSPOSE
transA
,
CBLAS_TRANSPOSE
transB
,
int
M
,
int
N
,
int
K
,
T
alpha
,
const
T
*
A
,
const
T
*
B
,
T
beta
,
T
*
C
)
const
{
int
lda
=
(
transA
==
CblasNoTrans
)
?
K
:
M
;
int
ldb
=
(
transB
==
CblasNoTrans
)
?
N
:
K
;
int
ldc
=
N
;
CBlas
<
T
>::
GEMM
(
CblasRowMajor
,
transA
,
transB
,
M
,
N
,
K
,
alpha
,
A
,
lda
,
B
,
ldb
,
beta
,
C
,
ldc
);
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
GEMM
(
CBLAS_TRANSPOSE
transA
,
void
Blas
<
phi
::
CPUContext
>::
GEMM
(
CBLAS_TRANSPOSE
transA
,
...
@@ -1153,36 +1080,6 @@ void Blas<phi::CPUContext>::GEMM(CBLAS_TRANSPOSE transA,
...
@@ -1153,36 +1080,6 @@ void Blas<phi::CPUContext>::GEMM(CBLAS_TRANSPOSE transA,
ldc
);
ldc
);
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
GEMM
(
bool
transA
,
bool
transB
,
int
M
,
int
N
,
int
K
,
T
alpha
,
const
T
*
A
,
int
lda
,
const
T
*
B
,
int
ldb
,
T
beta
,
T
*
C
,
int
ldc
)
const
{
CBlas
<
T
>::
GEMM
(
CblasRowMajor
,
transA
==
false
?
CblasNoTrans
:
CblasTrans
,
transB
==
false
?
CblasNoTrans
:
CblasTrans
,
M
,
N
,
K
,
alpha
,
A
,
lda
,
B
,
ldb
,
beta
,
C
,
ldc
);
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
GEMM
(
bool
transA
,
void
Blas
<
phi
::
CPUContext
>::
GEMM
(
bool
transA
,
...
@@ -1214,36 +1111,6 @@ void Blas<phi::CPUContext>::GEMM(bool transA,
...
@@ -1214,36 +1111,6 @@ void Blas<phi::CPUContext>::GEMM(bool transA,
ldc
);
ldc
);
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
GEMM
(
CBLAS_TRANSPOSE
transA
,
CBLAS_TRANSPOSE
transB
,
int
M
,
int
N
,
int
K
,
T
alpha
,
const
T
*
A
,
int
lda
,
const
T
*
B
,
int
ldb
,
T
beta
,
T
*
C
,
int
ldc
)
const
{
CBlas
<
T
>::
GEMM
(
CblasRowMajor
,
transA
,
transB
,
M
,
N
,
K
,
alpha
,
A
,
lda
,
B
,
ldb
,
beta
,
C
,
ldc
);
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
GEMM
(
CBLAS_TRANSPOSE
transA
,
void
Blas
<
phi
::
CPUContext
>::
GEMM
(
CBLAS_TRANSPOSE
transA
,
...
@@ -1323,50 +1190,18 @@ void Blas<DeviceContext>::MatMul(const phi::DenseTensor &mat_a,
...
@@ -1323,50 +1190,18 @@ void Blas<DeviceContext>::MatMul(const phi::DenseTensor &mat_a,
mat_out
->
data
<
T
>
());
mat_out
->
data
<
T
>
());
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
AXPY
(
int
n
,
T
alpha
,
const
T
*
x
,
T
*
y
)
const
{
CBlas
<
T
>::
AXPY
(
n
,
alpha
,
x
,
1
,
y
,
1
);
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
AXPY
(
int
n
,
T
alpha
,
const
T
*
x
,
T
*
y
)
const
{
void
Blas
<
phi
::
CPUContext
>::
AXPY
(
int
n
,
T
alpha
,
const
T
*
x
,
T
*
y
)
const
{
CBlas
<
T
>::
AXPY
(
n
,
alpha
,
x
,
1
,
y
,
1
);
CBlas
<
T
>::
AXPY
(
n
,
alpha
,
x
,
1
,
y
,
1
);
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
VCOPY
(
int
n
,
const
T
*
x
,
T
*
y
)
const
{
CBlas
<
T
>::
VCOPY
(
n
,
x
,
1
,
y
,
1
);
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
VCOPY
(
int
n
,
const
T
*
x
,
T
*
y
)
const
{
void
Blas
<
phi
::
CPUContext
>::
VCOPY
(
int
n
,
const
T
*
x
,
T
*
y
)
const
{
CBlas
<
T
>::
VCOPY
(
n
,
x
,
1
,
y
,
1
);
CBlas
<
T
>::
VCOPY
(
n
,
x
,
1
,
y
,
1
);
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
VADD
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
{
#ifdef PADDLE_WITH_MKLML
CBlas
<
T
>::
VADD
(
n
,
x
,
y
,
z
);
#else
if
(
x
==
z
)
{
this
->
template
AXPY
<
T
>(
n
,
(
T
)(
1.
),
y
,
z
);
}
else
{
this
->
template
VCOPY
<
T
>(
n
,
y
,
z
);
this
->
template
AXPY
<
T
>(
n
,
(
T
)(
1.
),
x
,
z
);
}
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
VADD
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
{
void
Blas
<
phi
::
CPUContext
>::
VADD
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
{
...
@@ -1382,21 +1217,6 @@ void Blas<phi::CPUContext>::VADD(int n, const T *x, const T *y, T *z) const {
...
@@ -1382,21 +1217,6 @@ void Blas<phi::CPUContext>::VADD(int n, const T *x, const T *y, T *z) const {
#endif
#endif
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
VSUB
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
{
#ifdef PADDLE_WITH_MKLML
CBlas
<
T
>::
VSUB
(
n
,
x
,
y
,
z
);
#else
// try to find if openblas support vsub
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
z
[
i
]
=
x
[
i
]
-
y
[
i
];
}
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
VSUB
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
{
void
Blas
<
phi
::
CPUContext
>::
VSUB
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
{
...
@@ -1410,21 +1230,6 @@ void Blas<phi::CPUContext>::VSUB(int n, const T *x, const T *y, T *z) const {
...
@@ -1410,21 +1230,6 @@ void Blas<phi::CPUContext>::VSUB(int n, const T *x, const T *y, T *z) const {
#endif
#endif
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
VMUL
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
{
#ifdef PADDLE_WITH_MKLML
CBlas
<
T
>::
VMUL
(
n
,
x
,
y
,
z
);
#else
// try to find if openblas support vmul
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
z
[
i
]
=
x
[
i
]
*
y
[
i
];
}
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
VMUL
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
{
void
Blas
<
phi
::
CPUContext
>::
VMUL
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
{
...
@@ -1438,21 +1243,6 @@ void Blas<phi::CPUContext>::VMUL(int n, const T *x, const T *y, T *z) const {
...
@@ -1438,21 +1243,6 @@ void Blas<phi::CPUContext>::VMUL(int n, const T *x, const T *y, T *z) const {
#endif
#endif
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
VDIV
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
{
#ifdef PADDLE_WITH_MKLML
CBlas
<
T
>::
VDIV
(
n
,
x
,
y
,
z
);
#else
// try to find if openblas support vdiv
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
z
[
i
]
=
x
[
i
]
/
y
[
i
];
}
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
VDIV
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
{
void
Blas
<
phi
::
CPUContext
>::
VDIV
(
int
n
,
const
T
*
x
,
const
T
*
y
,
T
*
z
)
const
{
...
@@ -1466,20 +1256,6 @@ void Blas<phi::CPUContext>::VDIV(int n, const T *x, const T *y, T *z) const {
...
@@ -1466,20 +1256,6 @@ void Blas<phi::CPUContext>::VDIV(int n, const T *x, const T *y, T *z) const {
#endif
#endif
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
VEXP
(
int
n
,
const
T
*
x
,
T
*
y
)
const
{
#ifdef PADDLE_WITH_MKLML
CBlas
<
T
>::
VEXP
(
n
,
x
,
y
);
#else
// try to find if openblas support vexp
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
std
::
exp
(
x
[
i
]);
}
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
VEXP
(
int
n
,
const
T
*
x
,
T
*
y
)
const
{
void
Blas
<
phi
::
CPUContext
>::
VEXP
(
int
n
,
const
T
*
x
,
T
*
y
)
const
{
...
@@ -1493,19 +1269,6 @@ void Blas<phi::CPUContext>::VEXP(int n, const T *x, T *y) const {
...
@@ -1493,19 +1269,6 @@ void Blas<phi::CPUContext>::VEXP(int n, const T *x, T *y) const {
#endif
#endif
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
VSQUARE
(
int
n
,
const
T
*
x
,
T
*
y
)
const
{
#ifdef PADDLE_WITH_MKLML
CBlas
<
T
>::
VSQUARE
(
n
,
x
,
y
);
#else
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
x
[
i
]
*
x
[
i
];
}
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
VSQUARE
(
int
n
,
const
T
*
x
,
T
*
y
)
const
{
void
Blas
<
phi
::
CPUContext
>::
VSQUARE
(
int
n
,
const
T
*
x
,
T
*
y
)
const
{
...
@@ -1518,20 +1281,6 @@ void Blas<phi::CPUContext>::VSQUARE(int n, const T *x, T *y) const {
...
@@ -1518,20 +1281,6 @@ void Blas<phi::CPUContext>::VSQUARE(int n, const T *x, T *y) const {
#endif
#endif
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
VPOW
(
int
n
,
const
T
*
x
,
T
a
,
T
*
y
)
const
{
#ifdef PADDLE_WITH_MKLML
CBlas
<
T
>::
VPOW
(
n
,
x
,
a
,
y
);
#else
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
std
::
pow
(
x
[
i
],
a
);
}
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
VPOW
(
int
n
,
const
T
*
x
,
T
a
,
T
*
y
)
const
{
void
Blas
<
phi
::
CPUContext
>::
VPOW
(
int
n
,
const
T
*
x
,
T
a
,
T
*
y
)
const
{
...
@@ -1544,22 +1293,6 @@ void Blas<phi::CPUContext>::VPOW(int n, const T *x, T a, T *y) const {
...
@@ -1544,22 +1293,6 @@ void Blas<phi::CPUContext>::VPOW(int n, const T *x, T a, T *y) const {
#endif
#endif
}
}
template
<
>
template
<
typename
T
>
T
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
DOT
(
int
n
,
const
T
*
x
,
const
T
*
y
)
const
{
#ifdef PADDLE_WITH_MKLML
return
CBlas
<
T
>::
DOT
(
n
,
x
,
1
,
y
,
1
);
#else
// try to find if openblas support cblas_dot
T
sum
=
0
;
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
sum
+=
x
[
i
]
*
y
[
i
];
}
return
sum
;
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
T
Blas
<
phi
::
CPUContext
>::
DOT
(
int
n
,
const
T
*
x
,
const
T
*
y
)
const
{
T
Blas
<
phi
::
CPUContext
>::
DOT
(
int
n
,
const
T
*
x
,
const
T
*
y
)
const
{
...
@@ -1575,20 +1308,6 @@ T Blas<phi::CPUContext>::DOT(int n, const T *x, const T *y) const {
...
@@ -1575,20 +1308,6 @@ T Blas<phi::CPUContext>::DOT(int n, const T *x, const T *y) const {
#endif
#endif
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
SCAL
(
int
n
,
const
T
a
,
T
*
x
)
const
{
#ifdef PADDLE_WITH_MKLML
CBlas
<
T
>::
SCAL
(
n
,
a
,
x
,
1
);
#else
// try to find if openblas support cblas_scal
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
x
[
i
]
=
a
*
x
[
i
];
}
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
SCAL
(
int
n
,
const
T
a
,
T
*
x
)
const
{
void
Blas
<
phi
::
CPUContext
>::
SCAL
(
int
n
,
const
T
a
,
T
*
x
)
const
{
...
@@ -1602,20 +1321,6 @@ void Blas<phi::CPUContext>::SCAL(int n, const T a, T *x) const {
...
@@ -1602,20 +1321,6 @@ void Blas<phi::CPUContext>::SCAL(int n, const T a, T *x) const {
#endif
#endif
}
}
template
<
>
template
<
typename
T
>
T
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
ASUM
(
int
n
,
T
*
x
,
int
inc
)
const
{
auto
sum
=
static_cast
<
T
>
(
0.0
);
#ifdef PADDLE_WITH_MKLML
sum
=
CBlas
<
T
>::
ASUM
(
n
,
x
,
inc
);
#else
// TODO(jczaja): check if openblas does provide cblas_sasum/cblas_dasum
for
(
int
c
=
0
;
c
<
n
;
++
c
)
{
sum
+=
x
[
c
];
}
#endif
return
sum
;
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
T
Blas
<
phi
::
CPUContext
>::
ASUM
(
int
n
,
T
*
x
,
int
inc
)
const
{
T
Blas
<
phi
::
CPUContext
>::
ASUM
(
int
n
,
T
*
x
,
int
inc
)
const
{
...
@@ -1625,99 +1330,26 @@ T Blas<phi::CPUContext>::ASUM(int n, T *x, int inc) const {
...
@@ -1625,99 +1330,26 @@ T Blas<phi::CPUContext>::ASUM(int n, T *x, int inc) const {
#else
#else
// TODO(jczaja): check if openblas does provide cblas_sasum/cblas_dasum
// TODO(jczaja): check if openblas does provide cblas_sasum/cblas_dasum
for
(
int
c
=
0
;
c
<
n
;
++
c
)
{
for
(
int
c
=
0
;
c
<
n
;
++
c
)
{
sum
+=
x
[
c
];
sum
+=
x
[
c
];
}
#endif
return
sum
;
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
GEMV
(
bool
trans_a
,
int
M
,
int
N
,
T
alpha
,
const
T
*
A
,
const
T
*
B
,
T
beta
,
T
*
C
)
const
{
CBLAS_TRANSPOSE
transA
=
!
trans_a
?
CblasNoTrans
:
CblasTrans
;
CBlas
<
T
>::
GEMV
(
CblasRowMajor
,
transA
,
M
,
N
,
alpha
,
A
,
N
,
B
,
1
,
beta
,
C
,
1
);
}
template
<
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
GEMV
(
bool
trans_a
,
int
M
,
int
N
,
T
alpha
,
const
T
*
A
,
const
T
*
B
,
T
beta
,
T
*
C
)
const
{
CBLAS_TRANSPOSE
transA
=
!
trans_a
?
CblasNoTrans
:
CblasTrans
;
CBlas
<
T
>::
GEMV
(
CblasRowMajor
,
transA
,
M
,
N
,
alpha
,
A
,
N
,
B
,
1
,
beta
,
C
,
1
);
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
BatchedGEMM
(
CBLAS_TRANSPOSE
transA
,
CBLAS_TRANSPOSE
transB
,
int
M
,
int
N
,
int
K
,
T
alpha
,
const
T
*
A
,
const
T
*
B
,
T
beta
,
T
*
C
,
int
batchCount
,
int64_t
strideA
,
int64_t
strideB
)
const
{
PADDLE_ENFORCE_NOT_NULL
(
A
,
phi
::
errors
::
InvalidArgument
(
"Pointer A should not be null."
));
PADDLE_ENFORCE_NOT_NULL
(
B
,
phi
::
errors
::
InvalidArgument
(
"Pointer B should not be null."
));
PADDLE_ENFORCE_NOT_NULL
(
C
,
phi
::
errors
::
InvalidArgument
(
"Pointer C should not be null."
));
#ifdef PADDLE_WITH_MKLML
int
lda
=
(
transA
==
CblasNoTrans
)
?
K
:
M
;
int
ldb
=
(
transB
==
CblasNoTrans
)
?
N
:
K
;
int
ldc
=
N
;
auto
a_array
=
std
::
vector
<
const
T
*>
(
batchCount
);
auto
b_array
=
std
::
vector
<
const
T
*>
(
batchCount
);
auto
c_array
=
std
::
vector
<
T
*>
(
batchCount
);
for
(
int
k
=
0
;
k
<
batchCount
;
++
k
)
{
a_array
[
k
]
=
&
A
[
k
*
strideA
];
b_array
[
k
]
=
&
B
[
k
*
strideB
];
c_array
[
k
]
=
&
C
[
k
*
M
*
N
];
}
CBlas
<
T
>::
GEMM_BATCH
(
CblasRowMajor
,
&
transA
,
&
transB
,
&
M
,
&
N
,
&
K
,
&
alpha
,
a_array
.
data
(),
&
lda
,
b_array
.
data
(),
&
ldb
,
&
beta
,
c_array
.
data
(),
&
ldc
,
1
/* group_count */
,
&
batchCount
);
#else
for
(
int
k
=
0
;
k
<
batchCount
;
++
k
)
{
auto
*
Ak
=
&
A
[
k
*
strideA
];
auto
*
Bk
=
&
B
[
k
*
strideB
];
auto
*
Ck
=
&
C
[
k
*
M
*
N
];
this
->
template
GEMM
<
T
>(
transA
,
transB
,
M
,
N
,
K
,
alpha
,
Ak
,
Bk
,
beta
,
Ck
);
}
}
#endif
#endif
return
sum
;
}
template
<
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
GEMV
(
bool
trans_a
,
int
M
,
int
N
,
T
alpha
,
const
T
*
A
,
const
T
*
B
,
T
beta
,
T
*
C
)
const
{
CBLAS_TRANSPOSE
transA
=
!
trans_a
?
CblasNoTrans
:
CblasTrans
;
CBlas
<
T
>::
GEMV
(
CblasRowMajor
,
transA
,
M
,
N
,
alpha
,
A
,
N
,
B
,
1
,
beta
,
C
,
1
);
}
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
BatchedGEMM
(
CBLAS_TRANSPOSE
transA
,
void
Blas
<
phi
::
CPUContext
>::
BatchedGEMM
(
CBLAS_TRANSPOSE
transA
,
...
@@ -1778,47 +1410,6 @@ void Blas<phi::CPUContext>::BatchedGEMM(CBLAS_TRANSPOSE transA,
...
@@ -1778,47 +1410,6 @@ void Blas<phi::CPUContext>::BatchedGEMM(CBLAS_TRANSPOSE transA,
#endif
#endif
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
BatchedGEMM
(
CBLAS_TRANSPOSE
transA
,
CBLAS_TRANSPOSE
transB
,
int
M
,
int
N
,
int
K
,
T
alpha
,
const
T
**
A
,
const
T
**
B
,
T
beta
,
T
**
C
,
int
batchCount
)
const
{
#ifdef PADDLE_WITH_MKLML
const
int
lda
=
(
std
::
max
)((
transA
==
CblasNoTrans
)
?
K
:
M
,
1
);
const
int
ldb
=
(
std
::
max
)((
transB
==
CblasNoTrans
)
?
N
:
K
,
1
);
const
int
ldc
=
(
std
::
max
)(
N
,
1
);
CBlas
<
T
>::
GEMM_BATCH
(
CblasRowMajor
,
&
transA
,
&
transB
,
&
M
,
&
N
,
&
K
,
&
alpha
,
A
,
&
lda
,
B
,
&
ldb
,
&
beta
,
C
,
&
ldc
,
1
/* group_count */
,
&
batchCount
);
#else
for
(
int
k
=
0
;
k
<
batchCount
;
++
k
)
{
this
->
template
GEMM
<
T
>(
transA
,
transB
,
M
,
N
,
K
,
alpha
,
A
[
k
],
B
[
k
],
beta
,
C
[
k
]);
}
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
BatchedGEMM
(
CBLAS_TRANSPOSE
transA
,
void
Blas
<
phi
::
CPUContext
>::
BatchedGEMM
(
CBLAS_TRANSPOSE
transA
,
...
@@ -1864,113 +1455,6 @@ void Blas<phi::CPUContext>::BatchedGEMM(CBLAS_TRANSPOSE transA,
...
@@ -1864,113 +1455,6 @@ void Blas<phi::CPUContext>::BatchedGEMM(CBLAS_TRANSPOSE transA,
!defined(PADDLE_WITH_HIP) // @{ Group Blas MKLML: BatchedGEMMWithHead
!defined(PADDLE_WITH_HIP) // @{ Group Blas MKLML: BatchedGEMMWithHead
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
BatchedGEMMWithHead
(
CBLAS_TRANSPOSE
transA
,
CBLAS_TRANSPOSE
transB
,
int
W1
,
int
H1
,
int
W2
,
int
H2
,
T
alpha
,
const
T
*
A
,
const
T
*
B
,
T
beta
,
T
*
C
,
int
batchCount
,
int64_t
strideA
,
int64_t
strideB
,
int64_t
head_number
,
bool
split_b_vertical
)
const
{
int
lda
=
(
transA
==
CblasNoTrans
)
?
W1
:
H1
;
int
ldb
=
(
transB
==
CblasNoTrans
)
?
W2
:
H2
;
auto
a_array
=
std
::
vector
<
const
T
*>
(
batchCount
);
auto
b_array
=
std
::
vector
<
const
T
*>
(
batchCount
);
auto
c_array
=
std
::
vector
<
T
*>
(
batchCount
);
if
(
split_b_vertical
)
{
int
ldc
=
W2
;
int
sub_width
=
W2
/
head_number
;
for
(
int
i
=
0
;
i
<
head_number
;
i
++
)
{
int
sub_matA_offset
=
(
transA
==
CblasNoTrans
)
?
i
*
(
W1
/
head_number
)
:
i
*
(
W1
/
head_number
)
*
H1
;
int
sub_matB_offset
=
(
transB
==
CblasNoTrans
)
?
i
*
(
W2
/
head_number
)
:
i
*
(
W2
/
head_number
)
*
H2
;
int
sub_matC_offset
=
i
*
W2
/
head_number
;
for
(
int
k
=
0
;
k
<
batchCount
;
++
k
)
{
a_array
[
k
]
=
&
A
[
k
*
strideA
]
+
sub_matA_offset
;
b_array
[
k
]
=
&
B
[
k
*
strideB
]
+
sub_matB_offset
;
c_array
[
k
]
=
&
C
[
k
*
H1
*
W2
]
+
sub_matC_offset
;
}
CBlas
<
T
>::
GEMM_BATCH
(
CblasRowMajor
,
&
transA
,
&
transB
,
&
H1
,
&
sub_width
,
&
H2
,
&
alpha
,
a_array
.
data
(),
&
lda
,
b_array
.
data
(),
&
ldb
,
&
beta
,
c_array
.
data
(),
&
ldc
,
1
/* group_count */
,
&
batchCount
);
}
}
else
{
PADDLE_ENFORCE_EQ
(
W1
,
H2
,
phi
::
errors
::
InvalidArgument
(
"The fisrt matrix width should be same as second matrix height,"
"but received fisrt matrix width %d"
", second matrix height %d"
,
W1
,
H2
));
int
ldc
=
W2
*
head_number
;
int
sub_width
=
W1
/
head_number
;
for
(
int
i
=
0
;
i
<
head_number
;
i
++
)
{
int
sub_matA_offset
=
(
transA
==
CblasNoTrans
)
?
i
*
(
W1
/
head_number
)
:
i
*
(
W1
/
head_number
)
*
H1
;
int
sub_matB_offset
=
(
transB
==
CblasNoTrans
)
?
i
*
(
W1
/
head_number
)
*
W2
:
i
*
(
W1
/
head_number
);
int
sub_matC_offset
=
i
*
W2
;
for
(
int
k
=
0
;
k
<
batchCount
;
++
k
)
{
a_array
[
k
]
=
&
A
[
k
*
strideA
]
+
sub_matA_offset
;
b_array
[
k
]
=
&
B
[
k
*
strideB
]
+
sub_matB_offset
;
c_array
[
k
]
=
&
C
[
k
*
H1
*
head_number
*
W2
]
+
sub_matC_offset
;
}
CBlas
<
T
>::
GEMM_BATCH
(
CblasRowMajor
,
&
transA
,
&
transB
,
&
H1
,
&
W2
,
&
sub_width
,
&
alpha
,
a_array
.
data
(),
&
lda
,
b_array
.
data
(),
&
ldb
,
&
beta
,
c_array
.
data
(),
&
ldc
,
1
/* group_count */
,
&
batchCount
);
}
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
BatchedGEMMWithHead
(
CBLAS_TRANSPOSE
transA
,
void
Blas
<
phi
::
CPUContext
>::
BatchedGEMMWithHead
(
CBLAS_TRANSPOSE
transA
,
CBLAS_TRANSPOSE
transB
,
CBLAS_TRANSPOSE
transB
,
int
W1
,
int
W1
,
...
@@ -2097,43 +1581,6 @@ void Blas<DeviceContext>::MatMul(
...
@@ -2097,43 +1581,6 @@ void Blas<DeviceContext>::MatMul(
N
);
N
);
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
MatMul
(
const
int
M
,
const
int
N
,
const
int
K
,
const
T
*
A
,
const
T
*
B
,
T
*
C
)
const
{
#ifdef PADDLE_WITH_LIBXSMM
// Refer to https://github.com/hfp/libxsmm/blob/master/README.md
// But the threshold is custom constexpr int LIBXSMM_THRESHOLD = 20 * 20 * 20;
// Since the matrix is very small,
// so the unit of calculation is already very fast,
// and the if( M*N*K < LIBXSMM_THRESHOLD) would be overhead,
// use xsmm directly.
// Note: SMM use ColMajor
const
char
transa
=
'N'
;
const
char
transb
=
'N'
;
const
T
alpha
=
static_cast
<
T
>
(
1
);
const
T
beta
=
static_cast
<
T
>
(
0
);
CBlas
<
T
>::
SMM_GEMM
(
&
transa
,
&
transb
,
&
N
,
&
M
,
&
K
,
&
alpha
,
B
,
&
N
,
A
,
&
K
,
&
beta
,
C
,
&
N
);
return
;
#endif
CBlas
<
T
>::
GEMM
(
CblasRowMajor
,
CblasNoTrans
,
CblasNoTrans
,
M
,
N
,
K
,
static_cast
<
T
>
(
1
),
A
,
K
,
B
,
N
,
static_cast
<
T
>
(
0
),
C
,
N
);
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
MatMul
(
void
Blas
<
phi
::
CPUContext
>::
MatMul
(
...
@@ -2425,20 +1872,6 @@ void Blas<DeviceContext>::VINV(int n, const T *a, T *y) const {
...
@@ -2425,20 +1872,6 @@ void Blas<DeviceContext>::VINV(int n, const T *a, T *y) const {
#endif
#endif
}
}
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
VMERF
(
int
n
,
const
T
*
a
,
T
*
y
,
int64_t
mode
)
const
{
#ifdef PADDLE_WITH_MKLML
CBlas
<
T
>::
VMERF
(
n
,
a
,
y
,
mode
);
#else
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
std
::
erf
(
a
[
i
]);
}
#endif
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
VMERF
(
int
n
,
const
T
*
a
,
T
*
y
,
int64_t
mode
)
const
{
void
Blas
<
phi
::
CPUContext
>::
VMERF
(
int
n
,
const
T
*
a
,
T
*
y
,
int64_t
mode
)
const
{
...
@@ -2454,39 +1887,6 @@ void Blas<phi::CPUContext>::VMERF(int n, const T *a, T *y, int64_t mode) const {
...
@@ -2454,39 +1887,6 @@ void Blas<phi::CPUContext>::VMERF(int n, const T *a, T *y, int64_t mode) const {
#ifdef PADDLE_WITH_MKLML
#ifdef PADDLE_WITH_MKLML
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
CSRMM
(
const
char
*
transa
,
const
int
*
m
,
const
int
*
n
,
const
int
*
k
,
const
T
*
alpha
,
const
char
*
matdescra
,
const
T
*
val
,
const
int
*
indx
,
const
int
*
pntrb
,
const
int
*
pntre
,
const
T
*
b
,
const
int
*
ldb
,
const
T
*
beta
,
T
*
c
,
const
int
*
ldc
)
const
{
CBlas
<
T
>::
CSRMM
(
transa
,
m
,
n
,
k
,
alpha
,
matdescra
,
val
,
indx
,
pntrb
,
pntre
,
b
,
ldb
,
beta
,
c
,
ldc
);
}
template
<
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
CSRMM
(
const
char
*
transa
,
void
Blas
<
phi
::
CPUContext
>::
CSRMM
(
const
char
*
transa
,
const
int
*
m
,
const
int
*
m
,
const
int
*
n
,
const
int
*
n
,
...
@@ -2520,22 +1920,6 @@ void Blas<phi::CPUContext>::CSRMM(const char *transa,
...
@@ -2520,22 +1920,6 @@ void Blas<phi::CPUContext>::CSRMM(const char *transa,
}
}
#endif
#endif
template
<
>
template
<
typename
T
>
void
Blas
<
paddle
::
platform
::
CPUDeviceContext
>::
TRSM
(
CBLAS_SIDE
side
,
CBLAS_UPLO
uplo
,
CBLAS_TRANSPOSE
transA
,
CBLAS_DIAG
diag
,
int
M
,
int
N
,
T
alpha
,
const
T
*
A
,
int
lda
,
T
*
B
,
int
ldb
)
const
{
CBlas
<
T
>::
TRSM
(
CblasRowMajor
,
side
,
uplo
,
transA
,
diag
,
M
,
N
,
alpha
,
A
,
lda
,
B
,
ldb
);
}
template
<
>
template
<
>
template
<
typename
T
>
template
<
typename
T
>
void
Blas
<
phi
::
CPUContext
>::
TRSM
(
CBLAS_SIDE
side
,
void
Blas
<
phi
::
CPUContext
>::
TRSM
(
CBLAS_SIDE
side
,
...
...
paddle/phi/kernels/funcs/fc_functor.cc
浏览文件 @
09096aeb
...
@@ -96,8 +96,6 @@ void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
...
@@ -96,8 +96,6 @@ void FCFunctor<DeviceContext, T>::operator()(const DeviceContext& context,
}
}
}
}
template
class
FCFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
class
FCFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
class
FCFunctor
<
CPUContext
,
float
>;
template
class
FCFunctor
<
CPUContext
,
float
>;
template
class
FCFunctor
<
CPUContext
,
double
>;
template
class
FCFunctor
<
CPUContext
,
double
>;
...
...
paddle/phi/kernels/funcs/for_range.h
浏览文件 @
09096aeb
...
@@ -41,22 +41,6 @@ struct ForRange<phi::CPUContext> {
...
@@ -41,22 +41,6 @@ struct ForRange<phi::CPUContext> {
size_t
limit_
;
size_t
limit_
;
};
};
// NOTE: After the pten kernel is migrated, it needs to be deleted.
template
<
>
struct
ForRange
<
paddle
::
platform
::
CPUDeviceContext
>
{
ForRange
(
const
paddle
::
platform
::
CPUDeviceContext
&
dev_ctx
,
size_t
limit
)
:
dev_ctx_
(
dev_ctx
),
limit_
(
limit
)
{}
template
<
typename
Function
>
void
operator
()(
Function
func
)
const
{
phi
::
funcs
::
ForRange
<
phi
::
CPUContext
>
for_range
(
dev_ctx_
,
limit_
);
for_range
(
func
);
}
const
paddle
::
platform
::
CPUDeviceContext
&
dev_ctx_
;
size_t
limit_
;
};
#if defined(__NVCC__) || defined(__HIPCC__)
#if defined(__NVCC__) || defined(__HIPCC__)
template
<
typename
Function
>
template
<
typename
Function
>
...
...
paddle/phi/kernels/funcs/gru_compute.cc
浏览文件 @
09096aeb
...
@@ -179,60 +179,6 @@ struct GRUUnitGradFunctor<paddle::platform::CPUDeviceContext, T> {
...
@@ -179,60 +179,6 @@ struct GRUUnitGradFunctor<paddle::platform::CPUDeviceContext, T> {
}
}
};
};
template
<
typename
T
>
struct
GRUUnitFunctorV2
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
{
static
void
compute
(
const
paddle
::
platform
::
CPUDeviceContext
&
context
,
GRUMetaValue
<
T
>
value
,
int
frame_size
,
int
batch_size
,
const
phi
::
funcs
::
detail
::
ActivationType
active_node
,
const
phi
::
funcs
::
detail
::
ActivationType
active_gate
)
{
#if !defined(__NVCC__) && !defined(__HIPCC___)
auto
blas
=
phi
::
funcs
::
GetBlas
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
(
context
);
if
(
value
.
prev_out_value
)
{
blas
.
GEMM
(
CblasNoTrans
,
CblasTrans
,
batch_size
,
frame_size
,
frame_size
,
1
,
value
.
prev_out_value
,
value
.
state_weight
,
0
,
value
.
reset_output_value
);
}
detail
::
forward_reset_output
(
phi
::
funcs
::
detail
::
forward
::
gru_resetOutput
<
T
>
(),
value
,
frame_size
,
batch_size
,
active_gate
,
false
,
&
context
);
T
*
cell_state_value
=
value
.
gate_value
+
2
*
frame_size
;
T
*
reset_output_value
=
value
.
reset_output_value
;
for
(
int
b
=
0
;
b
<
batch_size
;
++
b
)
{
blas
.
VADD
(
frame_size
,
cell_state_value
,
reset_output_value
,
cell_state_value
);
cell_state_value
+=
frame_size
*
3
;
reset_output_value
+=
frame_size
;
}
detail
::
forward_final_output
(
phi
::
funcs
::
detail
::
forward
::
gru_finalOutput
<
T
>
(),
value
,
frame_size
,
batch_size
,
active_node
,
true
,
false
,
&
context
);
#endif
}
};
template
<
typename
T
>
template
<
typename
T
>
struct
GRUUnitFunctorV2
<
CPUContext
,
T
>
{
struct
GRUUnitFunctorV2
<
CPUContext
,
T
>
{
static
void
compute
(
const
CPUContext
&
context
,
static
void
compute
(
const
CPUContext
&
context
,
...
@@ -286,131 +232,6 @@ struct GRUUnitFunctorV2<CPUContext, T> {
...
@@ -286,131 +232,6 @@ struct GRUUnitFunctorV2<CPUContext, T> {
}
}
};
};
template
<
typename
T
>
struct
GRUUnitGradFunctorV2
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
{
static
void
compute
(
const
paddle
::
platform
::
CPUDeviceContext
&
context
,
GRUMetaValue
<
T
>
value
,
GRUMetaGrad
<
T
>
grad
,
int
frame_size
,
int
batch_size
,
const
phi
::
funcs
::
detail
::
ActivationType
active_node
,
const
phi
::
funcs
::
detail
::
ActivationType
active_gate
)
{
#if !defined(__NVCC__) && !defined(__HIPCC___)
// calculate grad_update_gate, grad_frame_state,
// grad_reset_output, grad_reset_gate
detail
::
cpu_gru_backward
(
context
,
phi
::
funcs
::
detail
::
backward
::
gru
<
T
>
(),
value
,
grad
,
frame_size
,
batch_size
,
active_node
,
active_gate
);
auto
blas
=
phi
::
funcs
::
GetBlas
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
(
context
);
if
(
grad
.
prev_out_grad
&&
value
.
prev_out_value
)
{
// update prev_out_grad
blas
.
GEMM
(
false
,
false
,
batch_size
,
frame_size
,
frame_size
,
1
,
grad
.
gate_grad
,
frame_size
*
3
,
value
.
gate_weight
,
frame_size
,
1
,
grad
.
prev_out_grad
,
frame_size
);
blas
.
GEMM
(
false
,
false
,
batch_size
,
frame_size
,
frame_size
,
1
,
grad
.
gate_grad
+
frame_size
,
frame_size
*
3
,
value
.
gate_weight
+
frame_size
*
frame_size
,
frame_size
,
1
,
grad
.
prev_out_grad
,
frame_size
);
blas
.
GEMM
(
false
,
false
,
batch_size
,
frame_size
,
frame_size
,
1
,
grad
.
reset_output_grad
,
frame_size
,
value
.
state_weight
,
frame_size
,
1
,
grad
.
prev_out_grad
,
frame_size
);
// update weight_hh_grad
if
(
grad
.
gate_weight_grad
)
{
// reset gate
blas
.
GEMM
(
true
,
false
,
frame_size
,
frame_size
,
batch_size
,
1
,
grad
.
gate_grad
,
frame_size
*
3
,
value
.
prev_out_value
,
frame_size
,
1
,
grad
.
gate_weight_grad
,
frame_size
);
// update gate
blas
.
GEMM
(
true
,
false
,
frame_size
,
frame_size
,
batch_size
,
1
,
grad
.
gate_grad
+
frame_size
,
frame_size
*
3
,
value
.
prev_out_value
,
frame_size
,
1
,
grad
.
gate_weight_grad
+
frame_size
*
frame_size
,
frame_size
);
// cell state
blas
.
GEMM
(
true
,
false
,
frame_size
,
frame_size
,
batch_size
,
1
,
grad
.
reset_output_grad
,
frame_size
,
value
.
prev_out_value
,
frame_size
,
1
,
grad
.
state_weight_grad
,
frame_size
);
}
}
// update bias_hh_grad
T
*
gate_grad
=
grad
.
gate_grad
;
T
*
bias_hh_grad
=
grad
.
bias_hh_grad
;
T
*
state_bias_grad
=
grad
.
bias_hh_grad
+
2
*
frame_size
;
T
*
reset_output_grad
=
grad
.
reset_output_grad
;
for
(
int
b
=
0
;
b
<
batch_size
;
++
b
)
{
blas
.
VADD
(
2
*
frame_size
,
bias_hh_grad
,
gate_grad
,
bias_hh_grad
);
blas
.
VADD
(
frame_size
,
state_bias_grad
,
reset_output_grad
,
state_bias_grad
);
gate_grad
+=
3
*
frame_size
;
reset_output_grad
+=
frame_size
;
}
#endif
}
};
template
<
typename
T
>
template
<
typename
T
>
struct
GRUUnitGradFunctorV2
<
CPUContext
,
T
>
{
struct
GRUUnitGradFunctorV2
<
CPUContext
,
T
>
{
static
void
compute
(
const
CPUContext
&
context
,
static
void
compute
(
const
CPUContext
&
context
,
...
@@ -540,12 +361,6 @@ template struct GRUUnitFunctor<paddle::platform::CPUDeviceContext, double>;
...
@@ -540,12 +361,6 @@ template struct GRUUnitFunctor<paddle::platform::CPUDeviceContext, double>;
template
struct
GRUUnitGradFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
struct
GRUUnitGradFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
struct
GRUUnitGradFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
struct
GRUUnitGradFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
struct
GRUUnitFunctorV2
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
struct
GRUUnitFunctorV2
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
struct
GRUUnitGradFunctorV2
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
struct
GRUUnitGradFunctorV2
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
struct
GRUUnitFunctorV2
<
CPUContext
,
float
>;
template
struct
GRUUnitFunctorV2
<
CPUContext
,
float
>;
template
struct
GRUUnitFunctorV2
<
CPUContext
,
double
>;
template
struct
GRUUnitFunctorV2
<
CPUContext
,
double
>;
template
struct
GRUUnitGradFunctorV2
<
CPUContext
,
float
>;
template
struct
GRUUnitGradFunctorV2
<
CPUContext
,
float
>;
...
...
paddle/phi/kernels/funcs/lstm_compute.cc
浏览文件 @
09096aeb
...
@@ -21,38 +21,6 @@ limitations under the License. */
...
@@ -21,38 +21,6 @@ limitations under the License. */
namespace
phi
{
namespace
phi
{
namespace
funcs
{
namespace
funcs
{
template
<
class
T
>
struct
LstmUnitFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
{
static
void
compute
(
const
paddle
::
platform
::
CPUDeviceContext
&
context
,
LstmMetaValue
<
T
>
value
,
int
frame_size
,
int
batch_size
,
T
cell_clip
,
const
phi
::
funcs
::
detail
::
ActivationType
&
gate_act
,
const
phi
::
funcs
::
detail
::
ActivationType
&
cell_act
,
const
phi
::
funcs
::
detail
::
ActivationType
&
cand_act
,
bool
old_api_version
=
true
)
{
for
(
int
b
=
0
;
b
<
batch_size
;
b
++
)
{
detail
::
cpu_lstm_forward
(
context
,
phi
::
funcs
::
detail
::
forward
::
lstm
<
T
>
(),
value
,
frame_size
,
cell_clip
,
cand_act
,
gate_act
,
cell_act
,
old_api_version
);
value
.
gate_value
+=
frame_size
*
4
;
value
.
state_value
+=
frame_size
;
value
.
state_active_value
+=
frame_size
;
value
.
output_value
+=
frame_size
;
if
(
value
.
prev_state_value
)
{
value
.
prev_state_value
+=
frame_size
;
}
}
}
};
template
<
class
T
>
template
<
class
T
>
struct
LstmUnitFunctor
<
CPUContext
,
T
>
{
struct
LstmUnitFunctor
<
CPUContext
,
T
>
{
static
void
compute
(
const
CPUContext
&
context
,
static
void
compute
(
const
CPUContext
&
context
,
...
@@ -85,49 +53,6 @@ struct LstmUnitFunctor<CPUContext, T> {
...
@@ -85,49 +53,6 @@ struct LstmUnitFunctor<CPUContext, T> {
}
}
};
};
template
<
class
T
>
struct
LstmUnitGradFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
{
static
void
compute
(
const
paddle
::
platform
::
CPUDeviceContext
&
context
,
LstmMetaValue
<
T
>
value
,
LstmMetaGrad
<
T
>
grad
,
int
frame_size
,
int
batch_size
,
T
cell_clip
,
const
phi
::
funcs
::
detail
::
ActivationType
&
gate_act
,
const
phi
::
funcs
::
detail
::
ActivationType
&
cell_act
,
const
phi
::
funcs
::
detail
::
ActivationType
&
cand_act
,
bool
old_api_version
=
true
)
{
for
(
int
b
=
0
;
b
<
batch_size
;
b
++
)
{
detail
::
cpu_lstm_backward
(
context
,
phi
::
funcs
::
detail
::
backward
::
lstm
<
T
>
(),
value
,
grad
,
frame_size
,
cell_clip
,
cand_act
,
gate_act
,
cell_act
,
old_api_version
);
value
.
gate_value
+=
frame_size
*
4
;
value
.
state_value
+=
frame_size
;
value
.
state_active_value
+=
frame_size
;
value
.
output_value
+=
frame_size
;
if
(
value
.
prev_state_value
)
{
value
.
prev_state_value
+=
frame_size
;
}
grad
.
gate_grad
+=
frame_size
*
4
;
grad
.
state_grad
+=
frame_size
;
grad
.
state_active_grad
+=
frame_size
;
grad
.
output_grad
+=
frame_size
;
if
(
grad
.
prev_state_grad
)
{
grad
.
prev_state_grad
+=
frame_size
;
}
}
}
};
template
<
class
T
>
template
<
class
T
>
struct
LstmUnitGradFunctor
<
CPUContext
,
T
>
{
struct
LstmUnitGradFunctor
<
CPUContext
,
T
>
{
static
void
compute
(
const
CPUContext
&
context
,
static
void
compute
(
const
CPUContext
&
context
,
...
@@ -171,11 +96,6 @@ struct LstmUnitGradFunctor<CPUContext, T> {
...
@@ -171,11 +96,6 @@ struct LstmUnitGradFunctor<CPUContext, T> {
}
}
};
};
template
class
LstmUnitFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
class
LstmUnitFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
class
LstmUnitGradFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
class
LstmUnitGradFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
class
LstmUnitFunctor
<
CPUContext
,
float
>;
template
class
LstmUnitFunctor
<
CPUContext
,
float
>;
template
class
LstmUnitFunctor
<
CPUContext
,
double
>;
template
class
LstmUnitFunctor
<
CPUContext
,
double
>;
template
class
LstmUnitGradFunctor
<
CPUContext
,
float
>;
template
class
LstmUnitGradFunctor
<
CPUContext
,
float
>;
...
...
paddle/phi/kernels/funcs/math_function.cc
浏览文件 @
09096aeb
...
@@ -39,22 +39,6 @@ namespace funcs {
...
@@ -39,22 +39,6 @@ namespace funcs {
using
float16
=
phi
::
dtype
::
float16
;
using
float16
=
phi
::
dtype
::
float16
;
template
struct
SetConstant
<
paddle
::
platform
::
CPUDeviceContext
,
phi
::
dtype
::
float16
>;
template
struct
SetConstant
<
paddle
::
platform
::
CPUDeviceContext
,
phi
::
dtype
::
bfloat16
>;
template
struct
SetConstant
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
struct
SetConstant
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
struct
SetConstant
<
paddle
::
platform
::
CPUDeviceContext
,
int16_t
>;
template
struct
SetConstant
<
paddle
::
platform
::
CPUDeviceContext
,
int
>;
template
struct
SetConstant
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
>;
template
struct
SetConstant
<
paddle
::
platform
::
CPUDeviceContext
,
bool
>;
template
struct
SetConstant
<
paddle
::
platform
::
CPUDeviceContext
,
uint8_t
>;
template
struct
SetConstant
<
paddle
::
platform
::
CPUDeviceContext
,
phi
::
dtype
::
complex
<
float
>
>
;
template
struct
SetConstant
<
paddle
::
platform
::
CPUDeviceContext
,
phi
::
dtype
::
complex
<
double
>
>
;
template
struct
SetConstant
<
phi
::
CPUContext
,
phi
::
dtype
::
float16
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
phi
::
dtype
::
float16
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
phi
::
dtype
::
bfloat16
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
phi
::
dtype
::
bfloat16
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
float
>;
template
struct
SetConstant
<
phi
::
CPUContext
,
float
>;
...
@@ -86,32 +70,6 @@ template struct SetConstant<paddle::platform::XPUDeviceContext,
...
@@ -86,32 +70,6 @@ template struct SetConstant<paddle::platform::XPUDeviceContext,
#endif
#endif
#define DEFINE_CPU_TRANS(RANK) \
#define DEFINE_CPU_TRANS(RANK) \
template struct Transpose<paddle::platform::CPUDeviceContext, \
phi::dtype::float16, \
RANK>; \
template struct Transpose<paddle::platform::CPUDeviceContext, \
phi::dtype::bfloat16, \
RANK>; \
template struct Transpose<paddle::platform::CPUDeviceContext, float, RANK>; \
template struct Transpose<paddle::platform::CPUDeviceContext, double, RANK>; \
template struct Transpose<paddle::platform::CPUDeviceContext, int, RANK>; \
template struct Transpose<paddle::platform::CPUDeviceContext, \
int64_t, \
RANK>; \
template struct Transpose<paddle::platform::CPUDeviceContext, bool, RANK>; \
template struct Transpose<paddle::platform::CPUDeviceContext, \
int16_t, \
RANK>; \
template struct Transpose<paddle::platform::CPUDeviceContext, \
uint8_t, \
RANK>; \
template struct Transpose<paddle::platform::CPUDeviceContext, int8_t, RANK>; \
template struct Transpose<paddle::platform::CPUDeviceContext, \
phi::dtype::complex<float>, \
RANK>; \
template struct Transpose<paddle::platform::CPUDeviceContext, \
phi::dtype::complex<double>, \
RANK>; \
template struct Transpose<phi::CPUContext, phi::dtype::float16, RANK>; \
template struct Transpose<phi::CPUContext, phi::dtype::float16, RANK>; \
template struct Transpose<phi::CPUContext, phi::dtype::bfloat16, RANK>; \
template struct Transpose<phi::CPUContext, phi::dtype::bfloat16, RANK>; \
template struct Transpose<phi::CPUContext, float, RANK>; \
template struct Transpose<phi::CPUContext, float, RANK>; \
...
@@ -164,7 +122,6 @@ void TransposeNormal<DeviceContext, T>::operator()(
...
@@ -164,7 +122,6 @@ void TransposeNormal<DeviceContext, T>::operator()(
// define transpose normal
// define transpose normal
#define DEFINE_CPU_TRANS_NORMAL(TYPE) \
#define DEFINE_CPU_TRANS_NORMAL(TYPE) \
template struct TransposeNormal<paddle::platform::CPUDeviceContext, TYPE>; \
template struct TransposeNormal<phi::CPUContext, TYPE>
template struct TransposeNormal<phi::CPUContext, TYPE>
DEFINE_CPU_TRANS_NORMAL
(
phi
::
dtype
::
float16
);
DEFINE_CPU_TRANS_NORMAL
(
phi
::
dtype
::
float16
);
...
@@ -291,6 +248,31 @@ void set_constant(const paddle::platform::DeviceContext& context,
...
@@ -291,6 +248,31 @@ void set_constant(const paddle::platform::DeviceContext& context,
#endif
#endif
}
}
template
struct
ColwiseSum
<
phi
::
CPUContext
,
float
>;
template
struct
ColwiseSum
<
phi
::
CPUContext
,
double
>;
template
struct
ColwiseSum
<
phi
::
CPUContext
,
int
>;
template
struct
ColwiseSum
<
phi
::
CPUContext
,
int64_t
>;
template
struct
RowwiseMean
<
phi
::
CPUContext
,
float
>;
template
struct
RowwiseMean
<
phi
::
CPUContext
,
double
>;
template
<
typename
T
>
struct
ElementwiseAddTo
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
{
void
operator
()(
paddle
::
platform
::
CPUDeviceContext
*
ctx
,
const
paddle
::
framework
::
Tensor
&
src
,
paddle
::
framework
::
Tensor
*
dst
)
{
auto
in
=
paddle
::
framework
::
EigenVector
<
T
>::
Flatten
(
src
);
auto
out
=
paddle
::
framework
::
EigenVector
<
T
>::
Flatten
(
*
dst
);
auto
&
place
=
*
(
ctx
->
eigen_device
());
out
.
device
(
place
)
=
out
+
in
;
}
};
template
struct
ElementwiseAddTo
<
paddle
::
platform
::
CPUDeviceContext
,
phi
::
dtype
::
float16
>;
template
struct
ElementwiseAddTo
<
paddle
::
platform
::
CPUDeviceContext
,
phi
::
dtype
::
bfloat16
>;
template
<
typename
T
>
template
<
typename
T
>
struct
RowwiseAdd
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
{
struct
RowwiseAdd
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
{
void
operator
()(
const
paddle
::
platform
::
CPUDeviceContext
&
context
,
void
operator
()(
const
paddle
::
platform
::
CPUDeviceContext
&
context
,
...
@@ -333,41 +315,5 @@ struct RowwiseAdd<paddle::platform::CPUDeviceContext, T> {
...
@@ -333,41 +315,5 @@ struct RowwiseAdd<paddle::platform::CPUDeviceContext, T> {
template
struct
RowwiseAdd
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
struct
RowwiseAdd
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
struct
RowwiseAdd
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
struct
RowwiseAdd
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
struct
ColwiseSum
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
struct
ColwiseSum
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
struct
ColwiseSum
<
paddle
::
platform
::
CPUDeviceContext
,
int
>;
template
struct
ColwiseSum
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
>;
template
struct
ColwiseSum
<
phi
::
CPUContext
,
float
>;
template
struct
ColwiseSum
<
phi
::
CPUContext
,
double
>;
template
struct
ColwiseSum
<
phi
::
CPUContext
,
int
>;
template
struct
ColwiseSum
<
phi
::
CPUContext
,
int64_t
>;
template
struct
RowwiseSum
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
struct
RowwiseSum
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
struct
RowwiseMean
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
struct
RowwiseMean
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
template
struct
RowwiseMean
<
phi
::
CPUContext
,
float
>;
template
struct
RowwiseMean
<
phi
::
CPUContext
,
double
>;
template
<
typename
T
>
struct
ElementwiseAddTo
<
paddle
::
platform
::
CPUDeviceContext
,
T
>
{
void
operator
()(
paddle
::
platform
::
CPUDeviceContext
*
ctx
,
const
paddle
::
framework
::
Tensor
&
src
,
paddle
::
framework
::
Tensor
*
dst
)
{
auto
in
=
paddle
::
framework
::
EigenVector
<
T
>::
Flatten
(
src
);
auto
out
=
paddle
::
framework
::
EigenVector
<
T
>::
Flatten
(
*
dst
);
auto
&
place
=
*
(
ctx
->
eigen_device
());
out
.
device
(
place
)
=
out
+
in
;
}
};
template
struct
ElementwiseAddTo
<
paddle
::
platform
::
CPUDeviceContext
,
phi
::
dtype
::
float16
>;
template
struct
ElementwiseAddTo
<
paddle
::
platform
::
CPUDeviceContext
,
phi
::
dtype
::
bfloat16
>;
}
// namespace funcs
}
// namespace funcs
}
// namespace phi
}
// namespace phi
paddle/phi/kernels/funcs/matrix_inverse.cc
浏览文件 @
09096aeb
...
@@ -29,9 +29,5 @@ void MatrixInverseFunctor<Context, T>::operator()(const Context& dev_ctx,
...
@@ -29,9 +29,5 @@ void MatrixInverseFunctor<Context, T>::operator()(const Context& dev_ctx,
template
class
MatrixInverseFunctor
<
CPUContext
,
float
>;
template
class
MatrixInverseFunctor
<
CPUContext
,
float
>;
template
class
MatrixInverseFunctor
<
CPUContext
,
double
>;
template
class
MatrixInverseFunctor
<
CPUContext
,
double
>;
// TODO(chenweihang): remove these instantiations later
template
class
MatrixInverseFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
float
>;
template
class
MatrixInverseFunctor
<
paddle
::
platform
::
CPUDeviceContext
,
double
>;
}
// namespace funcs
}
// namespace funcs
}
// namespace phi
}
// namespace phi
paddle/phi/tests/api/test_sparse_utils_api.cc
浏览文件 @
09096aeb
...
@@ -48,7 +48,6 @@ TEST(API, to_sparse_coo) {
...
@@ -48,7 +48,6 @@ TEST(API, to_sparse_coo) {
std
::
copy
(
&
dense_data
[
0
][
0
],
&
dense_data
[
0
][
0
]
+
9
,
dense_x_data
);
std
::
copy
(
&
dense_data
[
0
][
0
],
&
dense_data
[
0
][
0
]
+
9
,
dense_x_data
);
phi
::
CPUContext
dev_ctx_cpu
;
phi
::
CPUContext
dev_ctx_cpu
;
dev_ctx_cpu
.
Init
();
// 1. test dense_to_sparse_coo
// 1. test dense_to_sparse_coo
paddle
::
experimental
::
Tensor
x
(
dense_x
);
paddle
::
experimental
::
Tensor
x
(
dense_x
);
...
...
paddle/phi/tests/common/test_scalar.cu
浏览文件 @
09096aeb
...
@@ -47,7 +47,6 @@ TEST(Scalar, ConstructFromDenseTensor1) {
...
@@ -47,7 +47,6 @@ TEST(Scalar, ConstructFromDenseTensor1) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
float16
>
(
&
dense_x
);
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
float16
>
(
&
dense_x
);
dense_x_data
[
0
]
=
1
;
dense_x_data
[
0
]
=
1
;
...
@@ -67,7 +66,6 @@ TEST(Scalar, ConstructFromDenseTensor2) {
...
@@ -67,7 +66,6 @@ TEST(Scalar, ConstructFromDenseTensor2) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
int16_t
>
(
&
dense_x
);
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
int16_t
>
(
&
dense_x
);
dense_x_data
[
0
]
=
1
;
dense_x_data
[
0
]
=
1
;
...
@@ -87,7 +85,6 @@ TEST(Scalar, ConstructFromDenseTensor3) {
...
@@ -87,7 +85,6 @@ TEST(Scalar, ConstructFromDenseTensor3) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
int8_t
>
(
&
dense_x
);
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
int8_t
>
(
&
dense_x
);
dense_x_data
[
0
]
=
1
;
dense_x_data
[
0
]
=
1
;
...
@@ -107,7 +104,6 @@ TEST(Scalar, ConstructFromDenseTensor4) {
...
@@ -107,7 +104,6 @@ TEST(Scalar, ConstructFromDenseTensor4) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
bool
>
(
&
dense_x
);
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
bool
>
(
&
dense_x
);
dense_x_data
[
0
]
=
true
;
dense_x_data
[
0
]
=
true
;
...
@@ -127,7 +123,6 @@ TEST(Scalar, ConstructFromDenseTensor5) {
...
@@ -127,7 +123,6 @@ TEST(Scalar, ConstructFromDenseTensor5) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
complex64
>
(
&
dense_x
);
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
complex64
>
(
&
dense_x
);
dense_x_data
[
0
]
=
1
;
dense_x_data
[
0
]
=
1
;
...
@@ -148,7 +143,6 @@ TEST(Scalar, ConstructFromDenseTensor6) {
...
@@ -148,7 +143,6 @@ TEST(Scalar, ConstructFromDenseTensor6) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
complex128
>
(
&
dense_x
);
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
complex128
>
(
&
dense_x
);
dense_x_data
[
0
]
=
1
;
dense_x_data
[
0
]
=
1
;
...
@@ -170,7 +164,6 @@ TEST(Scalar, ConstructFromDenseTensor7) {
...
@@ -170,7 +164,6 @@ TEST(Scalar, ConstructFromDenseTensor7) {
.
GetAllocator
(
phi
::
GPUPlace
())
.
GetAllocator
(
phi
::
GPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
dev_ctx
.
Init
();
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
float
>
(
&
dense_x
);
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
float
>
(
&
dense_x
);
FillTensor
<<<
1
,
1
,
0
,
dev_ctx
.
stream
()
>>>
(
dense_x_data
);
FillTensor
<<<
1
,
1
,
0
,
dev_ctx
.
stream
()
>>>
(
dense_x_data
);
dev_ctx
.
Wait
();
dev_ctx
.
Wait
();
...
...
paddle/phi/tests/core/CMakeLists.txt
浏览文件 @
09096aeb
...
@@ -24,10 +24,6 @@ cc_test(
...
@@ -24,10 +24,6 @@ cc_test(
test_op_utils
test_op_utils
SRCS test_op_utils.cc
SRCS test_op_utils.cc
DEPS op_compat_infos
)
DEPS op_compat_infos
)
cc_test
(
test_phi_device_context
SRCS test_device_context.cc
DEPS phi_context cpu_context
)
cc_test
(
cc_test
(
test_meta_fn_utils
test_meta_fn_utils
SRCS test_meta_fn_utils.cc
SRCS test_meta_fn_utils.cc
...
...
paddle/phi/tests/core/test_device_context.cc
已删除
100644 → 0
浏览文件 @
8d9f00a8
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "gtest/gtest.h"
// TODO(wilber): will remove after the cpu, gpu context megre.
#include "paddle/phi/backends/cpu/cpu_context.h"
// #include "paddle/phi/backends/all_context.h"
// NOTE: The paddle framework should add WITH_EIGEN option to support compile
// without eigen.
#include "unsupported/Eigen/CXX11/Tensor"
namespace
phi
{
namespace
tests
{
class
InferenceCPUContext
:
public
CPUContext
{
public:
void
SetEigenDevice
(
Eigen
::
DefaultDevice
*
eigen_device
)
{
CPUContext
::
SetEigenDevice
(
eigen_device
);
}
};
TEST
(
DeviceContext
,
cpu_context
)
{
std
::
cout
<<
"test training scenarios"
<<
std
::
endl
;
{
phi
::
CPUContext
ctx
;
ctx
.
Init
();
EXPECT_TRUE
(
ctx
.
eigen_device
()
!=
nullptr
);
}
std
::
cout
<<
"test inference scenarios"
<<
std
::
endl
;
Eigen
::
DefaultDevice
*
device
=
new
Eigen
::
DefaultDevice
();
{
InferenceCPUContext
ctx
;
ctx
.
SetEigenDevice
(
device
);
EXPECT_TRUE
(
ctx
.
eigen_device
()
!=
nullptr
);
}
delete
device
;
}
}
// namespace tests
}
// namespace phi
paddle/phi/tests/kernels/test_cast_dev_api.cc
浏览文件 @
09096aeb
...
@@ -52,7 +52,6 @@ TEST(DEV_API, cast) {
...
@@ -52,7 +52,6 @@ TEST(DEV_API, cast) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
phi
::
DataType
out_dtype
=
phi
::
DataType
::
FLOAT64
;
phi
::
DataType
out_dtype
=
phi
::
DataType
::
FLOAT64
;
// 2. test API
// 2. test API
...
...
paddle/phi/tests/kernels/test_concat_dev_api.cc
浏览文件 @
09096aeb
...
@@ -60,7 +60,6 @@ TEST(DEV_API, concat) {
...
@@ -60,7 +60,6 @@ TEST(DEV_API, concat) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
out
=
phi
::
Concat
<
float
>
(
dev_ctx
,
inputs
,
0
);
auto
out
=
phi
::
Concat
<
float
>
(
dev_ctx
,
inputs
,
0
);
// 3. check result
// 3. check result
...
...
paddle/phi/tests/kernels/test_conj_dev_api.cc
浏览文件 @
09096aeb
...
@@ -48,7 +48,6 @@ TEST(DEV_API, conj) {
...
@@ -48,7 +48,6 @@ TEST(DEV_API, conj) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
// 2. test API
// 2. test API
auto
out
=
phi
::
Conj
<
paddle
::
complex64
>
(
dev_ctx
,
dense_x
);
auto
out
=
phi
::
Conj
<
paddle
::
complex64
>
(
dev_ctx
,
dense_x
);
...
...
paddle/phi/tests/kernels/test_copy_dev_api.cc
浏览文件 @
09096aeb
...
@@ -65,7 +65,6 @@ TEST(DEV_API, copy) {
...
@@ -65,7 +65,6 @@ TEST(DEV_API, copy) {
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
phi
::
Copy
(
phi
::
Copy
(
dev_ctx
,
*
(
dense_src
.
get
()),
phi
::
CPUPlace
(),
false
,
dense_dst
.
get
());
dev_ctx
,
*
(
dense_src
.
get
()),
phi
::
CPUPlace
(),
false
,
dense_dst
.
get
());
...
...
paddle/phi/tests/kernels/test_creation_dev_api.cc
浏览文件 @
09096aeb
...
@@ -36,7 +36,6 @@ TEST(DEV_API, empty) {
...
@@ -36,7 +36,6 @@ TEST(DEV_API, empty) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
// 2. test API
// 2. test API
auto
out
=
phi
::
Empty
<
int
>
(
dev_ctx
,
{
3
,
2
});
auto
out
=
phi
::
Empty
<
int
>
(
dev_ctx
,
{
3
,
2
});
...
@@ -66,7 +65,6 @@ TEST(DEV_API, empty_like) {
...
@@ -66,7 +65,6 @@ TEST(DEV_API, empty_like) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
out
=
phi
::
EmptyLike
<
float
>
(
dev_ctx
,
dense_x
);
auto
out
=
phi
::
EmptyLike
<
float
>
(
dev_ctx
,
dense_x
);
// 3. check result
// 3. check result
...
@@ -86,7 +84,6 @@ TEST(DEV_API, full) {
...
@@ -86,7 +84,6 @@ TEST(DEV_API, full) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
out
=
phi
::
Full
<
float
>
(
dev_ctx
,
{
3
,
2
},
val
);
auto
out
=
phi
::
Full
<
float
>
(
dev_ctx
,
{
3
,
2
},
val
);
// 3. check result
// 3. check result
...
@@ -119,7 +116,6 @@ TEST(DEV_API, full_like) {
...
@@ -119,7 +116,6 @@ TEST(DEV_API, full_like) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
// 2. test API
// 2. test API
auto
out
=
phi
::
FullLike
<
float
>
(
dev_ctx
,
dense_x
,
val
);
auto
out
=
phi
::
FullLike
<
float
>
(
dev_ctx
,
dense_x
,
val
);
...
...
paddle/phi/tests/kernels/test_dot_dev_api.cc
浏览文件 @
09096aeb
...
@@ -61,7 +61,6 @@ TEST(DEV_API, dot) {
...
@@ -61,7 +61,6 @@ TEST(DEV_API, dot) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
out
=
phi
::
Dot
<
float
>
(
dev_ctx
,
dense_x
,
dense_y
);
auto
out
=
phi
::
Dot
<
float
>
(
dev_ctx
,
dense_x
,
dense_y
);
// 3. check result
// 3. check result
...
...
paddle/phi/tests/kernels/test_elementwise_dev_api.cc
浏览文件 @
09096aeb
...
@@ -66,7 +66,6 @@ TEST(DEV_API, add) {
...
@@ -66,7 +66,6 @@ TEST(DEV_API, add) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
dense_out
=
phi
::
Add
<
float
>
(
dev_ctx
,
dense_x
,
dense_y
);
auto
dense_out
=
phi
::
Add
<
float
>
(
dev_ctx
,
dense_x
,
dense_y
);
// 3. check result
// 3. check result
...
@@ -118,7 +117,6 @@ TEST(DEV_API, subtract) {
...
@@ -118,7 +117,6 @@ TEST(DEV_API, subtract) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
dense_out
=
phi
::
Subtract
<
float
>
(
dev_ctx
,
dense_x
,
dense_y
);
auto
dense_out
=
phi
::
Subtract
<
float
>
(
dev_ctx
,
dense_x
,
dense_y
);
// 3. check result
// 3. check result
...
@@ -170,7 +168,6 @@ TEST(DEV_API, divide) {
...
@@ -170,7 +168,6 @@ TEST(DEV_API, divide) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
dense_out
=
phi
::
Divide
<
float
>
(
dev_ctx
,
dense_x
,
dense_y
);
auto
dense_out
=
phi
::
Divide
<
float
>
(
dev_ctx
,
dense_x
,
dense_y
);
// 3. check result
// 3. check result
...
@@ -222,7 +219,6 @@ TEST(DEV_API, multiply) {
...
@@ -222,7 +219,6 @@ TEST(DEV_API, multiply) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
dense_out
=
phi
::
Multiply
<
float
>
(
dev_ctx
,
dense_x
,
dense_y
);
auto
dense_out
=
phi
::
Multiply
<
float
>
(
dev_ctx
,
dense_x
,
dense_y
);
// 3. check result
// 3. check result
...
...
paddle/phi/tests/kernels/test_flatten_dev_api.cc
浏览文件 @
09096aeb
...
@@ -52,7 +52,6 @@ TEST(DEV_API, flatten) {
...
@@ -52,7 +52,6 @@ TEST(DEV_API, flatten) {
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
// 2. test API
// 2. test API
auto
out
=
phi
::
Flatten
<
float
>
(
dev_ctx
,
dense_x
,
start_axis
,
stop_axis
);
auto
out
=
phi
::
Flatten
<
float
>
(
dev_ctx
,
dense_x
,
start_axis
,
stop_axis
);
...
...
paddle/phi/tests/kernels/test_math_function.cc
浏览文件 @
09096aeb
...
@@ -273,7 +273,6 @@ TEST(math_funciton, set_constant) {
...
@@ -273,7 +273,6 @@ TEST(math_funciton, set_constant) {
t
.
Resize
({
10
,
10
});
t
.
Resize
({
10
,
10
});
t
.
mutable_data
<
int
>
(
paddle
::
platform
::
CPUPlace
());
t
.
mutable_data
<
int
>
(
paddle
::
platform
::
CPUPlace
());
auto
*
ctx
=
new
paddle
::
platform
::
CPUDeviceContext
();
auto
*
ctx
=
new
paddle
::
platform
::
CPUDeviceContext
();
ctx
->
Init
();
phi
::
funcs
::
set_constant
(
*
ctx
,
&
t
,
10
);
phi
::
funcs
::
set_constant
(
*
ctx
,
&
t
,
10
);
for
(
int64_t
i
=
0
;
i
<
t
.
numel
();
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
t
.
numel
();
++
i
)
{
PADDLE_ENFORCE_EQ
(
10
,
PADDLE_ENFORCE_EQ
(
10
,
...
...
paddle/phi/tests/kernels/test_matmul_dev_api.cc
浏览文件 @
09096aeb
...
@@ -58,7 +58,6 @@ TEST(DEV_API, dot) {
...
@@ -58,7 +58,6 @@ TEST(DEV_API, dot) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
out
=
Matmul
<
float
,
CPUContext
>
(
dev_ctx
,
dense_x
,
dense_y
,
false
,
false
);
auto
out
=
Matmul
<
float
,
CPUContext
>
(
dev_ctx
,
dense_x
,
dense_y
,
false
,
false
);
// 3. check result
// 3. check result
...
...
paddle/phi/tests/kernels/test_mean_dev_api.cc
浏览文件 @
09096aeb
...
@@ -51,7 +51,6 @@ TEST(DEV_API, mean) {
...
@@ -51,7 +51,6 @@ TEST(DEV_API, mean) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
out
=
phi
::
Mean
<
float
>
(
dev_ctx
,
dense_x
,
dims
,
false
);
auto
out
=
phi
::
Mean
<
float
>
(
dev_ctx
,
dense_x
,
dims
,
false
);
// 3. check result
// 3. check result
...
...
paddle/phi/tests/kernels/test_reshape_dev_api.cc
浏览文件 @
09096aeb
...
@@ -54,7 +54,6 @@ TEST(DEV_API, reshape) {
...
@@ -54,7 +54,6 @@ TEST(DEV_API, reshape) {
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
out
=
phi
::
Reshape
<
float
>
(
dev_ctx
,
dense_x
,
shape
);
auto
out
=
phi
::
Reshape
<
float
>
(
dev_ctx
,
dense_x
,
shape
);
// 3. check result
// 3. check result
std
::
vector
<
int64_t
>
expect_shape
=
{
12
,
3
};
std
::
vector
<
int64_t
>
expect_shape
=
{
12
,
3
};
...
...
paddle/phi/tests/kernels/test_scale_dev_api.cc
浏览文件 @
09096aeb
...
@@ -51,7 +51,6 @@ TEST(DEV_API, scale) {
...
@@ -51,7 +51,6 @@ TEST(DEV_API, scale) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
out
=
phi
::
Scale
<
float
>
(
dev_ctx
,
dense_x
,
scale
,
bias
,
bias_after_scale
);
auto
out
=
phi
::
Scale
<
float
>
(
dev_ctx
,
dense_x
,
scale
,
bias
,
bias_after_scale
);
...
@@ -93,7 +92,6 @@ TEST(DEV_API, scale_host) {
...
@@ -93,7 +92,6 @@ TEST(DEV_API, scale_host) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
out
=
phi
::
Scale
<
float
>
(
dev_ctx
,
dense_x
,
scale
,
bias
,
bias_after_scale
);
auto
out
=
phi
::
Scale
<
float
>
(
dev_ctx
,
dense_x
,
scale
,
bias
,
bias_after_scale
);
...
...
paddle/phi/tests/kernels/test_sparse_activation_dev_api.cc
浏览文件 @
09096aeb
...
@@ -42,7 +42,6 @@ TEST(DEV_API, sparse_relu) {
...
@@ -42,7 +42,6 @@ TEST(DEV_API, sparse_relu) {
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx_cpu
.
Init
();
DenseTensor
dense_x
=
DenseTensor
dense_x
=
phi
::
Empty
(
dev_ctx_cpu
,
phi
::
Empty
(
dev_ctx_cpu
,
...
...
paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc
浏览文件 @
09096aeb
...
@@ -75,7 +75,6 @@ void TestConv3dBase(const std::vector<IntT>& indices,
...
@@ -75,7 +75,6 @@ void TestConv3dBase(const std::vector<IntT>& indices,
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx_cpu
.
Init
();
const
int
in_channels
=
kernel_dims
[
3
];
const
int
in_channels
=
kernel_dims
[
3
];
const
int
out_channels
=
kernel_dims
[
4
];
const
int
out_channels
=
kernel_dims
[
4
];
...
...
paddle/phi/tests/kernels/test_sparse_elementwise_dev_api.cc
浏览文件 @
09096aeb
...
@@ -113,7 +113,6 @@ TEST(DEV_API, sparse_elementwise_coo_kernel_double) {
...
@@ -113,7 +113,6 @@ TEST(DEV_API, sparse_elementwise_coo_kernel_double) {
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx_cpu
.
Init
();
auto
coo_x
=
sparse
::
DenseToSparseCoo
<
T
>
(
dev_ctx_cpu
,
dense_x
,
sparse_dim
);
auto
coo_x
=
sparse
::
DenseToSparseCoo
<
T
>
(
dev_ctx_cpu
,
dense_x
,
sparse_dim
);
auto
coo_y
=
sparse
::
DenseToSparseCoo
<
T
>
(
dev_ctx_cpu
,
dense_y
,
sparse_dim
);
auto
coo_y
=
sparse
::
DenseToSparseCoo
<
T
>
(
dev_ctx_cpu
,
dense_y
,
sparse_dim
);
...
@@ -159,7 +158,6 @@ TEST(DEV_API, sparse_elementwise_csr_kernel_float) {
...
@@ -159,7 +158,6 @@ TEST(DEV_API, sparse_elementwise_csr_kernel_float) {
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx_cpu
.
Init
();
auto
csr_x
=
sparse
::
DenseToSparseCsr
<
T
>
(
dev_ctx_cpu
,
dense_x
);
auto
csr_x
=
sparse
::
DenseToSparseCsr
<
T
>
(
dev_ctx_cpu
,
dense_x
);
auto
csr_y
=
sparse
::
DenseToSparseCsr
<
T
>
(
dev_ctx_cpu
,
dense_y
);
auto
csr_y
=
sparse
::
DenseToSparseCsr
<
T
>
(
dev_ctx_cpu
,
dense_y
);
...
@@ -357,7 +355,6 @@ TEST(DEV_API, sparse_elementwise_csr_grad_kernel_float) {
...
@@ -357,7 +355,6 @@ TEST(DEV_API, sparse_elementwise_csr_grad_kernel_float) {
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx_cpu
.
Init
();
auto
csr_x
=
sparse
::
DenseToSparseCsr
<
T
>
(
dev_ctx_cpu
,
dense_x
);
auto
csr_x
=
sparse
::
DenseToSparseCsr
<
T
>
(
dev_ctx_cpu
,
dense_x
);
auto
csr_y
=
sparse
::
DenseToSparseCsr
<
T
>
(
dev_ctx_cpu
,
dense_y
);
auto
csr_y
=
sparse
::
DenseToSparseCsr
<
T
>
(
dev_ctx_cpu
,
dense_y
);
...
@@ -404,7 +401,6 @@ TEST(DEV_API, sparse_elementwise_coo_grad_kernel_double) {
...
@@ -404,7 +401,6 @@ TEST(DEV_API, sparse_elementwise_coo_grad_kernel_double) {
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx_cpu
.
Init
();
auto
csr_x
=
sparse
::
DenseToSparseCoo
<
T
>
(
dev_ctx_cpu
,
dense_x
,
sparse_dim
);
auto
csr_x
=
sparse
::
DenseToSparseCoo
<
T
>
(
dev_ctx_cpu
,
dense_x
,
sparse_dim
);
auto
csr_y
=
sparse
::
DenseToSparseCoo
<
T
>
(
dev_ctx_cpu
,
dense_y
,
sparse_dim
);
auto
csr_y
=
sparse
::
DenseToSparseCoo
<
T
>
(
dev_ctx_cpu
,
dense_y
,
sparse_dim
);
...
...
paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc
浏览文件 @
09096aeb
...
@@ -60,7 +60,6 @@ void TestMaxPoolBase(const std::vector<IntT>& indices,
...
@@ -60,7 +60,6 @@ void TestMaxPoolBase(const std::vector<IntT>& indices,
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx_cpu
.
Init
();
const
int
in_channels
=
x_dims
[
4
];
const
int
in_channels
=
x_dims
[
4
];
const
int
out_channels
=
in_channels
;
const
int
out_channels
=
in_channels
;
...
...
paddle/phi/tests/kernels/test_sparse_utils_dev_api.cc
浏览文件 @
09096aeb
...
@@ -88,7 +88,6 @@ void TestDenseToSparseCoo(const DenseTensor& dense_x,
...
@@ -88,7 +88,6 @@ void TestDenseToSparseCoo(const DenseTensor& dense_x,
paddle
::
platform
::
CPUPlace
());
paddle
::
platform
::
CPUPlace
());
phi
::
CPUContext
dev_ctx_cpu
;
phi
::
CPUContext
dev_ctx_cpu
;
dev_ctx_cpu
.
Init
();
dev_ctx_cpu
.
SetAllocator
(
dev_ctx_cpu
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
...
@@ -307,7 +306,6 @@ void TestSparseCsrToCoo(const DDim& dense_dims,
...
@@ -307,7 +306,6 @@ void TestSparseCsrToCoo(const DDim& dense_dims,
// 1. test cpu
// 1. test cpu
phi
::
CPUContext
dev_ctx_cpu
;
phi
::
CPUContext
dev_ctx_cpu
;
dev_ctx_cpu
.
Init
();
dev_ctx_cpu
.
SetAllocator
(
dev_ctx_cpu
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
...
@@ -489,7 +487,6 @@ void TestCooToCsr(const DDim& dense_dims,
...
@@ -489,7 +487,6 @@ void TestCooToCsr(const DDim& dense_dims,
// 1. test cpu
// 1. test cpu
phi
::
CPUContext
dev_ctx_cpu
;
phi
::
CPUContext
dev_ctx_cpu
;
dev_ctx_cpu
.
Init
();
dev_ctx_cpu
.
SetAllocator
(
dev_ctx_cpu
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
...
@@ -588,7 +585,6 @@ void TestDenseToSparseCsr(const DenseTensor& dense_x,
...
@@ -588,7 +585,6 @@ void TestDenseToSparseCsr(const DenseTensor& dense_x,
const
auto
alloc
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
const
auto
alloc
=
std
::
make_shared
<
paddle
::
experimental
::
DefaultAllocator
>
(
paddle
::
platform
::
CPUPlace
());
paddle
::
platform
::
CPUPlace
());
phi
::
CPUContext
dev_ctx_cpu
;
phi
::
CPUContext
dev_ctx_cpu
;
dev_ctx_cpu
.
Init
();
dev_ctx_cpu
.
SetAllocator
(
dev_ctx_cpu
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
...
@@ -701,7 +697,6 @@ void TestSparseCooToDense(const DDim& dense_dims,
...
@@ -701,7 +697,6 @@ void TestSparseCooToDense(const DDim& dense_dims,
const
int64_t
non_zero_num
,
const
int64_t
non_zero_num
,
const
int64_t
sparse_dim
)
{
const
int64_t
sparse_dim
)
{
phi
::
CPUContext
dev_ctx_cpu
;
phi
::
CPUContext
dev_ctx_cpu
;
dev_ctx_cpu
.
Init
();
dev_ctx_cpu
.
SetAllocator
(
dev_ctx_cpu
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
...
@@ -879,7 +874,6 @@ void TestSparseCsrToDense(const DDim& dense_dims,
...
@@ -879,7 +874,6 @@ void TestSparseCsrToDense(const DDim& dense_dims,
// 1. test cpu
// 1. test cpu
phi
::
CPUContext
dev_ctx_cpu
;
phi
::
CPUContext
dev_ctx_cpu
;
dev_ctx_cpu
.
Init
();
dev_ctx_cpu
.
SetAllocator
(
dev_ctx_cpu
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
phi
::
CPUPlace
())
.
GetAllocator
(
phi
::
CPUPlace
())
...
...
paddle/phi/tests/kernels/test_split_dev_api.cc
浏览文件 @
09096aeb
...
@@ -40,7 +40,6 @@ TEST(DEV_API, split) {
...
@@ -40,7 +40,6 @@ TEST(DEV_API, split) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
float
>
(
&
dense_x
);
auto
*
dense_x_data
=
dev_ctx
.
Alloc
<
float
>
(
&
dense_x
);
for
(
size_t
i
=
0
;
i
<
4
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
4
;
++
i
)
{
...
...
paddle/phi/tests/kernels/test_sum_dev_api.cc
浏览文件 @
09096aeb
...
@@ -49,7 +49,6 @@ TEST(DEV_API, sum) {
...
@@ -49,7 +49,6 @@ TEST(DEV_API, sum) {
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
dev_ctx
.
SetAllocator
(
paddle
::
memory
::
allocation
::
AllocatorFacade
::
Instance
()
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
GetAllocator
(
paddle
::
platform
::
CPUPlace
())
.
get
());
.
get
());
dev_ctx
.
Init
();
// 2. test API
// 2. test API
auto
out
=
auto
out
=
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录