未验证 提交 0a4ffbc7 编写于 作者: C Chen Weihang 提交者: GitHub

[PTen] Move dot kernel impl (#38359)

* move dot kernel impl

* remove needless cmake items
上级 ebbd3564
......@@ -20,13 +20,13 @@ limitations under the License. */
// the kernel declare statement is automatically generated according to the
// file name of the kernel, and this header file will be removed
PT_DECLARE_KERNEL(dot, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(cast, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL(dot, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(cast, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT);
......
......@@ -18,6 +18,7 @@
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/linalg.h"
#include "paddle/pten/kernels/dot_kernel.h"
#include "paddle/pten/kernels/gpu/linalg.h"
namespace pten {
......@@ -31,7 +32,7 @@ DenseTensor Dot(const ContextT& dev_ctx,
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
Dot<T>(dev_ctx, x, y, &dense_out);
Dot<T, ContextT>(dev_ctx, x, y, &dense_out);
return dense_out;
}
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/pten/kernels/dot_kernel.h"
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/complex.h"
namespace pten {
// Computes the dot product of x and y, contracting along the trailing axis:
// the flat inputs are treated as (N / B) rows of length B (B = extent of the
// last axis) and one inner product is written per row into out.
//
// T        - element type (float/double/int/int64_t/complex).
// ContextT - backend device context (CPU here).
// out      - pre-shaped output tensor; its buffer is allocated via
//            mutable_data<T>() and receives N / B scalars.
template <typename T, typename ContextT>
void Dot(const ContextT& dev_ctx,
         const DenseTensor& x,
         const DenseTensor& y,
         DenseTensor* out) {
  const T* x_ptr = x.data<T>();
  const T* y_ptr = y.data<T>();
  auto* z = out->mutable_data<T>();

  auto&& d = x.dims();
  auto const N = x.numel();
  auto const B = d[d.size() - 1];

  // Guard empty inputs: without this, N == 0 with B == 0 would divide by
  // zero below.
  if (N == 0 || B == 0) return;

  // 64-bit indices: numel() may exceed INT_MAX for large tensors, and the
  // previous int counters would overflow.
  for (int64_t j = 0; j < N / B; j++) {
    T ss = 0;
    // Both cursors advance in lock step over the flat storage, consuming B
    // pairs per output element.
    for (int64_t i = 0; i < B; i++) ss += (*x_ptr++) * (*y_ptr++);
    z[j] = ss;
  }
}
} // namespace pten
// Short aliases so the complex element types fit the registration list.
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;
// Registers pten::Dot with the kernel registry for the CPU backend under all
// layouts, instantiated once per element type listed below. The empty braces
// are the (unused) per-kernel customization body the macro expects.
PT_REGISTER_CTX_KERNEL(dot,
CPU,
ALL_LAYOUT,
pten::Dot,
float,
double,
int,
int64_t,
complex64,
complex128) {}
......@@ -25,28 +25,6 @@
namespace pten {
// Dot product over the trailing axis: views both operands as (numel / B)
// rows of length B, where B is the extent of the last dimension, and writes
// one inner product per row into out.
template <typename T>
void Dot(const CPUContext& dev_ctx,
         const DenseTensor& x,
         const DenseTensor& y,
         DenseTensor* out) {
  // Flat cursors over both operands; they advance in lock step below.
  const T* x_cursor = x.data<T>();
  const T* y_cursor = y.data<T>();
  T* result = out->mutable_data<T>();

  const auto& dims = x.dims();
  const auto total = x.numel();
  const auto row_len = dims[dims.size() - 1];
  const auto rows = total / row_len;

  for (int row = 0; row < rows; row++) {
    T acc = 0;
    for (int k = 0; k < row_len; k++) {
      acc += (*x_cursor++) * (*y_cursor++);
    }
    result[row] = acc;
  }
}
template <typename T>
void Matmul(const CPUContext& dev_ctx,
const DenseTensor& x,
......@@ -73,17 +51,6 @@ void Matmul(const CPUContext& dev_ctx,
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;
PT_REGISTER_KERNEL(dot,
CPU,
ALL_LAYOUT,
pten::Dot,
float,
double,
int,
int64_t,
complex64,
complex128) {}
PT_REGISTER_KERNEL(matmul,
CPU,
ALL_LAYOUT,
......
......@@ -22,12 +22,6 @@
namespace pten {
// CPU-backend dot product kernel: contracts x and y along the trailing axis
// and writes one scalar per leading row into out. Definition lives in the
// corresponding CPU kernel source file.
template <typename T>
void Dot(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T>
void Matmul(const CPUContext& dev_ctx,
const DenseTensor& x,
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/pten/core/dense_tensor.h"
namespace pten {
// Backend-generic dot product kernel declaration: contracts x and y along
// the trailing axis, writing one scalar per leading row into out. ContextT
// selects the device context (CPU/GPU); definitions live in the per-backend
// kernel sources, which register this template for their supported types.
template <typename T, typename ContextT>
void Dot(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
} // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
#include "paddle/pten/kernels/dot_kernel.h"
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/hybird/eigen/common.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/complex.h"
namespace pten {
namespace eigen {
template <typename DevCtx, typename T>
void Dot(const DevCtx& dev_ctx,
template <typename T, typename ContextT>
void Dot(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
......@@ -46,5 +47,18 @@ void Dot(const DevCtx& dev_ctx,
}
}
} // namespace eigen
} // namespace pten
// Short aliases so the complex element types fit the registration list.
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;
// Registers pten::Dot with the kernel registry for the GPU backend under all
// layouts, instantiated once per element type listed below. The empty braces
// are the (unused) per-kernel customization body the macro expects.
PT_REGISTER_CTX_KERNEL(dot,
GPU,
ALL_LAYOUT,
pten::Dot,
float,
double,
int,
int64_t,
complex64,
complex128) {}
......@@ -15,7 +15,6 @@
#include "paddle/pten/kernels/gpu/linalg.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/hybird/eigen/dot.h"
#include "paddle/pten/kernels/hybird/math/matmul_func.h"
// See Note [ Why still include the fluid headers? ]
......@@ -23,14 +22,6 @@
namespace pten {
// GPU-backend dot product kernel: thin wrapper that forwards to the
// Eigen-based implementation, which performs the reduction on the device.
template <typename T>
void Dot(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
eigen::Dot<GPUContext, T>(dev_ctx, x, y, out);
}
template <typename T>
void Matmul(const GPUContext& dev_ctx,
const DenseTensor& x,
......@@ -58,17 +49,6 @@ using float16 = paddle::platform::float16;
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;
PT_REGISTER_KERNEL(dot,
GPU,
ALL_LAYOUT,
pten::Dot,
float,
double,
int,
int64_t,
complex64,
complex128) {}
PT_REGISTER_KERNEL(matmul,
GPU,
ALL_LAYOUT,
......
......@@ -22,12 +22,6 @@
namespace pten {
// GPU-backend dot product kernel declaration; the definition (an Eigen-based
// device reduction) lives in the corresponding GPU kernel source file.
template <typename T>
void Dot(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T>
void Matmul(const GPUContext& dev_ctx,
const DenseTensor& x,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册