未验证 提交 ac2a94c7 编写于 作者: H houj04 提交者: GitHub

[XPU] add cumsum op. test=kunlun (#47585)

* [XPU] add cumsum op. test=kunlun

* try to fix linker. test=kunlun

* try to fix linker. test=kunlun

* try to fix linker. test=kunlun

* debug. test=kunlun

* update xpu.cmake. remove unnecessary codes. test=kunlun.
上级 eb9e4601
......@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
# Choose the base URL for the XPU SDK package: fall back to a dated directory
# under the default dev location unless XPU_BASE_URL is already defined.
if(NOT DEFINED XPU_BASE_URL)
set(XPU_BASE_URL_WITHOUT_DATE
"https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
# NOTE(review): two consecutive set() calls on the same variable appear here
# (20221016, then 20221104). If both are evaluated the second one wins; this
# looks like the before/after pair of a diff view -- confirm only one is meant
# to remain.
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221016")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221104")
else()
# XPU_BASE_URL was already defined: re-assign it to its current value.
set(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
......@@ -19,7 +19,7 @@ endif()
# Choose the base URL for the XDNN package: fall back to a dated directory
# under the default dev location unless XPU_XDNN_BASE_URL is already defined.
if(NOT DEFINED XPU_XDNN_BASE_URL)
set(XPU_XDNN_BASE_URL_WITHOUT_DATE
"https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
# NOTE(review): two consecutive set() calls on the same variable appear here
# (20221016, then 20221103). If both are evaluated the second one wins; this
# looks like the before/after pair of a diff view -- confirm only one is meant
# to remain.
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20221016")
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20221103")
else()
# XPU_XDNN_BASE_URL was already defined: re-assign it to its current value.
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
endif()
......
......@@ -123,6 +123,10 @@ XPUOpMap& get_kl2_ops() {
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"conv2d_transpose",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"cumsum",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::INT32, XPUPlace()),
pOpKernelType(vartype::INT64, XPUPlace())})},
{"deformable_conv_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"deformable_conv",
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/cum_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void CumsumKernel(const Context& dev_ctx,
const DenseTensor& x,
const Scalar& axis,
bool flatten,
bool exclusive,
bool reverse,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
dev_ctx.template Alloc<T>(out);
// prepare for call xdnn api
std::vector<int> x_shape = phi::vectorize<int>(x.dims());
int axis_as_int = axis.to<int>();
if (flatten) {
// flatten to 1-dim vector
x_shape = {static_cast<int>(x.numel())};
axis_as_int = 0;
} else {
// not flatten
// check axis_as_int
auto out_dims = out->dims();
PADDLE_ENFORCE_EQ(
axis_as_int < out_dims.size() && axis_as_int >= (0 - out_dims.size()),
true,
phi::errors::OutOfRange(
"Attr(axis) is out of range, It's expected "
"to be in range of [-%d, %d]. But received Attr(axis) = %d.",
out_dims.size(),
out_dims.size() - 1,
axis_as_int));
if (axis_as_int < 0) {
axis_as_int += out_dims.size();
}
}
// template<typename T> DLL_EXPORT int cumsum(Context* ctx, const T* x, T* y,
// const std::vector<int>& xshape, bool reverse, bool exclusive, int axis);
int r = cumsum(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
reinterpret_cast<XPUType*>(out->data<T>()),
x_shape,
reverse,
exclusive,
axis_as_int);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "cumsum");
}
} // namespace phi
// Register phi::CumsumKernel under the name "cumsum" for the XPU backend with
// ALL_LAYOUT, listing float, int and int64_t as the element types. The empty
// braces mean no extra per-kernel setup is performed in the registration body.
PD_REGISTER_KERNEL(
cumsum, XPU, ALL_LAYOUT, phi::CumsumKernel, float, int, int64_t) {}
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import sys
sys.path.append("..")
import paddle
import paddle.fluid.core as core
from op_test_xpu import XPUOpTest
from xpu.get_test_cover_info import (
create_test_class,
get_xpu_op_support_types,
XPUOpTestWrapper,
)
paddle.enable_static()
class XPUTestCumsumOP(XPUOpTestWrapper):
    """Wrapper that groups the XPU ``cumsum`` operator test cases."""

    def __init__(self):
        self.op_name = 'cumsum'
        self.use_dynamic_create_class = False

    class TestCumsumOPBase(XPUOpTest):
        """Base case: cumsum over a (2, 5) input with ``axis=None``.

        Subclasses override ``init_config`` to change the input shape and
        axis; everything else is shared.
        """

        def init_config(self):
            # Default configuration; axis=None makes set_case() request
            # flatten=True.
            self.input_shape = (2, 5)
            self.axis = None

        def init_dtype(self):
            # NOTE(review): in_type is not set in this class; presumably it is
            # injected by create_test_class -- confirm.
            self.dtype = self.in_type

        def setUp(self):
            self.place = paddle.XPUPlace(0)
            self.xpu_version = core.get_xpu_device_version(0)
            self.init_dtype()
            self.set_case()

        def set_case(self):
            """Build the op inputs, attributes and the NumPy reference output."""
            self.op_type = 'cumsum'
            self.init_config()

            samples = np.random.uniform(-100.0, 100.0, self.input_shape)
            self.data = samples.astype(self.dtype)

            self.inputs = {'X': self.data}
            self.attrs = {
                'use_xpu': True,
                'axis': self.axis,
                'flatten': self.axis is None,
            }
            # np.cumsum over the same data serves as the expected result.
            self.outputs = {'Out': np.cumsum(self.data, axis=self.axis)}

        def test_check_output(self):
            self.check_output_with_place(self.place)

    class XPUTestCumsum1(TestCumsumOPBase):
        """2-D input, accumulate along axis 0."""

        def init_config(self):
            self.input_shape = [2, 768]
            self.axis = 0

    class XPUTestCumsum2(TestCumsumOPBase):
        """3-D input, accumulate along the middle axis."""

        def init_config(self):
            self.input_shape = [3, 8, 4096]
            self.axis = 1

    class XPUTestCumsum3(TestCumsumOPBase):
        """1-D input, accumulate along axis 0."""

        def init_config(self):
            self.input_shape = [1024]
            self.axis = 0

    class XPUTestCumsum4(TestCumsumOPBase):
        """3-D input, negative axis (-1)."""

        def init_config(self):
            self.input_shape = [2, 2, 255]
            self.axis = -1
# Create one concrete set of test classes in this module's namespace for every
# entry returned by get_xpu_op_support_types('cumsum').
support_types = get_xpu_op_support_types('cumsum')
for stype in support_types:
    create_test_class(globals(), XPUTestCumsumOP, stype)

# Run the unittest runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
......@@ -570,9 +570,11 @@ if '${WITH_XPU}' == 'ON':
if os.system(command) != 0:
raise Exception("patch ${XPU_API_LIB} failed, command: %s" % command)
shutil.copy('${XPU_API_LIB}', libs_path)
shutil.copy('${XPU_RT_LIB}', libs_path)
package_data['paddle.libs']+=['${XPU_API_LIB_NAME}',
'${XPU_RT_LIB_NAME}']
package_data['paddle.libs']+=['${XPU_API_LIB_NAME}']
xpu_rt_lib_list = glob.glob('${XPU_RT_LIB}*')
for xpu_rt_lib_file in xpu_rt_lib_list:
shutil.copy(xpu_rt_lib_file, libs_path)
package_data['paddle.libs']+=[os.path.basename(xpu_rt_lib_file)]
if '${WITH_XPU_BKCL}' == 'ON':
shutil.copy('${XPU_BKCL_LIB}', libs_path)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册