Unverified commit 8cc7146d, authored by Qi Li and committed by GitHub

[NPU] add int64 kernel for slice, test=develop (#36328)

* [NPU] add int64 kernel for scale and slice, test=develop

* remove int64 for scale, test=develop
Parent e275e423
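For context, a minimal sketch of the user-level behavior this change enables (hypothetical usage, not part of the commit; assumes a Paddle build with Ascend NPU support and, for the int64 path, PADDLE_WITH_ASCEND_INT64 defined at build time):

import paddle

paddle.set_device('npu:0')  # run ops on the first NPU device
# int64 input: before this commit the NPU slice kernel only covered
# float, int32 and float16
x = paddle.arange(24, dtype='int64').reshape([2, 3, 4])
y = paddle.slice(x, axes=[0, 1], starts=[0, 1], ends=[1, 3])
print(y.shape)  # [1, 2, 4]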
paddle/fluid/operators/scale_op_npu.cc
@@ -12,11 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include <memory>
-#include <string>
-
-#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/operators/scale_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
namespace paddle {
namespace operators {
paddle/fluid/operators/slice_op_npu.cc
@@ -12,18 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include <memory>
-#include <string>
-#include "paddle/fluid/framework/ddim.h"
-#include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/npu_op_runner.h"
#include "paddle/fluid/operators/slice_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using NPUDeviceContext = platform::NPUDeviceContext;
void UpdateAttr(const framework::DDim& in_dims, const std::vector<int> axes,
const std::vector<int> starts, const std::vector<int> ends,
@@ -54,7 +50,7 @@ void UpdateAttr(const framework::DDim& in_dims, const std::vector<int> axes,
}
}
-template <typename DeviceContext, typename T>
+template <typename T>
class SliceNPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -128,17 +124,14 @@ class SliceNPUKernel : public framework::OpKernel<T> {
UpdateAttr(in_dims, axes, starts, ends, &offsets, &size);
+    auto stream = ctx.template device_context<NPUDeviceContext>().stream();
    const auto& runner = NpuOpRunner("SliceD", {*input}, {*out},
                                     {{"offsets", offsets}, {"size", size}});
-
-    auto stream =
-        ctx.template device_context<paddle::platform::NPUDeviceContext>()
-            .stream();
    runner.Run(stream);
}
};
-template <typename DeviceContext, typename T>
+template <typename T>
class SliceGradNPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
@@ -221,15 +214,13 @@ class SliceGradNPUKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
-REGISTER_OP_NPU_KERNEL(
-    slice, ops::SliceNPUKernel<paddle::platform::NPUDeviceContext, float>,
-    ops::SliceNPUKernel<paddle::platform::NPUDeviceContext, int>,
-    ops::SliceNPUKernel<paddle::platform::NPUDeviceContext,
-                        paddle::platform::float16>);
-
-REGISTER_OP_NPU_KERNEL(
-    slice_grad,
-    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext, float>,
-    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext, int>,
-    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext,
-                            paddle::platform::float16>);
+REGISTER_OP_NPU_KERNEL(slice, ops::SliceNPUKernel<float>,
+                       ops::SliceNPUKernel<int>,
+#ifdef PADDLE_WITH_ASCEND_INT64
+                       ops::SliceNPUKernel<int64_t>,
+#endif
+                       ops::SliceNPUKernel<paddle::platform::float16>);
+
+REGISTER_OP_NPU_KERNEL(slice_grad, ops::SliceGradNPUKernel<float>,
+                       ops::SliceGradNPUKernel<int>,
+                       ops::SliceGradNPUKernel<paddle::platform::float16>);
python/paddle/fluid/tests/unittests/npu/test_slice_op_npu.py
@@ -527,5 +527,69 @@ class TestSliceOpDecsDimStartsListTensorFP16(
self.dtype = np.float16
class TestSliceOpInt64(OpTest):
    def set_npu(self):
        self.__class__.use_npu = True
        self.place = paddle.NPUPlace(0)

    def setUp(self):
        self.op_type = "slice"
        self.set_npu()
        self.init_dtype()
        self.config()
        self.inputs = {'Input': self.input}
        self.outputs = {'Out': self.out}
        self.attrs = {
            'axes': self.axes,
            'starts': self.starts,
            'ends': self.ends,
            'infer_flags': self.infer_flags
        }

    def config(self):
        self.input = np.random.randint(
            100, size=(3, 4, 5, 6)).astype(self.dtype)
        self.starts = [1, 0, 2]
        self.ends = [3, 3, 4]
        self.axes = [0, 1, 2]
        self.infer_flags = [1, 1, 1]
        self.out = self.input[1:3, 0:3, 2:4, :]

    def init_dtype(self):
        self.dtype = np.int64

    def test_check_output(self):
        self.check_output_with_place(self.place)


class TestSliceOpTensorInt64(TestSliceOpInt64):
    def setUp(self):
        self.op_type = "slice"
        self.set_npu()
        self.init_dtype()
        self.config()
        self.inputs = {
            'Input': self.input,
            'StartsTensor': self.starts,
            'EndsTensor': self.ends
        }
        self.outputs = {'Out': self.out}
        self.attrs = {
            'axes': self.axes,
            'starts': [-1, -1, -1],
            'ends': [-1, -1, -1],
            'infer_flags': self.infer_flags
        }

    def config(self):
        self.input = np.random.randint(
            100, size=(3, 4, 5, 6)).astype(self.dtype)
        self.starts = np.array([1, 0, 2]).astype('int32')
        self.ends = np.array([3, 3, 4]).astype('int32')
        self.axes = [0, 1, 2]
        self.infer_flags = [-1, -1, -1]
        self.out = self.input[1:3, 0:3, 2:4, :]


if __name__ == '__main__':
    unittest.main()
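The TestSliceOpTensorInt64 case above passes starts/ends through the StartsTensor/EndsTensor inputs rather than as attributes, exercising the runtime-tensor path. A hedged sketch of the equivalent user-level call (same assumptions as the earlier snippet):

import paddle

paddle.set_device('npu:0')
x = paddle.randint(0, 100, shape=[3, 4, 5, 6], dtype='int64')
# tensor-valued starts/ends are resolved at runtime, not graph-build time
starts = paddle.to_tensor([1, 0, 2], dtype='int32')
ends = paddle.to_tensor([3, 3, 4], dtype='int32')
y = paddle.slice(x, axes=[0, 1, 2], starts=starts, ends=ends)
print(y.shape)  # [2, 3, 2, 6]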