Unverified · Commit 8cc7146d authored by Qi Li, committed by GitHub

[NPU] add int64 kernel for slice, test=develop (#36328)

* [NPU] add int64 kernel for scale and slice, test=develop

* remove int64 for scale, test=develop
Parent: e275e423
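
For context, a minimal usage sketch (not part of the commit) of what the new kernel enables: slicing an int64 Tensor on an NPU device. It assumes an NPU-enabled PaddlePaddle build with PADDLE_WITH_ASCEND_INT64 defined; the 'npu:0' device string and the lowering of basic indexing to the slice op are assumptions, not shown in this diff.

    # Hypothetical sketch: exercise the new int64 slice kernel on NPU.
    # Assumes an NPU build compiled with PADDLE_WITH_ASCEND_INT64.
    import numpy as np
    import paddle

    paddle.set_device('npu:0')  # assumed device string for the first NPU
    x = paddle.to_tensor(
        np.random.randint(100, size=(3, 4, 5, 6)).astype('int64'))
    y = x[1:3, 0:3, 2:4, :]  # basic indexing dispatches to the slice op
    print(y.shape)           # [2, 3, 2, 6]

This mirrors the configuration used by the new TestSliceOpInt64 unit test below.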
@@ -12,11 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <memory>
-#include <string>
-
-#include "paddle/fluid/operators/npu_op_runner.h"
 #include "paddle/fluid/operators/scale_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
 
 namespace paddle {
 namespace operators {
@@ -12,18 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <memory>
-#include <string>
-
-#include "paddle/fluid/framework/ddim.h"
-#include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/npu_op_runner.h"
 #include "paddle/fluid/operators/slice_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
 
 namespace paddle {
 namespace operators {
 
 using Tensor = framework::Tensor;
+using NPUDeviceContext = platform::NPUDeviceContext;
 
 void UpdateAttr(const framework::DDim& in_dims, const std::vector<int> axes,
                 const std::vector<int> starts, const std::vector<int> ends,
@@ -54,7 +50,7 @@ void UpdateAttr(const framework::DDim& in_dims, const std::vector<int> axes,
   }
 }
 
-template <typename DeviceContext, typename T>
+template <typename T>
 class SliceNPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -128,17 +124,14 @@ class SliceNPUKernel : public framework::OpKernel<T> {
     UpdateAttr(in_dims, axes, starts, ends, &offsets, &size);
 
+    auto stream = ctx.template device_context<NPUDeviceContext>().stream();
     const auto& runner = NpuOpRunner("SliceD", {*input}, {*out},
                                      {{"offsets", offsets}, {"size", size}});
-    auto stream =
-        ctx.template device_context<paddle::platform::NPUDeviceContext>()
-            .stream();
     runner.Run(stream);
   }
 };
 
-template <typename DeviceContext, typename T>
+template <typename T>
 class SliceGradNPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
@@ -221,15 +214,13 @@ class SliceGradNPUKernel : public framework::OpKernel<T> {
 namespace ops = paddle::operators;
 
-REGISTER_OP_NPU_KERNEL(
-    slice, ops::SliceNPUKernel<paddle::platform::NPUDeviceContext, float>,
-    ops::SliceNPUKernel<paddle::platform::NPUDeviceContext, int>,
-    ops::SliceNPUKernel<paddle::platform::NPUDeviceContext,
-                        paddle::platform::float16>);
+REGISTER_OP_NPU_KERNEL(slice, ops::SliceNPUKernel<float>,
+                       ops::SliceNPUKernel<int>,
+#ifdef PADDLE_WITH_ASCEND_INT64
+                       ops::SliceNPUKernel<int64_t>,
+#endif
+                       ops::SliceNPUKernel<paddle::platform::float16>);
 
-REGISTER_OP_NPU_KERNEL(
-    slice_grad,
-    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext, float>,
-    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext, int>,
-    ops::SliceGradNPUKernel<paddle::platform::NPUDeviceContext,
-                            paddle::platform::float16>);
+REGISTER_OP_NPU_KERNEL(slice_grad, ops::SliceGradNPUKernel<float>,
+                       ops::SliceGradNPUKernel<int>,
+                       ops::SliceGradNPUKernel<paddle::platform::float16>);
@@ -527,5 +527,69 @@ class TestSliceOpDecsDimStartsListTensorFP16(
         self.dtype = np.float16
 
 
+class TestSliceOpInt64(OpTest):
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+        self.inputs = {'Input': self.input}
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': self.starts,
+            'ends': self.ends,
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.randint(
+            100, size=(3, 4, 5, 6)).astype(self.dtype)
+        self.starts = [1, 0, 2]
+        self.ends = [3, 3, 4]
+        self.axes = [0, 1, 2]
+        self.infer_flags = [1, 1, 1]
+        self.out = self.input[1:3, 0:3, 2:4, :]
+
+    def init_dtype(self):
+        self.dtype = np.int64
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+
+class TestSliceOpTensorInt64(TestSliceOpInt64):
+    def setUp(self):
+        self.op_type = "slice"
+        self.set_npu()
+        self.init_dtype()
+        self.config()
+        self.inputs = {
+            'Input': self.input,
+            'StartsTensor': self.starts,
+            'EndsTensor': self.ends
+        }
+        self.outputs = {'Out': self.out}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': [-1, -1, -1],
+            'ends': [-1, -1, -1],
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.input = np.random.randint(
+            100, size=(3, 4, 5, 6)).astype(self.dtype)
+        self.starts = np.array([1, 0, 2]).astype('int32')
+        self.ends = np.array([3, 3, 4]).astype('int32')
+        self.axes = [0, 1, 2]
+        self.infer_flags = [-1, -1, -1]
+        self.out = self.input[1:3, 0:3, 2:4, :]
+
+
 if __name__ == '__main__':
     unittest.main()