未验证 提交 1435b4c0 编写于 作者: L liym27 提交者: GitHub

[NPU] Support executor with NPU (#31057)

* [NPU] Support executor with NPU

* Fix code according to reviews

* Fix code

* Add unittest for sub op npu
上级 678a3e8f
......@@ -466,6 +466,14 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
#else
PADDLE_THROW(
platform::errors::Unimplemented("No XPU gc found in CPU/GPU paddle"));
#endif
} else if (platform::is_npu_place(place_)) {
#ifdef PADDLE_WITH_ASCEND_CL
// TODO(ascendrc): Support garbage collector on NPUPlace
VLOG(4) << "Skip NPU gc because it is not implemented now.";
#else
PADDLE_THROW(platform::errors::Unimplemented(
"No NPU gc found in CPU/GPU/XPU paddle"));
#endif
}
}
......
......@@ -1275,6 +1275,16 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx,
expected_kernel_key.place_ = platform::CPUPlace();
kernel_iter = kernels.find(expected_kernel_key);
}
#endif
#ifdef PADDLE_WITH_ASCEND_CL
if (kernel_iter == kernels.end() &&
is_npu_place(expected_kernel_key.place_)) {
VLOG(3) << "missing NPU kernel: " << type_
<< ", expected_kernel_key:" << expected_kernel_key
<< ", fallbacking to CPU one!";
expected_kernel_key.place_ = platform::CPUPlace();
kernel_iter = kernels.find(expected_kernel_key);
}
#endif
PADDLE_ENFORCE_NE(kernel_iter, kernels.end(),
platform::errors::NotFound(
......
......@@ -614,6 +614,9 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
const BuildStrategy &build_strategy,
ir::Graph *graph)
: member_(new ParallelExecutorPrivate(places, scope)) {
PADDLE_ENFORCE(places.size() > 0 && !is_npu_place(places[0]),
platform::errors::Unavailable(
"NPU is not supported in ParallelExecutor"));
InitP2P(places);
ir::InitReaderQueueDeviceCount(graph, *(member_->global_scope_),
member_->places_.size());
......
......@@ -101,15 +101,19 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
// TODO(zhiqiu): handle different condition like CUDA code below
else if (platform::is_npu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
auto stream = reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream();
auto stream =
reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream();
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::NPUPlace, src_place), src_ptr, size, stream);
BOOST_GET_CONST(platform::NPUPlace, src_place), src_ptr, size,
stream);
}
else if (platform::is_cpu_place(src_place) && // NOLINT
platform::is_npu_place(dst_place)) {
auto stream = reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream();
auto stream =
reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream();
memory::Copy(BOOST_GET_CONST(platform::NPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size, stream);
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size,
stream);
}
else if (platform::is_npu_place(src_place) && // NOLINT
platform::is_npu_place(dst_place)) {
......@@ -118,9 +122,11 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
<< dst_place;
return;
}
auto stream = reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream();
auto stream =
reinterpret_cast<const platform::NPUDeviceContext&>(ctx).stream();
memory::Copy(BOOST_GET_CONST(platform::NPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::NPUPlace, src_place), src_ptr, size, stream);
BOOST_GET_CONST(platform::NPUPlace, src_place), src_ptr, size,
stream);
}
else { // NOLINT
PADDLE_THROW(platform::errors::Unimplemented(
......@@ -336,24 +342,27 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
#endif
#ifdef PADDLE_WITH_ASCEND_CL
else if (platform::is_npu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
platform::is_cpu_place(dst_place)) { /* npu -> cpu*/
memory::Copy(BOOST_GET_CONST(platform::CPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::NPUPlace, src_place), src_ptr, size, nullptr);
BOOST_GET_CONST(platform::NPUPlace, src_place), src_ptr, size,
nullptr);
}
else if (platform::is_cpu_place(src_place) && // NOLINT
platform::is_npu_place(dst_place)) {
platform::is_npu_place(dst_place)) { /* cpu -> npu*/
memory::Copy(BOOST_GET_CONST(platform::NPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size, nullptr);
BOOST_GET_CONST(platform::CPUPlace, src_place), src_ptr, size,
nullptr);
}
else if (platform::is_npu_place(src_place) && // NOLINT
platform::is_npu_place(dst_place)) {
platform::is_npu_place(dst_place)) { /* npu -> npu*/
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data sync from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(BOOST_GET_CONST(platform::NPUPlace, dst_place), dst_ptr,
BOOST_GET_CONST(platform::NPUPlace, src_place), src_ptr, size, nullptr);
BOOST_GET_CONST(platform::NPUPlace, src_place), src_ptr, size,
nullptr);
}
else { // NOLINT
PADDLE_THROW(platform::errors::Unimplemented(
......
......@@ -154,6 +154,14 @@ bool IsCompiledWithXPU() {
#endif
}
// Reports whether this Paddle binary was built with Ascend CL (NPU) support.
// Purely a compile-time fact: true exactly when PADDLE_WITH_ASCEND_CL was
// defined during the build.
bool IsCompiledWithNPU() {
#ifdef PADDLE_WITH_ASCEND_CL
  return true;
#else
  return false;
#endif
}
bool IsCompiledWithMKLDNN() {
#ifndef PADDLE_WITH_MKLDNN
return false;
......@@ -567,6 +575,10 @@ PYBIND11_MODULE(core_noavx, m) {
[](Tensor &self, paddle::platform::CPUPlace &place) {
self.mutable_data<float>(place);
})
.def("_alloc_float",
[](Tensor &self, paddle::platform::NPUPlace &place) {
self.mutable_data<float>(place);
})
.def("_alloc_double",
[](Tensor &self, paddle::platform::CPUPlace &place) {
self.mutable_data<double>(place);
......@@ -611,6 +623,11 @@ PYBIND11_MODULE(core_noavx, m) {
paddle::framework::proto::VarType::Type type) {
return reinterpret_cast<uintptr_t>(self.mutable_data(place, type));
})
.def("_mutable_data",
[](Tensor &self, paddle::platform::NPUPlace &place,
paddle::framework::proto::VarType::Type type) {
return reinterpret_cast<uintptr_t>(self.mutable_data(place, type));
})
.def("_clear", &Tensor::clear)
.def("set", SetTensorFromPyArray<paddle::platform::CPUPlace>,
py::arg("array"), py::arg("place"), py::arg("zero_copy") = false)
......@@ -618,6 +635,8 @@ PYBIND11_MODULE(core_noavx, m) {
py::arg("array"), py::arg("place"), py::arg("zero_copy") = false)
.def("set", SetTensorFromPyArray<paddle::platform::CUDAPlace>,
py::arg("array"), py::arg("place"), py::arg("zero_copy") = false)
.def("set", SetTensorFromPyArray<paddle::platform::NPUPlace>,
py::arg("array"), py::arg("place"), py::arg("zero_copy") = false)
.def("set", SetTensorFromPyArray<paddle::platform::CUDAPinnedPlace>,
py::arg("array"), py::arg("place"), py::arg("zero_copy") = false,
R"DOC(
......@@ -625,7 +644,7 @@ PYBIND11_MODULE(core_noavx, m) {
Args:
lod (numpy.ndarray): The data to set.
place (CPUPlace|CUDAPlace|XPUPlace|CUDAPinnedPlace): The place where the
place (CPUPlace|CUDAPlace|XPUPlace|CUDAPinnedPlace|NPUPlace): The place where the
LoDTensor is to be set.
zero_copy (bool, optional): Whether to share memory with the input numpy array.
This parameter only works with CPUPlace. Default: False.
......@@ -1348,6 +1367,18 @@ All parameter, weight, gradient are variables in Paddle.
return new paddle::platform::XPUDeviceContext(place);
#endif
})
.def_static("create",
[](paddle::platform::NPUPlace& place)
-> paddle::platform::DeviceContext* {
#ifndef PADDLE_WITH_ASCEND_CL
PADDLE_THROW(
platform::errors::PermissionDenied(
"Cannot use NPUPlace in CPU/GPU/XPU version, "
"Please recompile or reinstall Paddle with NPU support."));
#else
return new paddle::platform::NPUDeviceContext(place);
#endif
})
.def_static("create",
[](paddle::platform::CUDAPlace& place)
-> paddle::platform::DeviceContext* {
......@@ -1448,6 +1479,7 @@ All parameter, weight, gradient are variables in Paddle.
.def("_equals", &IsSamePlace<platform::CUDAPlace, platform::CUDAPlace>)
.def("_equals", &IsSamePlace<platform::CUDAPlace, platform::CPUPlace>)
.def("_equals", &IsSamePlace<platform::CUDAPlace, platform::XPUPlace>)
.def("_equals", &IsSamePlace<platform::CUDAPlace, platform::NPUPlace>)
.def("_equals",
&IsSamePlace<platform::CUDAPlace, platform::CUDAPinnedPlace>)
.def("_get_device_id",
......@@ -1517,6 +1549,7 @@ All parameter, weight, gradient are variables in Paddle.
#ifdef PADDLE_WITH_XPU
m.def("get_xpu_device_count", platform::GetXPUDeviceCount);
#endif
py::class_<paddle::platform::CPUPlace>(m, "CPUPlace", R"DOC(
CPUPlace is a descriptor of a device.
It represents a CPU device on which a tensor will be allocated and a model will run.
......@@ -1532,6 +1565,7 @@ All parameter, weight, gradient are variables in Paddle.
.def("_type", &PlaceIndex<platform::CPUPlace>)
.def("_equals", &IsSamePlace<platform::CPUPlace, platform::Place>)
.def("_equals", &IsSamePlace<platform::CPUPlace, platform::XPUPlace>)
.def("_equals", &IsSamePlace<platform::CPUPlace, platform::NPUPlace>)
.def("_equals", &IsSamePlace<platform::CPUPlace, platform::CUDAPlace>)
.def("_equals", &IsSamePlace<platform::CPUPlace, platform::CPUPlace>)
.def("_equals",
......@@ -1569,6 +1603,8 @@ All parameter, weight, gradient are variables in Paddle.
&IsSamePlace<platform::CUDAPinnedPlace, platform::CUDAPlace>)
.def("_equals",
&IsSamePlace<platform::CUDAPinnedPlace, platform::XPUPlace>)
.def("_equals",
&IsSamePlace<platform::CUDAPinnedPlace, platform::NPUPlace>)
.def("_equals",
&IsSamePlace<platform::CUDAPinnedPlace, platform::CPUPlace>)
.def("_equals",
......@@ -1576,6 +1612,65 @@ All parameter, weight, gradient are variables in Paddle.
.def("__repr__", string::to_string<const platform::CUDAPinnedPlace &>)
.def("__str__", string::to_string<const platform::CUDAPinnedPlace &>);
// NPUPlace
py::class_<platform::NPUPlace>(m, "NPUPlace", R"DOC(
NPUPlace is a descriptor of a device.
It represents a NPU device on which a tensor will be allocated and a model will run.
Examples:
.. code-block:: python
import paddle
npu_place = paddle.NPUPlace(0)
)DOC")
.def("__init__",
[](platform::NPUPlace &self, int dev_id) {
#ifdef PADDLE_WITH_ASCEND_CL
if (UNLIKELY(dev_id < 0)) {
LOG(ERROR) << string::Sprintf(
"Invalid NPUPlace(%d), device id must be 0 or "
"positive integer",
dev_id);
std::exit(-1);
}
if (UNLIKELY(dev_id >= platform::GetNPUDeviceCount())) {
if (platform::GetNPUDeviceCount() == 0) {
LOG(ERROR) << "Cannot use NPU because there is no NPU "
"detected on your "
"machine.";
std::exit(-1);
} else {
LOG(ERROR) << string::Sprintf(
"Invalid NPUPlace(%d), must inside [0, %d), because NPU "
"number on your machine is %d",
dev_id, platform::GetNPUDeviceCount(),
platform::GetNPUDeviceCount());
std::exit(-1);
}
}
new (&self) platform::NPUPlace(dev_id);
#else
LOG(ERROR) << string::Sprintf(
"Cannot use NPU because you have installed CPU/GPU version "
"PaddlePaddle.\n"
"If you want to use NPU, please try to install NPU version "
"PaddlePaddle by: pip install paddlepaddle-xpu\n"
"If you only have CPU, please change NPUPlace(%d) to be "
"CPUPlace().\n",
dev_id);
std::exit(-1);
#endif
})
.def("_type", &PlaceIndex<platform::NPUPlace>)
.def("_equals", &IsSamePlace<platform::NPUPlace, platform::Place>)
.def("_equals", &IsSamePlace<platform::NPUPlace, platform::CUDAPlace>)
.def("_equals", &IsSamePlace<platform::NPUPlace, platform::CPUPlace>)
.def("_equals", &IsSamePlace<platform::NPUPlace, platform::XPUPlace>)
.def("_equals", &IsSamePlace<platform::NPUPlace, platform::NPUPlace>)
.def("_equals",
&IsSamePlace<platform::NPUPlace, platform::CUDAPinnedPlace>)
.def("__str__", string::to_string<const platform::NPUPlace &>);
py::class_<platform::Place>(m, "Place")
.def(py::init<>())
.def("_type", &PlaceIndex<platform::Place>)
......@@ -1583,6 +1678,7 @@ All parameter, weight, gradient are variables in Paddle.
.def("_equals", &IsSamePlace<platform::Place, platform::CUDAPlace>)
.def("_equals", &IsSamePlace<platform::Place, platform::CPUPlace>)
.def("_equals", &IsSamePlace<platform::Place, platform::XPUPlace>)
.def("_equals", &IsSamePlace<platform::Place, platform::NPUPlace>)
.def("_equals", &IsSamePlace<platform::Place, platform::CUDAPinnedPlace>)
.def("is_gpu_place",
[](platform::Place &self) { return platform::is_gpu_place(self); })
......@@ -1590,6 +1686,8 @@ All parameter, weight, gradient are variables in Paddle.
[](platform::Place &self) { return platform::is_cpu_place(self); })
.def("is_xpu_place",
[](platform::Place &self) { return platform::is_xpu_place(self); })
.def("is_npu_place",
[](platform::Place &self) { return platform::is_npu_place(self); })
.def("is_cuda_pinned_place",
[](platform::Place &self) {
return platform::is_cuda_pinned_place(self);
......@@ -1602,6 +1700,10 @@ All parameter, weight, gradient are variables in Paddle.
[](platform::Place &self) {
return BOOST_GET_CONST(platform::XPUPlace, self).device;
})
.def("npu_device_id",
[](platform::Place &self) {
return BOOST_GET_CONST(platform::NPUPlace, self).device;
})
.def("set_place", [](platform::Place &self,
const platform::Place &other) { self = other; })
.def("set_place",
......@@ -1621,6 +1723,10 @@ All parameter, weight, gradient are variables in Paddle.
const platform::CUDAPinnedPlace &cuda_pinned_place) {
self = cuda_pinned_place;
})
.def("set_place",
[](platform::Place &self, const platform::NPUPlace &npu_place) {
self = npu_place;
})
.def("__repr__", string::to_string<const platform::Place &>)
.def("__str__", string::to_string<const platform::Place &>);
......@@ -1645,6 +1751,9 @@ All parameter, weight, gradient are variables in Paddle.
.def("run",
[](OperatorBase &self, const Scope &scope,
const platform::XPUPlace &place) { self.Run(scope, place); })
.def("run",
[](OperatorBase &self, const Scope &scope,
const platform::NPUPlace &place) { self.Run(scope, place); })
.def("run",
[](OperatorBase &self, const Scope &scope,
const platform::CUDAPlace &place) { self.Run(scope, place); })
......@@ -1745,6 +1854,7 @@ All parameter, weight, gradient are variables in Paddle.
m.def("is_compiled_with_cuda", IsCompiledWithCUDA);
m.def("is_compiled_with_ascend", IsCompiledWithAscend);
m.def("is_compiled_with_npu", IsCompiledWithNPU);
m.def("is_compiled_with_xpu", IsCompiledWithXPU);
m.def("is_compiled_with_mkldnn", IsCompiledWithMKLDNN);
m.def("supports_bfloat16", SupportsBfloat16);
......
......@@ -285,6 +285,22 @@ void SetTensorFromPyArrayT(
PADDLE_THROW(platform::errors::PermissionDenied(
"Cannot use XPUPlace in CPU/GPU version, "
"Please recompile or reinstall Paddle with XPU support."));
#endif
} else if (paddle::platform::is_npu_place(place)) {
#ifdef PADDLE_WITH_ASCEND_CL
platform::Place tmp_place = place;
platform::NPUDeviceGuard guard(
BOOST_GET_CONST(platform::NPUPlace, tmp_place).device);
auto dst = self->mutable_data<T>(place);
platform::NPUMemcpySync(dst, array.data(), array.nbytes(),
ACL_MEMCPY_HOST_TO_DEVICE);
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto &ctx = *pool.Get(place);
ctx.Wait();
#else
PADDLE_THROW(platform::errors::PermissionDenied(
"Cannot use NPUPlace in CPU/GPU/XPU version. "
"Please recompile or reinstall Paddle with NPU support."));
#endif
} else {
#ifdef PADDLE_WITH_CUDA
......
......@@ -232,6 +232,7 @@ from .framework import ParamAttr #DEFINE_ALIAS
from .framework import create_parameter #DEFINE_ALIAS
from .framework import CPUPlace #DEFINE_ALIAS
from .framework import CUDAPlace #DEFINE_ALIAS
from .framework import NPUPlace #DEFINE_ALIAS
from .framework import CUDAPinnedPlace #DEFINE_ALIAS
from .framework import grad #DEFINE_ALIAS
......@@ -256,6 +257,7 @@ from .device import set_device
from .device import get_device
from .device import is_compiled_with_cuda #DEFINE_ALIAS
from .device import is_compiled_with_xpu
from .device import is_compiled_with_npu
from .device import XPUPlace
# from .tensor.tensor import Tensor #DEFINE_ALIAS
# from .tensor.tensor import LoDTensor #DEFINE_ALIAS
......
......@@ -32,12 +32,28 @@ __all__ = [
# 'cuda_places',
# 'CUDAPinnedPlace',
# 'CUDAPlace',
'is_compiled_with_cuda'
'is_compiled_with_cuda',
'is_compiled_with_npu'
]
_cudnn_version = None
def is_compiled_with_npu():
    """
    Return whether this PaddlePaddle wheel was built with NPU support.

    Thin wrapper over the C++ binding ``core.is_compiled_with_npu``.

    Returns:
        bool: ``True`` if models can be run on NPU devices with this
        package, ``False`` otherwise.

    Examples:
        .. code-block:: python

            import paddle
            support_npu = paddle.is_compiled_with_npu()
    """
    return core.is_compiled_with_npu()
def is_compiled_with_xpu():
"""
Whether paddle was built with WITH_XPU=ON to support Baidu Kunlun
......@@ -163,6 +179,7 @@ def set_device(device):
device_id = device_info_list[1]
device_id = int(device_id)
place = core.XPUPlace(device_id)
framework._set_expected_place(place)
return place
......
......@@ -68,7 +68,8 @@ from .input import embedding, one_hot
from . import distribute_lookup_table
from .param_attr import ParamAttr, WeightNormParamAttr
from .data_feeder import DataFeeder
from .core import LoDTensor, LoDTensorArray, CPUPlace, XPUPlace, CUDAPlace, CUDAPinnedPlace, Scope, _Scope
from .core import LoDTensor, LoDTensorArray, Scope, _Scope
from .core import CPUPlace, XPUPlace, CUDAPlace, CUDAPinnedPlace, NPUPlace
from .incubate import fleet
from .incubate import data_generator
from .transpiler import DistributeTranspiler, \
......@@ -124,6 +125,7 @@ __all__ = framework.__all__ + executor.__all__ + \
'XPUPlace',
'CUDAPlace',
'CUDAPinnedPlace',
'NPUPlace',
'Tensor',
'ParamAttr',
'WeightNormParamAttr',
......
......@@ -1213,6 +1213,7 @@ class Executor(object):
# In distributed training, the compiled program is saved in Program._graph
has_compiled_graph = isinstance(program._graph,
compiler.CompiledProgram)
if has_compiled_graph:
program._graph._compile(scope, self.place)
# _graph in program does not support inference since the _graph is optimized
......
......@@ -5854,7 +5854,7 @@ def _get_paddle_place(place):
if place is None:
return place
if isinstance(place, (core.Place, core.XPUPlace, core.CPUPlace,
core.CUDAPinnedPlace, core.CUDAPlace)):
core.CUDAPinnedPlace, core.CUDAPlace, core.NPUPlace)):
return place
if not isinstance(place, str):
......@@ -5864,9 +5864,11 @@ def _get_paddle_place(place):
place = place.lower()
if (place == "cpu"):
return core.CPUPlace()
if (place == "device"):
return core.Place()
# GPU
avaliable_gpu_place = re.match(r'gpu:\d+', place)
if place == "gpu_pinned" or place == "gpu" or avaliable_gpu_place:
if not core.is_compiled_with_cuda():
......@@ -5882,6 +5884,8 @@ def _get_paddle_place(place):
device_id = place_info_list[1]
device_id = int(device_id)
return core.CUDAPlace(device_id)
# XPU
avaliable_xpu_place = re.match(r'xpu:\d+', place)
if avaliable_xpu_place:
if not core.is_compiled_with_xpu():
......@@ -5892,9 +5896,22 @@ def _get_paddle_place(place):
device_id = place_info_list[1]
device_id = int(device_id)
return core.XPUPlace(device_id)
# NPU
avaliable_npu_place = re.match(r'npu:\d+', place)
if avaliable_npu_place:
if not core.is_compiled_with_npu():
raise ValueError(
"The device should not be {}, since PaddlePaddle is " \
"not compiled with NPU".format(avaliable_npu_place))
place_info_list = place.split(':', 1)
device_id = place_info_list[1]
device_id = int(device_id)
return core.NPUPlace(device_id)
raise ValueError(
"paddle support CPUPlace, CUDAPlace,CUDAPinnedPlace and XPUPlace, Please check your Place Input"
)
"Paddle supports CPUPlace, CUDAPlace,CUDAPinnedPlace, XPUPlace and NPUPlace, but received {}.".
format(place))
def _get_paddle_place_list(places):
......
......@@ -608,6 +608,10 @@ if (WITH_XPU)
add_subdirectory(xpu)
endif()
# NPU op unit tests live in their own subdirectory and are only built when
# Paddle is compiled with Ascend CL (NPU) support.
if (WITH_ASCEND_CL)
    add_subdirectory(npu)
endif()
if (WITH_MKLDNN)
add_subdirectory(mkldnn)
endif()
......
# Collect every test_*.py file in this directory and register each one
# (by its module name, extension stripped) as a python test module.
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
foreach(TEST_OP ${TEST_OPS})
    py_test_modules(${TEST_OP} MODULES ${TEST_OP})
endforeach(TEST_OP)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import unittest
import sys
sys.path.append("..")
from op_test import OpTest, _set_use_system_allocator
import paddle
import paddle.fluid as fluid
paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestElementwiseAddOp(OpTest):
    # Forward-only OpTest for the elementwise_add NPU kernel.  The init_*
    # hooks let subclasses vary the dtype, operands, kernel flags and axis.

    def setUp(self):
        self.set_npu()
        self.op_type = "elementwise_add"
        self.place = paddle.NPUPlace(0)

        # init_dtype() must run before init_input_output(), which reads
        # self.dtype when drawing the random operands.
        self.init_dtype()
        self.init_input_output()
        self.init_kernel_type()
        self.init_axis()

        self.inputs = {
            'X': OpTest.np_dtype_to_fluid_dtype(self.x),
            'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
        }
        self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn}
        self.outputs = {'Out': self.out}

    def set_npu(self):
        # Marks the whole class as an NPU op test for the OpTest machinery.
        self.__class__.use_npu = True

    def init_kernel_type(self):
        self.use_mkldnn = False

    def init_input_output(self):
        # Same-shape operands, so no broadcasting is exercised here.
        self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
        self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
        self.out = np.add(self.x, self.y)

    def init_dtype(self):
        self.dtype = np.float32

    def init_axis(self):
        self.axis = -1

    def test_check_output(self):
        # check_dygraph=False: only the static-graph NPU kernel is tested.
        self.check_output_with_place(self.place, check_dygraph=False)

    # TODO(ascendrc): Test grad op after it is implemented.
    # def test_check_grad_normal(self):
    #     self.check_grad_with_place(
    #         self.place, ['X', 'Y'],
    #         'Out',
    #         max_relative_error=0.006,
    #         check_dygraph=False)
    #
    # def test_check_grad_ingore_x(self):
    #     self.check_grad_with_place(
    #         self.place, ['Y'],
    #         'Out',
    #         no_grad_set=set("X"),
    #         max_relative_error=0.006,
    #         check_dygraph=False)
    #
    # def test_check_grad_ingore_y(self):
    #     self.check_grad_with_place(
    #         self.place, ['X'],
    #         'Out',
    #         no_grad_set=set("Y"),
    #         max_relative_error=0.006,check_dygraph=False)
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestAddAPI(unittest.TestCase):
    # Exercises the paddle.add Python API end-to-end on an NPU executor.

    def test_name(self):
        # The user-supplied name should appear in the output variable name.
        with paddle.static.program_guard(paddle.static.Program()):
            x = paddle.static.data(name="x", shape=[2, 3], dtype="float32")
            y = paddle.static.data(name='y', shape=[2, 3], dtype='float32')

            y_1 = paddle.add(x, y, name='add_res')
            self.assertEqual(('add_res' in y_1.name), True)

    def test_static(self):
        # Builds reshape -> add -> reshape and runs it on NPUPlace(0),
        # checking that inputs round-trip unchanged and the sum is exact.
        with paddle.static.program_guard(paddle.static.Program()):
            x_np = np.array([2, 3, 4]).astype('float32')
            y_np = np.array([1, 5, 2]).astype('float32')

            x = paddle.static.data(name="x", shape=[3], dtype='float32')
            y = paddle.static.data(name="y", shape=[3], dtype='float32')

            x_reshape = paddle.reshape(x, [3, 1])
            y_reshape = paddle.reshape(y, [3, 1])
            z = paddle.add(x_reshape, y_reshape)
            z = paddle.reshape(z, shape=[3])

            place = paddle.NPUPlace(0)
            exe = paddle.static.Executor(place)
            x_value, y_value, z_value = exe.run(feed={"x": x_np,
                                                      "y": y_np},
                                                fetch_list=[x, y, z])

            z_expected = np.array([3., 8., 6.])
            self.assertEqual(
                (x_value == x_np).all(),
                True,
                msg="x_value = {}, but expected {}".format(x_value, x_np))
            self.assertEqual(
                (y_value == y_np).all(),
                True,
                msg="y_value = {}, but expected {}".format(y_value, y_np))
            self.assertEqual(
                (z_value == z_expected).all(),
                True,
                msg="z_value = {}, but expected {}".format(z_value, z_expected))

    def test_backward(self):
        # TODO(ascendrc): Test backward after add grad npu op implemented.
        pass
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestAddError(unittest.TestCase):
    # Verifies that paddle.add rejects invalid inputs with TypeError.

    def test_errors(self):
        with paddle.static.program_guard(paddle.static.Program()):
            # the input of elementwise_add must be Variable.
            x1 = fluid.create_lod_tensor(
                np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.NPUPlace(0))
            y1 = fluid.create_lod_tensor(
                np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.NPUPlace(0))
            self.assertRaises(TypeError, paddle.add, x1, y1)

            # the input dtype must be float16 or float32 or float64 or int32 or int64
            x2 = paddle.static.data(
                name='x2', shape=[3, 4, 5, 6], dtype="uint8")
            y2 = paddle.static.data(
                name='y2', shape=[3, 4, 5, 6], dtype="uint8")
            self.assertRaises(TypeError, paddle.add, x2, y2)
# Allow running this elementwise_add NPU test file directly.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import unittest
import sys
sys.path.append("..")
from op_test import OpTest
import paddle
import paddle.fluid as fluid
paddle.enable_static()
SEED = 2021
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestElementwiseSubOp(OpTest):
    # Forward-only OpTest for the elementwise_sub NPU kernel.  The init_*
    # hooks let subclasses vary the dtype, operands, kernel flags and axis.

    def setUp(self):
        self.set_npu()
        self.op_type = "elementwise_sub"
        self.place = paddle.NPUPlace(0)

        # init_dtype() must run before init_input_output(), which reads
        # self.dtype when drawing the random operands.
        self.init_dtype()
        self.init_input_output()
        self.init_kernel_type()
        self.init_axis()

        self.inputs = {
            'X': OpTest.np_dtype_to_fluid_dtype(self.x),
            'Y': OpTest.np_dtype_to_fluid_dtype(self.y)
        }
        self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn}
        self.outputs = {'Out': self.out}

    def set_npu(self):
        # Marks the whole class as an NPU op test for the OpTest machinery.
        self.__class__.use_npu = True

    def init_kernel_type(self):
        self.use_mkldnn = False

    def init_input_output(self):
        # Same-shape operands, so no broadcasting is exercised here.
        self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
        self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype)
        self.out = np.subtract(self.x, self.y)

    def init_dtype(self):
        self.dtype = np.float32

    def init_axis(self):
        self.axis = 0

    def test_check_output(self):
        # check_dygraph=False: only the static-graph NPU kernel is tested.
        self.check_output_with_place(self.place, check_dygraph=False)

    # TODO(ascendrc): For grad tests, OpTest raises FatalError:Segmentation fault
    # when call op.run, which may be caused by system environment exception
    # and the exact cause has not be located.
    # def test_check_grad_normal(self):
    #     self.check_grad_with_place(
    #         self.place, ['X', 'Y'],
    #         'Out',
    #         max_relative_error=0.006,
    #         check_dygraph=False)
    #
    # def test_check_grad_ingore_x(self):
    #     self.check_grad_with_place(
    #         self.place, ['Y'],
    #         'Out',
    #         no_grad_set=set("X"),
    #         max_relative_error=0.006,
    #         check_dygraph=False)
    #
    # def test_check_grad_ingore_y(self):
    #     self.check_grad_with_place(
    #         self.place, ['X'],
    #         'Out',
    #         no_grad_set=set("Y"),
    #         max_relative_error=0.006,check_dygraph=False)
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestSubtractAPI(unittest.TestCase):
    # Exercises the paddle.subtract Python API end-to-end on an NPU executor.

    def test_name(self):
        # NOTE(review): the op is still named 'add_res' here (copied from the
        # add test); the check works but the name is misleading.
        with paddle.static.program_guard(paddle.static.Program()):
            x = paddle.static.data(name="x", shape=[2, 3], dtype="float32")
            y = paddle.static.data(name='y', shape=[2, 3], dtype='float32')

            y_1 = paddle.subtract(x, y, name='add_res')
            self.assertEqual(('add_res' in y_1.name), True)

    def test_static(self):
        # Builds reshape -> subtract -> reshape and runs it on NPUPlace(0),
        # checking that inputs round-trip unchanged and the difference is exact.
        with paddle.static.program_guard(paddle.static.Program()):
            x_np = np.array([2, 3, 4]).astype('float32')
            y_np = np.array([1, 5, 2]).astype('float32')

            x = paddle.static.data(name="x", shape=[3], dtype='float32')
            y = paddle.static.data(name="y", shape=[3], dtype='float32')

            x_reshape = paddle.reshape(x, [3, 1])
            y_reshape = paddle.reshape(y, [3, 1])
            z = paddle.subtract(x_reshape, y_reshape)
            z = paddle.reshape(z, shape=[3])

            place = paddle.NPUPlace(0)
            exe = paddle.static.Executor(place)
            x_value, y_value, z_value = exe.run(feed={"x": x_np,
                                                      "y": y_np},
                                                fetch_list=[x, y, z])

            z_expected = np.array([1., -2., 2.])
            self.assertEqual(
                (x_value == x_np).all(),
                True,
                msg="x_value = {}, but expected {}".format(x_value, x_np))
            self.assertEqual(
                (y_value == y_np).all(),
                True,
                msg="y_value = {}, but expected {}".format(y_value, y_np))
            self.assertEqual(
                (z_value == z_expected).all(),
                True,
                msg="z_value = {}, but expected {}".format(z_value, z_expected))
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestSubtractError(unittest.TestCase):
    # Verifies that paddle.subtract rejects invalid inputs with TypeError.

    def test_errors(self):
        with paddle.static.program_guard(paddle.static.Program()):
            # the input of elementwise_sub must be Variable.
            x1 = fluid.create_lod_tensor(
                np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.NPUPlace(0))
            y1 = fluid.create_lod_tensor(
                np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.NPUPlace(0))
            self.assertRaises(TypeError, paddle.subtract, x1, y1)

            # the input dtype must be float16 or float32 or float64 or int32 or int64
            x2 = paddle.static.data(
                name='x2', shape=[3, 4, 5, 6], dtype="uint8")
            y2 = paddle.static.data(
                name='y2', shape=[3, 4, 5, 6], dtype="uint8")
            self.assertRaises(TypeError, paddle.subtract, x2, y2)
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestSubtractNet(unittest.TestCase):
    """Trains a tiny add/assign/subtract + fc network for 100 steps on NPU
    and on CPU with identical seeds, then checks that the two devices
    produce matching predictions and loss."""

    def _test(self, run_npu=True):
        main_prog = paddle.static.Program()
        startup_prog = paddle.static.Program()
        main_prog.random_seed = SEED
        startup_prog.random_seed = SEED
        np.random.seed(SEED)

        a_np = np.random.random(size=(32, 32)).astype('float32')
        b_np = np.random.random(size=(32, 32)).astype('float32')
        label_np = np.random.randint(2, size=(32, 1)).astype('int64')

        with paddle.static.program_guard(main_prog, startup_prog):
            a = paddle.static.data(name="a", shape=[32, 32], dtype='float32')
            b = paddle.static.data(name="b", shape=[32, 32], dtype='float32')
            label = paddle.static.data(
                name="label", shape=[32, 1], dtype='int64')

            # z = (a + b) - b, routed through assign so subtract sees a
            # distinct variable; avoids shadowing the builtin `sum`.
            total = paddle.add(a, b)
            c = paddle.assign(b)
            z = paddle.subtract(total, c)

            fc_1 = fluid.layers.fc(input=z, size=128)
            prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')

            cost = fluid.layers.cross_entropy(input=prediction, label=label)
            loss = fluid.layers.reduce_mean(cost)
            sgd = fluid.optimizer.SGD(learning_rate=0.01)
            sgd.minimize(loss)

        place = paddle.NPUPlace(0) if run_npu else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(startup_prog)

        for epoch in range(100):
            pred_res, loss_res = exe.run(
                main_prog,
                feed={"a": a_np,
                      "b": b_np,
                      "label": label_np},
                fetch_list=[prediction, loss])
            if epoch % 10 == 0:
                print("Epoch {} | Prediction[0]: {}, Loss: {}".format(
                    epoch, pred_res[0], loss_res))

        return pred_res, loss_res

    def test_npu(self):
        npu_pred, npu_loss = self._test(True)
        cpu_pred, cpu_loss = self._test(False)

        self.assertTrue(np.allclose(npu_pred, cpu_pred))
        self.assertTrue(np.allclose(npu_loss, cpu_loss))
# Allow running this elementwise_sub NPU test file directly.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import paddle
import numpy as np
from paddle.fluid import core
paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestNpuPlace(unittest.TestCase):
    # Checks that a generic core.Place can be assigned from paddle.NPUPlace
    # and then reports the right device type and device id.

    def test(self):
        p = core.Place()
        p.set_place(paddle.NPUPlace(0))

        self.assertTrue(p.is_npu_place())
        self.assertEqual(p.npu_device_id(), 0)
@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class TestNpuPlaceError(unittest.TestCase):
    # Running a CompiledProgram goes through ParallelExecutor, which does not
    # support NPU; this must surface as a RuntimeError.

    def test_static(self):
        # NPU is not supported in ParallelExecutor
        prog = paddle.static.Program()
        with paddle.static.program_guard(prog):
            x_np = np.array([2, 3, 4]).astype('float32')
            y_np = np.array([1, 5, 2]).astype('float32')

            x = paddle.static.data(name="x", shape=[3], dtype='float32')
            y = paddle.static.data(name="y", shape=[3], dtype='float32')
            z = paddle.add(x, y)

            compiled_prog = paddle.static.CompiledProgram(prog)
            place = paddle.NPUPlace(0)
            exe = paddle.static.Executor(place)

            with self.assertRaisesRegex(RuntimeError,
                                        "NPU is not supported in ParallelExecutor"):
                exe.run(compiled_prog, feed={"x": x_np, "y": y_np})
# Allow running this test file directly from the command line.
if __name__ == '__main__':
    unittest.main()
......@@ -243,7 +243,10 @@ class OpTest(unittest.TestCase):
np.random.seed(123)
random.seed(124)
cls._use_system_allocator = _set_use_system_allocator(True)
if paddle.is_compiled_with_npu():
cls._use_system_allocator = _set_use_system_allocator(False)
else:
cls._use_system_allocator = _set_use_system_allocator(True)
@classmethod
def tearDownClass(cls):
......@@ -272,6 +275,9 @@ class OpTest(unittest.TestCase):
def is_mkldnn_op_test():
return hasattr(cls, "use_mkldnn") and cls.use_mkldnn == True
def is_npu_op_test():
return hasattr(cls, "use_npu") and cls.use_npu == True
if not hasattr(cls, "op_type"):
raise AssertionError(
"This test do not have op_type in class attrs, "
......@@ -292,7 +298,8 @@ class OpTest(unittest.TestCase):
and cls.op_type not in op_accuracy_white_list.NO_FP64_CHECK_GRAD_OP_LIST \
and not hasattr(cls, 'exist_fp64_check_grad') \
and not is_xpu_op_test() \
and not is_mkldnn_op_test():
and not is_mkldnn_op_test() \
and not is_npu_op_test():
raise AssertionError(
"This test of %s op needs check_grad with fp64 precision." %
cls.op_type)
......@@ -1183,7 +1190,8 @@ class OpTest(unittest.TestCase):
# Check inplace for given op, its grad op, its grad_grad op, etc.
# No effect on original OpTest
# Currently not support ParallelExecutor on XPUPlace.
if not paddle.is_compiled_with_xpu():
if not paddle.is_compiled_with_xpu(
) and not paddle.is_compiled_with_npu():
self.check_inplace_output_with_place(
place, no_check_set=no_check_set, inplace_atol=inplace_atol)
......
......@@ -15,54 +15,39 @@
from __future__ import print_function
import unittest
from op_test import OpTest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.framework as framework
import warnings
import paddle
class TestStaticDeviceManage(unittest.TestCase):
def test_cpu_device(self):
paddle.set_device('cpu')
def _test_device(self, device_name, device_class):
paddle.set_device(device_name)
out1 = paddle.zeros(shape=[1, 3], dtype='float32')
out2 = paddle.ones(shape=[1, 3], dtype='float32')
out3 = paddle.concat(x=[out1, out2], axis=0)
exe = paddle.fluid.Executor()
exe = paddle.static.Executor()
exe.run(paddle.fluid.default_startup_program())
res = exe.run(fetch_list=[out3])
device = paddle.get_device()
self.assertEqual(isinstance(exe.place, core.CPUPlace), True)
self.assertEqual(device, "cpu")
self.assertEqual(isinstance(exe.place, device_class), True)
self.assertEqual(device, device_name)
def test_cpu_device(self):
self._test_device("cpu", core.CPUPlace)
def test_gpu_device(self):
if core.is_compiled_with_cuda():
out1 = paddle.zeros(shape=[1, 3], dtype='float32')
out2 = paddle.ones(shape=[1, 3], dtype='float32')
out3 = paddle.concat(x=[out1, out2], axis=0)
paddle.set_device('gpu:0')
exe = paddle.fluid.Executor()
exe.run(paddle.fluid.default_startup_program())
res = exe.run(fetch_list=[out3])
device = paddle.get_device()
self.assertEqual(isinstance(exe.place, core.CUDAPlace), True)
self.assertEqual(device, "gpu:0")
self._test_device("gpu:0", core.CUDAPlace)
def test_xpu_device(self):
if core.is_compiled_with_xpu():
out1 = paddle.zeros(shape=[1, 3], dtype='float32')
out2 = paddle.ones(shape=[1, 3], dtype='float32')
out3 = paddle.concat(x=[out1, out2], axis=0)
paddle.set_device('xpu:0')
exe = paddle.fluid.Executor()
exe.run(paddle.fluid.default_startup_program())
res = exe.run(fetch_list=[out3])
device = paddle.get_device()
self.assertEqual(isinstance(exe.place, core.XPUPlace), True)
self.assertEqual(device, "xpu:0")
self._test_device("xpu:0", core.XPUPlace)
class TestImperativeDeviceManage(unittest.TestCase):
......
......@@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO: import framework api under this directory
# TODO: import framework api under this directory
__all__ = [
'create_parameter', 'ParamAttr', 'CPUPlace', 'CUDAPlace', 'CUDAPinnedPlace',
'get_default_dtype', 'set_default_dtype'
'NPUPlace', 'get_default_dtype', 'set_default_dtype'
]
__all__ += ['grad', 'LayerList', 'load', 'save', 'no_grad', 'DataParallel']
......@@ -31,6 +31,7 @@ from ..fluid.layers.tensor import create_parameter #DEFINE_ALIAS
from ..fluid.core import CPUPlace #DEFINE_ALIAS
from ..fluid.core import CUDAPlace #DEFINE_ALIAS
from ..fluid.core import CUDAPinnedPlace #DEFINE_ALIAS
from ..fluid.core import NPUPlace #DEFINE_ALIAS
from ..fluid.core import VarBase #DEFINE_ALIAS
from paddle.fluid import core #DEFINE_ALIAS
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册