未验证 提交 127da101 编写于 作者: K Kai Song 提交者: GitHub

fix 'BlasAXPBY unimplemented' error with custom device (#48762)

* fix 'BlasAXPBY unimplemented' error with custom device

* fix utils CMakeLists bug
上级 01f5210e
...@@ -13,9 +13,12 @@ set(eager_deps ...@@ -13,9 +13,12 @@ set(eager_deps
eager_nan_inf_utils eager_nan_inf_utils
grad_node_info grad_node_info
grad_tensor_holder grad_tensor_holder
accumulation_node
custom_operator_node) custom_operator_node)
# Add accumulation_node to the eager dependency list for every configuration
# except the one where WITH_PYTHON is off and ON_INFER is on.
# NOTE(review): presumably this mirrors the guard under which the
# accumulation_node target itself is declared -- confirm against its CMakeLists.
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
  list(APPEND eager_deps accumulation_node)
endif()
set(fluid_deps set(fluid_deps
tracer tracer
layer layer
...@@ -33,9 +36,9 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) ...@@ -33,9 +36,9 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
endif() endif()
add_subdirectory(api) add_subdirectory(api)
add_subdirectory(accumulation)
add_subdirectory(custom_operator) add_subdirectory(custom_operator)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
add_subdirectory(accumulation)
add_subdirectory(tests) add_subdirectory(tests)
add_subdirectory(pylayer) add_subdirectory(pylayer)
cc_library( cc_library(
......
cc_library( if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library(
accumulation_node accumulation_node
SRCS accumulation_node.cc SRCS accumulation_node.cc
DEPS gradient_accumulator phi_api grad_node_info) DEPS gradient_accumulator phi_api grad_node_info)
endif()
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/accumulation/accumulation_node.h"
#include "glog/logging.h" #include "glog/logging.h"
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/utils.h" #include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/imperative/gradient_accumulator.h"
...@@ -44,8 +45,12 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor, ...@@ -44,8 +45,12 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
// Accumulation // Accumulation
if (LIKELY(t.is_dense_tensor())) { if (LIKELY(t.is_dense_tensor())) {
if (LIKELY(tensor->is_dense_tensor())) { if (LIKELY(tensor->is_dense_tensor())) {
if (t.is_custom_device()) {
*tensor = add_ad_func(t, *tensor);
} else {
paddle::imperative::TensorAdd<paddle::experimental::Tensor>(t, paddle::imperative::TensorAdd<paddle::experimental::Tensor>(t,
tensor); tensor);
}
} else { } else {
// TODO(jiabin): Support Other TensorBase later // TODO(jiabin): Support Other TensorBase later
// TODO(zhanlve): Replace SelectedRowsAddTensor with // TODO(zhanlve): Replace SelectedRowsAddTensor with
...@@ -68,9 +73,13 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor, ...@@ -68,9 +73,13 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
paddle::experimental::Tensor tensor_values( paddle::experimental::Tensor tensor_values(
std::make_shared<phi::DenseTensor>( std::make_shared<phi::DenseTensor>(
tensor_sparse->non_zero_elements())); tensor_sparse->non_zero_elements()));
if (t.is_custom_device()) {
tensor_values = add_ad_func(t_values, tensor_values);
} else {
paddle::imperative::TensorAdd<paddle::experimental::Tensor>( paddle::imperative::TensorAdd<paddle::experimental::Tensor>(
t_values, &tensor_values); t_values, &tensor_values);
} }
}
} else { } else {
// TODO(jiabin): Support Other TensorBase later // TODO(jiabin): Support Other TensorBase later
// TODO(zhanlve): Replace SelectedRowsAddTensor with // TODO(zhanlve): Replace SelectedRowsAddTensor with
......
cc_library( cc_library(
global_utils
SRCS global_utils.cc
DEPS place tracer)
if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library(
tensor_utils tensor_utils
SRCS tensor_utils.cc SRCS tensor_utils.cc
DEPS phi_api autograd_meta grad_node_info accumulation_node) DEPS phi_api autograd_meta grad_node_info accumulation_node)
cc_library( cc_library(
hook_utils hook_utils
SRCS hook_utils.cc SRCS hook_utils.cc
DEPS phi tensor_utils autograd_meta grad_node_info utils accumulation_node) DEPS phi tensor_utils autograd_meta grad_node_info utils accumulation_node)
cc_library( else()
global_utils cc_library(
SRCS global_utils.cc tensor_utils
DEPS place tracer) SRCS tensor_utils.cc
DEPS phi_api autograd_meta grad_node_info)
cc_library(
hook_utils
SRCS hook_utils.cc
DEPS phi tensor_utils autograd_meta grad_node_info utils)
endif()
...@@ -2,14 +2,14 @@ cc_test_old(test_egr_ds_eager_tensor SRCS eager_tensor_test.cc DEPS ...@@ -2,14 +2,14 @@ cc_test_old(test_egr_ds_eager_tensor SRCS eager_tensor_test.cc DEPS
${eager_deps}) ${eager_deps})
cc_test_old(test_egr_ds_auotgrad_meta SRCS autograd_meta_test.cc DEPS cc_test_old(test_egr_ds_auotgrad_meta SRCS autograd_meta_test.cc DEPS
${eager_deps}) ${eager_deps})
cc_test_old(test_egr_ds_grad_node_info SRCS grad_node_info_test.cc DEPS
${eager_deps})
cc_test_old(test_egr_ds_accumulation_node SRCS accumulation_node_test.cc DEPS
${eager_deps})
cc_test_old(test_egr_ds_tensor_wrapper SRCS tensor_wrapper_test.cc DEPS
${eager_deps})
if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
cc_test_old(test_egr_ds_grad_tensor_holder SRCS grad_tensor_holder_test.cc cc_test_old(test_egr_ds_grad_tensor_holder SRCS grad_tensor_holder_test.cc
DEPS ${eager_deps} ${generated_deps}) DEPS ${eager_deps} ${generated_deps})
cc_test_old(test_egr_ds_grad_node_info SRCS grad_node_info_test.cc DEPS
${eager_deps} ${generated_deps})
cc_test_old(test_egr_ds_accumulation_node SRCS accumulation_node_test.cc DEPS
${eager_deps} ${generated_deps})
cc_test_old(test_egr_ds_tensor_wrapper SRCS tensor_wrapper_test.cc DEPS
${eager_deps} ${generated_deps})
endif() endif()
cc_test(
test_egr_task_tensor_utils
SRCS tensor_utils_test.cc
DEPS ${eager_deps})
cc_test(
test_egr_task_eager_utils
SRCS eager_utils_test.cc
DEPS ${eager_deps})
cc_test(
test_egr_task_forward_autograd
SRCS forward_autograd_test.cc
DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
cc_test( cc_test(
test_egr_task_nan_inf_utils test_egr_task_nan_inf_utils
SRCS nan_inf_utils_test.cc SRCS nan_inf_utils_test.cc
...@@ -44,4 +32,16 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) ...@@ -44,4 +32,16 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
test_egr_task_autocodegen test_egr_task_autocodegen
SRCS generated_test.cc SRCS generated_test.cc
DEPS ${eager_deps} ${fluid_deps} ${generated_deps}) DEPS ${eager_deps} ${fluid_deps} ${generated_deps})
cc_test(
test_egr_task_tensor_utils
SRCS tensor_utils_test.cc
DEPS ${eager_deps} ${generated_deps})
cc_test(
test_egr_task_eager_utils
SRCS eager_utils_test.cc
DEPS ${eager_deps} ${generated_deps})
cc_test(
test_egr_task_forward_autograd
SRCS forward_autograd_test.cc
DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node)
endif() endif()
...@@ -59,6 +59,7 @@ class TestCustomCPUPlugin(unittest.TestCase): ...@@ -59,6 +59,7 @@ class TestCustomCPUPlugin(unittest.TestCase):
self._test_eager_copy_to() self._test_eager_copy_to()
self._test_fallback_kernel() self._test_fallback_kernel()
self._test_scalar() self._test_scalar()
self._test_custom_device_gradient_accumulation()
self._test_custom_device_dataloader() self._test_custom_device_dataloader()
self._test_custom_device_mnist() self._test_custom_device_mnist()
...@@ -208,6 +209,60 @@ class TestCustomCPUPlugin(unittest.TestCase): ...@@ -208,6 +209,60 @@ class TestCustomCPUPlugin(unittest.TestCase):
k_t = paddle.to_tensor([3], dtype="int32") k_t = paddle.to_tensor([3], dtype="int32")
value_1, indices_1 = paddle.topk(data_1, k=k_t) value_1, indices_1 = paddle.topk(data_1, k=k_t)
def _test_custom_device_gradient_accumulation(self):
    """Run two backward passes over the same forward result on 'custom_cpu'.

    A small one-layer MNIST classifier is built, one batch is pulled from
    the training set, and the cross-entropy loss is back-propagated twice:
    first with ``retain_graph=True``, then again on a loss recomputed from
    the same predictions. An SGD step follows. The method makes no value
    assertions; it only has to finish without raising.

    NOTE(review): presumably the second ``backward()`` is what exercises
    gradient accumulation on the custom device -- confirm against the
    eager accumulation code.
    """
    import paddle

    class MNIST(paddle.nn.Layer):
        """Reshape -> matmul -> softmax classifier with an accuracy metric."""

        def __init__(self):
            super().__init__()
            # Flattened input width (1 * 28 * 28) and number of output columns.
            self.shape = 1 * 28 * 28
            self.size = 10
            self.output_weight = self.create_parameter(
                [self.shape, self.size]
            )
            self.accuracy = paddle.metric.Accuracy()

        def forward(self, inputs, label=None):
            flattened = paddle.reshape(inputs, shape=[-1, self.shape])
            projected = paddle.matmul(flattened, self.output_weight)
            probs = paddle.nn.functional.softmax(projected)

            # Without labels only the predictions are returned.
            if label is None:
                return probs

            # With labels, restart the metric and report this call's accuracy.
            self.accuracy.reset()
            hits = self.accuracy.compute(probs, label)
            self.accuracy.update(hits)
            return probs, self.accuracy.accumulate()

    paddle.set_device('custom_cpu')

    to_tensor = paddle.vision.transforms.Compose(
        [paddle.vision.transforms.ToTensor()]
    )
    train_set = paddle.vision.datasets.MNIST(mode='train', transform=to_tensor)
    train_loader = paddle.io.DataLoader(
        train_set, batch_size=64, num_workers=1, shuffle=True
    )

    model = MNIST()
    optimizer = paddle.optimizer.SGD(
        learning_rate=0.01, parameters=model.parameters()
    )

    # A single batch is enough for this check.
    batch = next(train_loader())
    images = batch[0]
    targets = batch[1]
    targets_int32 = paddle.cast(targets, 'int32')

    predictions, _accuracy = model(images, targets_int32)

    # First backward keeps the graph so `predictions` can be reused below.
    first_loss = paddle.nn.functional.cross_entropy(predictions, targets_int32)
    first_loss.backward(retain_graph=True)

    # Second backward runs on a loss rebuilt from the same predictions.
    second_loss = paddle.nn.functional.cross_entropy(predictions, targets_int32)
    second_loss.backward()

    optimizer.step()
if __name__ == '__main__': if __name__ == '__main__':
if os.name == 'nt' or sys.platform.startswith('darwin'): if os.name == 'nt' or sys.platform.startswith('darwin'):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册