Unverified  Commit a119686c authored by ronnywang and committed by GitHub

[NPU] fix pool_op, interpolate_op (#45445)

* [NPU] fix pool_op, interpolate_op

* fix slice_op_npu

* fix test_mixed_precision_npu
Parent 45a91158
@@ -25,12 +25,14 @@ using DataLayout = framework::DataLayout;
 inline static void CheckArgument(const framework::ExecutionContext& ctx) {
   const std::string interp_method = ctx.Attr<std::string>("interp_method");
+#if (CANN_VERSION_CODE < 512000)
   bool align_corners = ctx.Attr<bool>("align_corners");
   PADDLE_ENFORCE_EQ(
       align_corners,
       false,
       platform::errors::InvalidArgument(
           "NPU Interpolate Kernel has diff when align_corners is true."));
+#endif
   PADDLE_ENFORCE_EQ(
       interp_method,
       "nearest",
......
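Note: this guard means that on CANN toolkits older than 5.1.2 the nearest-interpolation kernel rejects align_corners=true at runtime, since its output differs from the reference there. For intuition, align_corners only changes how output indices map back to input coordinates. A minimal numpy sketch of the two mappings (illustrative only, not Paddle's NPU kernel):

import numpy as np

def nearest_interp_1d(x, out_size, align_corners):
    # Illustrative source-index mapping for 1-D nearest interpolation.
    in_size = x.shape[0]
    out_idx = np.arange(out_size)
    if align_corners and out_size > 1:
        # Grid endpoints of input and output coincide.
        src = np.round(out_idx * (in_size - 1) / (out_size - 1))
    else:
        # Pixel-area mapping; endpoints generally differ.
        src = np.floor(out_idx * in_size / out_size)
    return x[np.clip(src.astype(int), 0, in_size - 1)]

x = np.array([0., 10., 20., 30.])
print(nearest_interp_1d(x, 6, align_corners=False))  # [ 0.  0. 10. 20. 20. 30.]
print(nearest_interp_1d(x, 6, align_corners=True))   # [ 0. 10. 10. 20. 20. 30.]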
@@ -77,6 +77,7 @@ class NPUPoolOpKernel : public framework::OpKernel<T> {
                      data_dims,
                      strides,
                      ksize);
+#if (CANN_VERSION_CODE < 512000)
     PADDLE_ENFORCE_LT(
         std::max(paddings[0], paddings[1]),
         ksize[0],
@@ -91,7 +92,7 @@ class NPUPoolOpKernel : public framework::OpKernel<T> {
             "Paddings should be less than %d, but max(pads[2], pads[3]) is %d.",
             ksize[1],
             std::max(paddings[2], paddings[3])));
-
+#endif
     if (adaptive) {
       std::string pooling_mode = "AdaptiveAvgPool2d";
       if (pooling_type == "max") {
@@ -228,7 +229,7 @@ class NPUPoolGradOpKernel : public framework::OpKernel<T> {
                      data_dims,
                      strides,
                      ksize);
-
+#if (CANN_VERSION_CODE < 512000)
     PADDLE_ENFORCE_LT(
         std::max(paddings[0], paddings[1]),
         ksize[0],
@@ -243,7 +244,7 @@ class NPUPoolGradOpKernel : public framework::OpKernel<T> {
             "Paddings should be less than %d, but max(pads[2], pads[3]) is %d.",
             ksize[1],
             std::max(paddings[2], paddings[3])));
-
+#endif
     if (adaptive || (global_pooling && pooling_type == "max")) {
       PADDLE_ENFORCE_EQ(data_dims[0] % out_data_dims[0],
                         0,
......
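Note: both pooling hunks wrap the same runtime check in #if (CANN_VERSION_CODE < 512000): older CANN pooling kernels require each padding to be strictly smaller than the kernel size along that dimension, while 5.1.2 and newer apparently lift the restriction. A standalone restatement of the guarded condition (function name and signature are ours, not Paddle's API):

def check_pool_paddings(paddings, ksize):
    # paddings = [pad_top, pad_bottom, pad_left, pad_right], ksize = [kh, kw];
    # mirrors the PADDLE_ENFORCE_LT conditions compiled only for CANN < 5.1.2.
    if not max(paddings[0], paddings[1]) < ksize[0]:
        raise ValueError(
            "Paddings should be less than %d, but max(pads[0], pads[1]) is %d."
            % (ksize[0], max(paddings[0], paddings[1])))
    if not max(paddings[2], paddings[3]) < ksize[1]:
        raise ValueError(
            "Paddings should be less than %d, but max(pads[2], pads[3]) is %d."
            % (ksize[1], max(paddings[2], paddings[3])))

check_pool_paddings([1, 1, 1, 1], [3, 3])   # passes
# check_pool_paddings([3, 0, 0, 0], [3, 3])  # would raise: 3 is not < 3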
@@ -130,9 +130,22 @@ class SliceNPUKernel : public framework::OpKernel<T> {
     UpdateAttr(in_dims, axes, starts, ends, &offsets, &size);

-    auto stream = ctx.template device_context<NPUDeviceContext>().stream();
-    const auto& runner = NpuOpRunner(
-        "SliceD", {*input}, {*out}, {{"offsets", offsets}, {"size", size}});
+    auto& dev_ctx = ctx.template device_context<NPUDeviceContext>();
+    auto stream = dev_ctx.stream();
+#if CANN_VERSION_CODE < 512000
+    const auto& runner = NpuOpRunner(
+        "SliceD", {*input}, {*out}, {{"offsets", offsets}, {"size", size}});
+#else
+    NpuOpRunner runner;
+    runner.SetType("Slice")
+        .AddInput(*input)
+        .AddInput(std::move(offsets))
+        .AddInput(std::move(size))
+        .AddOutput(*out);
+#endif
     runner.Run(stream);
   }
 };
......
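Note: on CANN >= 5.1.2 this kernel switches from SliceD, which takes offsets and size as compile-time attributes, to Slice, which takes them as runtime input tensors (hence the AddInput calls). The slicing semantics are the same either way: take size[i] elements starting at offsets[i] along axis i. A small numpy equivalent for reference (illustrative, not the NPU op):

import numpy as np

def slice_by_offsets_size(x, offsets, size):
    # Take size[i] elements starting at offsets[i] along each axis,
    # matching the offsets/size convention used by SliceD and Slice.
    idx = tuple(slice(o, o + s) for o, s in zip(offsets, size))
    return x[idx]

x = np.arange(24).reshape(2, 3, 4)
print(slice_by_offsets_size(x, offsets=[0, 1, 2], size=[2, 2, 2]).shape)  # (2, 2, 2)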
@@ -15,18 +15,121 @@
 import unittest
 import sys
 import paddle
-import paddle.fluid as fluid
-from paddle.fluid import core
-from paddle.fluid.contrib.mixed_precision import fp16_utils
+import paddle.nn as nn
+import paddle.static as static
 import numpy as np

 sys.path.append("..")
-import test_mixed_precision

 paddle.enable_static()


-class AMPTestNpu(test_mixed_precision.AMPTest):
+class SimpleNet(nn.Layer):
+
+    def __init__(self, input_size, output_size):
+        super(SimpleNet, self).__init__()
+        self.linear1 = nn.Linear(input_size, output_size)
+        self.relu1 = nn.ReLU()
+        self.linear2 = nn.Linear(input_size, output_size)
+        self.relu2 = nn.ReLU()
+        self.linear3 = nn.Linear(input_size, output_size)
+
+    def forward(self, x):
+        x = self.linear1(x)
+        # Currently, paddle's relu may hide nan/inf (relu(nan) = 0,
+        # relu(inf) = inf), so do not use it here.
+        # x = self.relu1(x)
+        x = self.linear2(x)
+        # x = self.relu2(x)
+        x = self.linear3(x)
+        return x
+
+
+class AMPTestNpu(unittest.TestCase):

     def setUp(self):
         self.place = paddle.NPUPlace(0)

+    def net(self):
+        input_size = 4096
+        output_size = 4096
+        x = static.data(name='X', shape=[1000, 4096], dtype='float32')
+        label = static.data(name='Y', shape=[1000, 4096], dtype='float32')
+        model = SimpleNet(input_size, output_size)  # define the model
+        mse = paddle.nn.MSELoss()
+        out = model(x)
+        loss = mse(out, label)
+        opt = paddle.fluid.optimizer.Adam(
+            learning_rate=0.0001,
+            parameter_list=model.parameters())  # define the optimizer
+        opt = paddle.static.amp.decorate(opt,
+                                         init_loss_scaling=128.0,
+                                         use_dynamic_loss_scaling=True)
+        opt.minimize(loss)
+        return model, loss, opt
+
+    def test_skip_update(self):
+        input_size = 4096
+        output_size = 4096
+        batch_size = 1000
+        nums_batch = 10
+
+        startup_prog = paddle.static.Program()
+        main_prog = paddle.static.Program()
+        with static.program_guard(main_prog, startup_prog):
+            model, loss, opt = self.net()
+            weight = model.linear1.weight
+            moment1 = opt._optimizer._get_accumulator(
+                opt._optimizer._moment1_acc_str, weight)
+            beta_pow1 = opt._optimizer._get_accumulator(
+                opt._optimizer._beta1_pow_acc_str, weight)
+            fetch_list = [
+                loss, weight, moment1, beta_pow1, 'find_infinite_scale.tmp_0'
+            ]
+
+            exe = paddle.static.Executor(self.place)
+            train_data = [
+                np.random.rand(batch_size, input_size).astype(np.float32)
+                for _ in range(nums_batch)
+            ]
+            labels = [
+                np.random.rand(batch_size, output_size).astype(np.float32)
+                for _ in range(nums_batch)
+            ]
+
+            weight_, moment1_, beta_pow1_ = exe.run(
+                startup_prog, fetch_list=[weight, moment1, beta_pow1])
+            pre_weight_, pre_moment1_, pre_beta_pow1_ = weight_, moment1_, beta_pow1_
+            for i in range(nums_batch):
+                if i % 2:
+                    train_data[i][10] = np.inf
+                loss_, weight_, moment1_, beta_pow1_, found_inf = exe.run(
+                    main_prog,
+                    feed={
+                        "X": train_data[i],
+                        "Y": labels[i]
+                    },
+                    fetch_list=fetch_list)
+                print(loss_, weight_[0][0], moment1_[0][0], beta_pow1_,
+                      found_inf)
+                if i % 2:
+                    self.assertTrue(found_inf)
+                    np.testing.assert_array_equal(weight_, pre_weight_)
+                    np.testing.assert_array_equal(moment1_, pre_moment1_)
+                    np.testing.assert_array_equal(beta_pow1_, pre_beta_pow1_)
+                else:
+                    self.assertFalse(found_inf)
+                    self.assertFalse(np.array_equal(weight_, pre_weight_))
+                    self.assertFalse(np.array_equal(moment1_, pre_moment1_))
+                    self.assertFalse(np.array_equal(beta_pow1_, pre_beta_pow1_))
+                pre_weight_, pre_moment1_, pre_beta_pow1_ = weight_, moment1_, beta_pow1_


 if __name__ == '__main__':
     unittest.main()
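For reference, the behavior test_skip_update asserts is the standard dynamic loss-scaling rule: when any gradient is non-finite, the optimizer update (weights, Adam moments, beta_pow accumulators) must be skipped and the loss scaling reduced. A simplified numpy sketch of that rule (our simplification; Paddle's dynamic loss scaling additionally waits for a window of clean steps before growing the scale):

import numpy as np

def amp_update(params, grads, loss_scaling, lr=1e-4,
               incr_ratio=2.0, decr_ratio=0.5):
    # Skip-update rule: non-finite gradients leave parameters and
    # optimizer state untouched and shrink the loss scaling instead.
    found_inf = any(not np.all(np.isfinite(g)) for g in grads)
    if found_inf:
        return params, loss_scaling * decr_ratio, True
    new_params = [p - lr * (g / loss_scaling) for p, g in zip(params, grads)]
    return new_params, loss_scaling * incr_ratio, False

params = [np.ones(3)]
params, scale, inf = amp_update(params, [np.array([np.inf, 0., 0.])], 128.0)
print(inf, scale, params[0])  # True 64.0 [1. 1. 1.]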