未验证 提交 d4359b0f 编写于 作者: J Jack Zhou 提交者: GitHub

add the kunlun kernel for the paddle 2.0

Add xpu kernel for KUNLUN core:

* accuracy op
* sign op
* scale op
* sum op

Add default atol in xpu unittest.
上级 4676f03c
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/metrics/accuracy_op.h"
#include "paddle/fluid/platform/xpu_header.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class AccuracyXPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* inference = ctx.Input<Tensor>("Out");
auto* indices = ctx.Input<Tensor>("Indices");
auto* label = ctx.Input<Tensor>("Label");
auto* accuracy = ctx.Output<Tensor>("Accuracy");
auto* correct = ctx.Output<Tensor>("Correct");
auto* total = ctx.Output<Tensor>("Total");
int* correct_data = correct->mutable_data<int>(ctx.GetPlace());
int* total_data = total->mutable_data<int>(ctx.GetPlace());
float* accuracy_data = accuracy->mutable_data<float>(ctx.GetPlace());
const int64_t* indices_data = indices->data<int64_t>();
const int64_t* label_data = label->data<int64_t>();
size_t num_samples = inference->dims()[0];
size_t class_dim = inference->dims()[1];
if (num_samples == 0) {
return;
}
size_t indices_int32_size = num_samples * class_dim * sizeof(int);
size_t indices_int64_size = num_samples * class_dim * sizeof(int64_t);
size_t label_int32_size = num_samples * sizeof(int);
size_t label_int64_size = num_samples * sizeof(int64_t);
auto& dev_ctx = ctx.template device_context<DeviceContext>();
int* indices_int32_device = NULL;
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void**>(&indices_int32_device),
indices_int32_size),
XPU_SUCCESS,
platform::errors::ResourceExhausted(
"\n\nOut of memory error on XPU, Cannot allocate %s memory"
" on XPU. \n\nPlease check whether there is any other process "
"using XPU.\n",
string::HumanReadableSize(indices_int32_size)));
int* label_int32_device = NULL;
PADDLE_ENFORCE_EQ(
xpu_malloc(reinterpret_cast<void**>(&label_int32_device),
label_int32_size),
XPU_SUCCESS,
platform::errors::ResourceExhausted(
"\n\nOut of memory error on XPU, Cannot allocate %s memory"
" on XPU. \n\nPlease check whether there is any other process "
"using XPU.\n",
string::HumanReadableSize(label_int32_size)));
int* indices_int32_host =
reinterpret_cast<int*>(std::malloc(indices_int32_size));
int64_t* indices_int64_host =
reinterpret_cast<int64_t*>(std::malloc(indices_int64_size));
int* label_int32_host =
reinterpret_cast<int*>(std::malloc(label_int32_size));
int64_t* label_int64_host =
reinterpret_cast<int64_t*>(std::malloc(label_int64_size));
dev_ctx.Wait();
memory::Copy(platform::CPUPlace(), indices_int64_host,
BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace()),
indices_data, indices_int64_size);
memory::Copy(platform::CPUPlace(), label_int64_host,
BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace()),
label_data, label_int64_size);
for (int i = 0; i < num_samples; ++i) {
label_int32_host[i] = label_int64_host[i];
for (int j = 0; j < class_dim; ++j) {
indices_int32_host[i * class_dim + j] =
indices_int64_host[i * class_dim + j];
}
}
memory::Copy(BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace()),
indices_int32_device, platform::CPUPlace(), indices_int32_host,
indices_int32_size);
memory::Copy(BOOST_GET_CONST(platform::XPUPlace, ctx.GetPlace()),
label_int32_device, platform::CPUPlace(), label_int32_host,
label_int32_size);
int r = xpu::accuracy(dev_ctx.x_context(), indices_int32_device,
label_int32_device, num_samples, class_dim,
correct_data, total_data, accuracy_data);
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Fatal("XPU kernel error!"));
dev_ctx.Wait();
xpu_free(indices_int32_device);
xpu_free(label_int32_device);
std::free(indices_int32_host);
std::free(indices_int64_host);
std::free(label_int32_host);
std::free(label_int64_host);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
accuracy,
ops::AccuracyXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/scale_op.h"
#include <string>
#include "paddle/fluid/platform/xpu_header.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class ScaleXPUKernel : public framework::OpKernel<T> {
public:
virtual void Compute(const framework::ExecutionContext& ctx) const {
auto* in_var = ctx.InputVar("X");
auto* in = framework::GetLoDTensorOrSelectedRowsValueFromVar(*in_var);
auto scale = static_cast<T>(ctx.Attr<float>("scale"));
auto bias = static_cast<T>(ctx.Attr<float>("bias"));
auto bias_after_scale = ctx.Attr<bool>("bias_after_scale");
auto* out_var = ctx.OutputVar("Out");
if (in_var->IsType<framework::SelectedRows>() && in_var != out_var) {
auto& in_slr = in_var->Get<framework::SelectedRows>();
auto* out_slr = out_var->GetMutable<framework::SelectedRows>();
out_slr->set_rows(in_slr.rows());
out_slr->set_height(in_slr.height());
}
auto* out =
framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(out_var);
out->mutable_data<T>(in->place());
PADDLE_ENFORCE_EQ(
in->dims(), out->dims(),
platform::errors::InvalidArgument("In and out should have the same dim,"
" expected %s, but got %s.",
in->dims().to_str().c_str(),
out->dims().to_str().c_str()));
auto& dev_ctx = ctx.template device_context<DeviceContext>();
int r = xpu::scale(dev_ctx.x_context(), in->numel(), scale, bias,
bias_after_scale, in->data<float>(), out->data<float>());
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Fatal("XPU kernel error!"));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
scale, ops::ScaleXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/sign_op.h"
#include "paddle/fluid/platform/xpu_header.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class SignXPUKernel : public framework::OpKernel<T> {
public:
virtual void Compute(const framework::ExecutionContext& context) const {
auto* out = context.Output<framework::Tensor>("Out");
auto* in = context.Input<framework::Tensor>("X");
out->mutable_data<T>(in->place());
auto xpu_context = context.device_context<DeviceContext>().x_context();
int r = xpu::activation_forward(xpu_context, xpu::Activation_t::SIGN,
in->numel(), in->data<T>(), out->data<T>());
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Fatal("XPU kernel error!"));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
sign, ops::SignXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/sum_op.h"
#include <vector>
#include "paddle/fluid/platform/xpu_header.h"
namespace paddle {
namespace operators {
using framework::Tensor;
template <typename DeviceContext, typename T>
class SumXPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
auto in_vars = context.MultiInputVar("X");
auto out_var = context.OutputVar("Out");
auto *out = context.Output<LoDTensor>("Out");
bool in_place = out_var == in_vars[0];
int N = in_vars.size();
PADDLE_ENFORCE_EQ(
out_var->IsType<framework::LoDTensor>(), true,
platform::errors::InvalidArgument("XPU only surpport LodTensor"));
if (!in_place) {
out->mutable_data<T>(context.GetPlace());
}
auto &dev_ctx = context.template device_context<DeviceContext>();
std::vector<const float *> ptrs(N, nullptr);
int valid_count = 0;
for (int i = 0; i < N; ++i) {
PADDLE_ENFORCE_EQ(
in_vars[i]->IsType<framework::LoDTensor>(), true,
platform::errors::InvalidArgument("XPU only surpport LodTensor"));
auto &in_t = in_vars[i]->Get<framework::LoDTensor>();
if (in_t.numel() == 0) {
continue;
}
ptrs[valid_count] = reinterpret_cast<const float *>(in_t.data<T>());
valid_count++;
}
int r = xpu::sum_batch(dev_ctx.x_context(), ptrs.data(), out->data<T>(),
valid_count, out->numel());
PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
platform::errors::Fatal("XPU kernel error!"));
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_XPU_KERNEL(
sum, ops::SumXPUKernel<paddle::platform::XPUDeviceContext, float>);
#endif
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import sys
sys.path.append("..")
from op_test import OpTest
import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard
import paddle
paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUAccuracyOp(OpTest):
def setUp(self):
self.op_type = "accuracy"
self.init_dtype()
n = 8192
infer = np.random.random((n, 1)).astype(self.dtype)
indices = np.random.randint(0, 2, (n, 1)).astype('int64')
label = np.random.randint(0, 2, (n, 1)).astype('int64')
self.inputs = {'Out': infer, 'Indices': indices, "Label": label}
num_correct = 0
for rowid in range(n):
for ele in indices[rowid]:
if ele == label[rowid]:
num_correct += 1
break
self.outputs = {
'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype),
'Correct': np.array([num_correct]).astype("int32"),
'Total': np.array([n]).astype("int32")
}
self.attrs = {'use_xpu': True}
def init_dtype(self):
self.dtype = np.float32
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import sys
sys.path.append("..")
from op_test import OpTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.op import Operator
import paddle
paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUScaleOp(OpTest):
def setUp(self):
self.op_type = "scale"
self.dtype = np.float32
self.inputs = {'X': np.random.random((10, 10)).astype(self.dtype)}
self.attrs = {'scale': -2.3, 'use_xpu': True}
self.outputs = {
'Out': self.inputs['X'] * self.dtype(self.attrs['scale'])
}
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def test_check_grad(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X'], 'Out')
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import sys
sys.path.append("..")
from op_test import OpTest
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
import paddle
paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUSignOp(OpTest):
def setUp(self):
self.op_type = "sign"
self.dtype = np.float32
self.inputs = {
'X': np.random.uniform(-10, 10, (10, 10)).astype(self.dtype)
}
self.outputs = {'Out': np.sign(self.inputs['X'])}
self.attrs = {'use_xpu': True}
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def test_check_grad(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['X'], 'Out')
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import sys
sys.path.append("..")
from op_test import OpTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.op import Operator
import paddle
paddle.enable_static()
@unittest.skipIf(not paddle.is_compiled_with_xpu(),
"core is not compiled with XPU")
class TestXPUSumOp(OpTest):
def setUp(self):
self.op_type = "sum"
self.use_mkldnn = False
self.init_kernel_type()
x0 = np.random.random((3, 40)).astype(self.dtype)
x1 = np.random.random((3, 40)).astype(self.dtype)
x2 = np.random.random((3, 40)).astype(self.dtype)
self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]}
y = x0 + x1 + x2
self.outputs = {'Out': y}
self.attrs = {'use_mkldnn': self.use_mkldnn, 'use_xpu': True}
def init_kernel_type(self):
self.dtype = np.float32
def test_check_output(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_output_with_place(place)
def test_check_grad(self):
if paddle.is_compiled_with_xpu():
place = paddle.XPUPlace(0)
self.check_grad_with_place(place, ['x0'], 'Out')
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册