From 2c1bba02e48cfe753531b441bb8f7c3aa1ac8ff3 Mon Sep 17 00:00:00 2001 From: QingshuChen Date: Wed, 13 Jan 2021 13:48:14 +0800 Subject: [PATCH] optimize memcpy perf for kunlun (#30291) * optimize memcpy perf for kunlun * remove useless unitest for kunlun mean * minor --- paddle/fluid/memory/memcpy.cc | 15 ++--- paddle/fluid/platform/device_context.cc | 18 +++++- .../tests/unittests/xpu/test_mean_op_xpu.py | 63 ------------------- 3 files changed, 25 insertions(+), 71 deletions(-) diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc index 10e8bb1f4a..b17da7f69a 100644 --- a/paddle/fluid/memory/memcpy.cc +++ b/paddle/fluid/memory/memcpy.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include <cstring> // for memcpy +#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/profiler.h" @@ -186,13 +187,13 @@ void Copy(platform::XPUPlace dst_place, ret)); free(tmp); } else { - int ret = xpu_memcpy(dst, src, num, XPUMemcpyKind::XPU_DEVICE_TO_DEVICE); - PADDLE_ENFORCE_EQ( - ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.GetByPlace(src_place); + dev_ctx->Wait(); + int ret = xpu::memcpy_device(dev_ctx->x_context(), dst, src, num); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, platform::errors::External( + "XPU API return wrong value[%d %s]", + ret, XPUAPIErrorMsg[ret])); } } #endif diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 57c5ccefae..fb94768984 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -172,7 +172,16 @@ Place CPUDeviceContext::GetPlace() const { return place_; } #ifdef PADDLE_WITH_XPU XPUDeviceContext::XPUDeviceContext() { context_ = xpu::create_context(); } 
-XPUDeviceContext::~XPUDeviceContext() { xpu::destroy_context(context_); } +XPUDeviceContext::~XPUDeviceContext() { + xpu::destroy_context(context_); + void* l3ptr = nullptr; + int l3_size = 13.5 * 1024 * 1024; + xpu_malloc(static_cast<void**>(&l3ptr), l3_size, XPU_MEM_L3); + if (l3ptr != nullptr) { + context_->_l3_mgr.set(l3ptr, l3_size); + std::cout << "set l3 size " << l3_size << std::endl; + } +} XPUDeviceContext::XPUDeviceContext(XPUPlace place) : place_(place) { int dev_id = -1; @@ -189,6 +198,13 @@ XPUDeviceContext::XPUDeviceContext(XPUPlace place) : place_(place) { "Baidu Kunlun Card is properly installed.", ret)); context_ = xpu::create_context(); + void* l3ptr = nullptr; + int l3_size = 13.5 * 1024 * 1024; + xpu_malloc(static_cast<void**>(&l3ptr), l3_size, XPU_MEM_L3); + if (l3ptr != nullptr) { + context_->_l3_mgr.set(l3ptr, l3_size); + std::cout << "set l3 size " << l3_size << std::endl; + } ret = xpu_set_device(dev_id); PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, platform::errors::External( diff --git a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py index 3ebdd110d3..bbdb0984ed 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py @@ -77,68 +77,5 @@ class TestXPUMeanOp(TestMeanOp): self.check_grad_with_place(place, ['X'], 'Out') -class TestMeanAPI(unittest.TestCase): - # test paddle.tensor.stat.mean - - def setUp(self): - self.x_shape = [2, 3, 4, 5] - self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32) - self.place = paddle.XPUPlace(0) - - def test_api_static(self): - paddle.enable_static() - with paddle.static.program_guard(paddle.static.Program()): - x = paddle.fluid.data('X', self.x_shape) - out1 = paddle.mean(x) - out2 = paddle.tensor.mean(x) - out3 = paddle.tensor.stat.mean(x) - axis = np.arange(len(self.x_shape)).tolist() - out4 = paddle.mean(x, axis) - out5 = paddle.mean(x, tuple(axis)) - - exe = 
paddle.static.Executor(self.place) - res = exe.run(feed={'X': self.x}, - fetch_list=[out1, out2, out3, out4, out5]) - out_ref = np.mean(self.x) - for out in res: - self.assertEqual(np.allclose(out, out_ref, rtol=1e-04), True) - - def test_api_dygraph(self): - paddle.disable_static(self.place) - - def test_case(x, axis=None, keepdim=False): - x_tensor = paddle.to_tensor(x) - out = paddle.mean(x_tensor, axis, keepdim) - if isinstance(axis, list): - axis = tuple(axis) - if len(axis) == 0: - axis = None - out_ref = np.mean(x, axis, keepdims=keepdim) - self.assertEqual( - np.allclose( - out.numpy(), out_ref, rtol=1e-04), True) - - test_case(self.x) - test_case(self.x, []) - test_case(self.x, -1) - test_case(self.x, keepdim=True) - test_case(self.x, 2, keepdim=True) - test_case(self.x, [0, 2]) - test_case(self.x, (0, 2)) - test_case(self.x, [0, 1, 2, 3]) - paddle.enable_static() - - def test_errors(self): - paddle.disable_static() - x = np.random.uniform(-1, 1, [10, 12]).astype('float32') - x = paddle.to_tensor(x) - self.assertRaises(Exception, paddle.mean, x, -3) - self.assertRaises(Exception, paddle.mean, x, 2) - paddle.enable_static() - with paddle.static.program_guard(paddle.static.Program()): - x = paddle.fluid.data('X', [10, 12], 'int32') - self.assertRaises(TypeError, paddle.mean, x) - - if __name__ == "__main__": unittest.main() -- GitLab