From 7335b679c1192c4faae84656cdd02692e7fe452c Mon Sep 17 00:00:00 2001
From: houj04 <35131887+houj04@users.noreply.github.com>
Date: Thu, 4 Aug 2022 10:35:56 +0800
Subject: [PATCH] [XPU] fleet dist_model support xpu (#44854)

* [XPU] fleet dist_model support xpu. test=kunlun

* [XPU] fleet dist_model support xpu. test=kunlun

* move unittest file location. test=kunlun
---
 .../distributed/fleet_executor/dist_model.cc  | 20 +++-
 .../xpu/test_fleet_exe_dist_model_run_xpu.py  | 93 +++++++++++++++++++
 2 files changed, 111 insertions(+), 2 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_fleet_exe_dist_model_run_xpu.py

diff --git a/paddle/fluid/distributed/fleet_executor/dist_model.cc b/paddle/fluid/distributed/fleet_executor/dist_model.cc
index 0b46369b97..b14bc4f7ed 100644
--- a/paddle/fluid/distributed/fleet_executor/dist_model.cc
+++ b/paddle/fluid/distributed/fleet_executor/dist_model.cc
@@ -89,10 +89,23 @@ bool LoadDataFromDistModelTensor(const DistModelTensor &input_data,
 #else
     PADDLE_THROW(paddle::platform::errors::Fatal(
         "Paddle wasn't compiled with CUDA, but place is GPU."));
+#endif
+  } else if (platform::is_xpu_place(place)) {
+    VLOG(3) << "Loading data for XPU.";
+#if defined(PADDLE_WITH_XPU)
+    auto xpu_place = place;
+    memory::Copy(xpu_place,
+                 static_cast<void *>(input_tensor_ptr),
+                 platform::CPUPlace(),
+                 input_data.data.data(),
+                 input_data.data.length());
+#else
+    PADDLE_THROW(paddle::platform::errors::Fatal(
+        "Paddle wasn't compiled with XPU, but place is XPU."));
 #endif
   } else {
     PADDLE_THROW(paddle::platform::errors::InvalidArgument(
-        "DistModel only supports CPU and GPU."));
+        "DistModel only supports CPU, GPU, and XPU."));
   }
 
   framework::LoD dst_lod;
@@ -189,9 +202,12 @@ bool DistModel::PreparePlace() {
     place_ = paddle::platform::CUDAPlace(config_.device_id);
   } else if (config_.place == "CPU") {
     place_ = paddle::platform::CPUPlace();
+  } else if (config_.place == "XPU") {
+    place_ = paddle::platform::XPUPlace(config_.device_id);
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
-        "Place must be choosen from GPU or CPU, but got %s.", config_.place));
+        "Place must be chosen from GPU, CPU, or XPU, but got %s.",
+        config_.place));
   }
   return true;
 }
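The two hunks above work together: PreparePlace() maps the config's place string to a device place, and LoadDataFromDistModelTensor() then stages each feed tensor onto that place with memory::Copy. Below is a minimal sketch of driving the new XPU branch from Python, assuming a PADDLE_WITH_XPU build; the device_id assignment is an assumption inferred from the C++ config_.device_id field (the unit test in the second file leaves it at its default):

    # Minimal sketch, assuming Paddle was built with PADDLE_WITH_XPU;
    # without that build flag, init() lands in the new PADDLE_THROW branch.
    from paddle.fluid import core

    config = core.DistModelConfig()
    config.model_dir = '/path/to/inference_model'  # hypothetical model directory
    config.place = 'XPU'    # routed to platform::XPUPlace by PreparePlace()
    config.device_id = 0    # assumption: Python counterpart of config_.device_id
    dist = core.DistModel(config)
    dist.init()             # prepares the place and loads the program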
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fleet_exe_dist_model_run_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fleet_exe_dist_model_run_xpu.py
new file mode 100644
index 0000000000..851a5b521e
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/xpu/test_fleet_exe_dist_model_run_xpu.py
@@ -0,0 +1,93 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import paddle
+import numpy as np
+import os
+import tempfile
+from paddle.fluid import core
+
+paddle.enable_static()
+
+
+class TestDistModelRun(unittest.TestCase):
+
+    def setUp(self):
+        self.temp_dir = tempfile.TemporaryDirectory()
+
+    def tearDown(self):
+        # step 6: clean up the env, delete the saved model and params
+        print('cleaned up the env')
+        self.temp_dir.cleanup()
+
+    def test_dist_model_run(self):
+        # step 0: declare the folder to save the model and params
+        path_prefix = os.path.join(self.temp_dir.name,
+                                   "dist_model_run_test/inf")
+
+        # step 1: save the inference model and params
+        x = paddle.static.data(name='x', shape=[28, 28], dtype='float32')
+        y = paddle.static.data(name='y', shape=[28, 1], dtype='int64')
+        predict = paddle.static.nn.fc(x, 10, activation='softmax')
+        loss = paddle.nn.functional.cross_entropy(predict, y)
+        avg_loss = paddle.tensor.stat.mean(loss)
+        exe = paddle.static.Executor(paddle.XPUPlace(0))
+        exe.run(paddle.static.default_startup_program())
+        x_data = np.random.randn(28, 28).astype('float32')
+        y_data = np.random.randint(0, 9, size=[28, 1]).astype('int64')
+        exe.run(paddle.static.default_main_program(),
+                feed={
+                    'x': x_data,
+                    'y': y_data
+                },
+                fetch_list=[avg_loss])
+        paddle.static.save_inference_model(path_prefix, [x, y], [avg_loss], exe)
+        print('save model to', path_prefix)
+
+        # step 2: prepare fake data for the inference
+        x_tensor = np.random.randn(28, 28).astype('float32')
+        y_tensor = np.random.randint(0, 9, size=[28, 1]).astype('int64')
+
+        # step 3: init the dist model and run inference with the fake data
+        config = core.DistModelConfig()
+        config.model_dir = path_prefix
+        config.place = 'XPU'
+        dist = core.DistModel(config)
+        dist.init()
+        dist_x = core.DistModelTensor(x_tensor, 'x')
+        dist_y = core.DistModelTensor(y_tensor, 'y')
+        input_data = [dist_x, dist_y]
+        output_rst = dist.run(input_data)
+        dist_model_rst = output_rst[0].as_ndarray().ravel().tolist()
+        print("dist model rst:", dist_model_rst)
+
+        # step 4: use the framework's api to run inference with the fake data
+        [inference_program, feed_target_names,
+         fetch_targets] = paddle.static.load_inference_model(path_prefix, exe)
+        results = exe.run(inference_program,
+                          feed={
+                              'x': x_tensor,
+                              'y': y_tensor
+                          },
+                          fetch_list=fetch_targets)
+        load_inference_model_rst = results[0]
+        print("load inference model api rst:", load_inference_model_rst)
+
+        # step 5: compare the two results
+        self.assertTrue(np.allclose(dist_model_rst, load_inference_model_rst))
+
+
+if __name__ == '__main__':
+    unittest.main()
--
GitLab
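For the gist of the new test without the unittest scaffolding, here is a condensed sketch of the inference flow it exercises; run_dist_model_on_xpu and model_path are hypothetical names introduced here, while the shapes, dtypes, and API calls follow the test above:

    # Condensed sketch of the XPU inference flow exercised by the test above.
    import numpy as np
    from paddle.fluid import core

    def run_dist_model_on_xpu(model_path):  # hypothetical helper
        # configure DistModel for the XPU place added in dist_model.cc
        config = core.DistModelConfig()
        config.model_dir = model_path
        config.place = 'XPU'
        dist = core.DistModel(config)
        dist.init()
        # wrap host numpy arrays as DistModelTensors keyed by feed name
        x = core.DistModelTensor(
            np.random.randn(28, 28).astype('float32'), 'x')
        y = core.DistModelTensor(
            np.random.randint(0, 9, size=[28, 1]).astype('int64'), 'y')
        # run() stages each input onto the XPU via the new memory::Copy path
        outputs = dist.run([x, y])
        return outputs[0].as_ndarray()

The test then checks this result against paddle.static.load_inference_model plus a plain Executor run with np.allclose, which is what ties the new C++ branches to an observable correctness criterion.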