未验证 提交 7335b679 编写于 作者: H houj04 提交者: GitHub

[XPU] fleet dist_model support xpu (#44854)

* [XPU] fleet dist_model support xpu. test=kunlun

* [XPU] fleet dist_model support xpu. test=kunlun

* move unittest file location. test=kunlun
上级 0e26361c
...@@ -89,10 +89,23 @@ bool LoadDataFromDistModelTensor(const DistModelTensor &input_data, ...@@ -89,10 +89,23 @@ bool LoadDataFromDistModelTensor(const DistModelTensor &input_data,
#else #else
PADDLE_THROW(paddle::platform::errors::Fatal( PADDLE_THROW(paddle::platform::errors::Fatal(
"Paddle wasn't compiled with CUDA, but place is GPU.")); "Paddle wasn't compiled with CUDA, but place is GPU."));
#endif
} else if (platform::is_xpu_place(place)) {
VLOG(3) << "Loading data for XPU.";
#if defined(PADDLE_WITH_XPU)
auto xpu_place = place;
memory::Copy(xpu_place,
static_cast<void *>(input_tensor_ptr),
platform::CPUPlace(),
input_data.data.data(),
input_data.data.length());
#else
PADDLE_THROW(paddle::platform::errors::Fatal(
"Paddle wasn't compiled with XPU, but place is XPU."));
#endif #endif
} else { } else {
PADDLE_THROW(paddle::platform::errors::InvalidArgument( PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"DistModel only supports CPU and GPU.")); "DistModel only supports CPU and GPU and XPU."));
} }
framework::LoD dst_lod; framework::LoD dst_lod;
...@@ -189,9 +202,12 @@ bool DistModel::PreparePlace() { ...@@ -189,9 +202,12 @@ bool DistModel::PreparePlace() {
place_ = paddle::platform::CUDAPlace(config_.device_id); place_ = paddle::platform::CUDAPlace(config_.device_id);
} else if (config_.place == "CPU") { } else if (config_.place == "CPU") {
place_ = paddle::platform::CPUPlace(); place_ = paddle::platform::CPUPlace();
} else if (config_.place == "XPU") {
place_ = paddle::platform::XPUPlace(config_.device_id);
} else { } else {
PADDLE_THROW(platform::errors::InvalidArgument( PADDLE_THROW(platform::errors::InvalidArgument(
"Place must be choosen from GPU or CPU, but got %s.", config_.place)); "Place must be choosen from GPU or CPU or XPU, but got %s.",
config_.place));
} }
return true; return true;
} }
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
import numpy as np
import os
import tempfile
from paddle.fluid import core
paddle.enable_static()
class TestDistModelRun(unittest.TestCase):
def setUp(self):
self.temp_dir = tempfile.TemporaryDirectory()
def tearDown(self):
# step 6: clean up the env, delete the saved model and params
print('cleaned up the env')
self.temp_dir.cleanup()
def test_dist_model_run(self):
# step 0: declare folder to save the model and params
path_prefix = os.path.join(self.temp_dir.name,
"dist_model_run_test/inf")
# step 1: saving the inference model and params
x = paddle.static.data(name='x', shape=[28, 28], dtype='float32')
y = paddle.static.data(name='y', shape=[28, 1], dtype='int64')
predict = paddle.static.nn.fc(x, 10, activation='softmax')
loss = paddle.nn.functional.cross_entropy(predict, y)
avg_loss = paddle.tensor.stat.mean(loss)
exe = paddle.static.Executor(paddle.XPUPlace(0))
exe.run(paddle.static.default_startup_program())
x_data = np.random.randn(28, 28).astype('float32')
y_data = np.random.randint(0, 9, size=[28, 1]).astype('int64')
exe.run(paddle.static.default_main_program(),
feed={
'x': x_data,
'y': y_data
},
fetch_list=[avg_loss])
paddle.static.save_inference_model(path_prefix, [x, y], [avg_loss], exe)
print('save model to', path_prefix)
# step 2: prepare fake data for the inference
x_tensor = np.random.randn(28, 28).astype('float32')
y_tensor = np.random.randint(0, 9, size=[28, 1]).astype('int64')
# step 3: init the dist model to inference with fake data
config = core.DistModelConfig()
config.model_dir = path_prefix
config.place = 'XPU'
dist = core.DistModel(config)
dist.init()
dist_x = core.DistModelTensor(x_tensor, 'x')
dist_y = core.DistModelTensor(y_tensor, 'y')
input_data = [dist_x, dist_y]
output_rst = dist.run(input_data)
dist_model_rst = output_rst[0].as_ndarray().ravel().tolist()
print("dist model rst:", dist_model_rst)
# step 4: use framework's api to inference with fake data
[inference_program, feed_target_names,
fetch_targets] = (paddle.static.load_inference_model(path_prefix, exe))
results = exe.run(inference_program,
feed={
'x': x_tensor,
'y': y_tensor
},
fetch_list=fetch_targets)
load_inference_model_rst = results[0]
print("load inference model api rst:", load_inference_model_rst)
# step 5: compare two results
self.assertTrue(np.allclose(dist_model_rst, load_inference_model_rst))
# Run the test case when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册