// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/phi/backends/custom/fake_cpu_device.h" #include "paddle/phi/backends/device_manager.h" void RegisterDevice() { CustomRuntimeParams runtime_params; runtime_params.size = sizeof(CustomRuntimeParams); auto device_interface = std::make_unique(); runtime_params.interface = device_interface.get(); std::memset(runtime_params.interface, 0, sizeof(C_DeviceInterface)); runtime_params.interface->size = sizeof(C_DeviceInterface); InitFakeCPUDevice(&runtime_params); phi::LoadCustomRuntimeLib( runtime_params, std::move(device_interface), "", nullptr); } void InitDevice() { RegisterDevice(); EXPECT_GT(static_cast(phi::DeviceManager::GetAllDeviceTypes().size()), 0); auto place = paddle::platform::CustomPlace(DEVICE_TYPE, 0); auto device = phi::DeviceManager::GetDeviceWithPlace(place); EXPECT_NE(device, nullptr); std::vector places; auto device_types = phi::DeviceManager::GetAllDeviceTypes(); for (auto dev_type : device_types) { auto devices = phi::DeviceManager::GetDeviceList(dev_type); for (auto dev_id : devices) { places.push_back( paddle::platform::PlaceHelper::CreatePlace(dev_type, dev_id)); } } EXPECT_GT(static_cast(places.size()), 0); paddle::platform::DeviceContextPool::Init(places); } void TestDeviceInterface(const paddle::platform::Place& place) { std::cout << "TestDeviceInterface on " << place << std::endl; if (paddle::platform::is_custom_place(place)) { auto device = phi::DeviceManager::GetDeviceWithPlace(place); auto dev_type = paddle::platform::PlaceHelper::GetDeviceType(place); auto p1 = device->MemoryAllocate(phi::DeviceManager::GetMinChunkSize(place)); EXPECT_NE(p1, nullptr); phi::DeviceManager::SetDevice(place); auto dev_id = phi::DeviceManager::GetDevice(dev_type); EXPECT_EQ(dev_id, place.GetDeviceId()); } } void TestTensorMutableData(const paddle::platform::Place& place) { std::cout << "TestTensorInitialization on " << place << std::endl; paddle::framework::Tensor src_tensor; float* p1 = nullptr; float* p2 = nullptr; // initialization p1 = src_tensor.mutable_data(phi::make_ddim({1, 2, 3}), place); auto p1_holder = src_tensor.Holder(); EXPECT_NE(p1, nullptr); // set src_tensor a new dim with large size // momery is supposed to be re-allocated p2 = src_tensor.mutable_data(phi::make_ddim({3, 1024}), place); auto p2_holder = src_tensor.Holder(); EXPECT_NE(p2, nullptr); EXPECT_NE(p1_holder.get(), p2_holder.get()); // set src_tensor a new dim with same size // momery block is supposed to be unchanged p1 = src_tensor.mutable_data(phi::make_ddim({2, 2, 3}), place); EXPECT_EQ(p1, p2); // set src_tensor a new dim with smaller size // momery block is supposed to be unchanged p2 = src_tensor.mutable_data(phi::make_ddim({2, 2}), place); EXPECT_EQ(p1, p2); } void TestTensorShareDataWith(const paddle::platform::Place& place) { std::cout << "TestTensorShareDataWith on " << place << std::endl; paddle::framework::Tensor src_tensor; paddle::framework::Tensor dst_tensor; src_tensor.mutable_data(phi::make_ddim({2, 3, 4}), place); dst_tensor.ShareDataWith(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); } void TestTensorUtils(const paddle::platform::Place& place) { std::cout << "TestTensorUtils on " << place << std::endl; if (paddle::platform::is_custom_place(place) == false) { return; } paddle::framework::Tensor src_tensor; paddle::framework::Tensor gpu_tensor; paddle::framework::Tensor dst_tensor; int* src_ptr = src_tensor.mutable_data(phi::make_ddim({3, 3}), paddle::platform::CPUPlace()); int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; memcpy(src_ptr, arr, 9 * sizeof(int)); // CPU Tensor to GPU Tensor paddle::platform::CustomDeviceContext gpu_ctx(place); paddle::framework::TensorCopy(src_tensor, place, gpu_ctx, &gpu_tensor); #if 0 // GPU Tensor to CPU Tensor auto cpu_place = new paddle::platform::CPUPlace(); paddle::framework::TensorCopy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); // Sync before Compare Tensors gpu_ctx.Wait(); const int* dst_ptr = dst_tensor.data(); EXPECT_NE(src_ptr, dst_ptr); for (size_t i = 0; i < 9; ++i) { EXPECT_EQ(src_ptr[i], dst_ptr[i]); } // Copy the same tensor paddle::framework::TensorCopy(gpu_tensor, place, gpu_ctx, &gpu_tensor); gpu_ctx.Wait(); const int* dst_ptr_tmp = dst_tensor.data(); EXPECT_NE(src_ptr, dst_ptr_tmp); for (size_t i = 0; i < 9; ++i) { EXPECT_EQ(src_ptr[i], dst_ptr_tmp[i]); } paddle::framework::Tensor slice_tensor = src_tensor.Slice(1, 2); // CPU Slice Tensor to GPU Tensor paddle::framework::TensorCopy(slice_tensor, place, gpu_ctx, &gpu_tensor); // GPU Tensor to CPU Tensor paddle::framework::TensorCopy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); // Sync before Compare Slice Tensors gpu_ctx.Wait(); const int* slice_ptr = slice_tensor.data(); dst_ptr = dst_tensor.data(); EXPECT_NE(dst_ptr, slice_ptr); for (size_t i = 0; i < 3; ++i) { EXPECT_EQ(dst_ptr[i], slice_ptr[i]); } EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout()); #endif } void TestCustomCCL(const paddle::platform::Place& place) { std::cout << "TestCustomCCL on " << place << std::endl; if (paddle::platform::is_custom_place(place) == false) { return; } std::string dev_type = place.GetDeviceType(); phi::ccl::CCLComm comm; phi::stream::Stream stream(place, nullptr); phi::ccl::CCLRootId root_id; phi::DeviceManager::CCLDestroyComm(dev_type, nullptr); phi::DeviceManager::CCLGetUniqueId(dev_type, &root_id); phi::DeviceManager::CCLCommInitRank(dev_type, 0, &root_id, 0, nullptr); phi::DeviceManager::CCLBroadcast(dev_type, nullptr, 0, phi::ccl::CCLDataType::CCL_DATA_TYPE_FP32, 0, comm, stream); phi::DeviceManager::CCLAllReduce(dev_type, nullptr, nullptr, 0, phi::ccl::CCLDataType::CCL_DATA_TYPE_FP32, phi::ccl::CCLReduceOp::SUM, comm, stream); phi::DeviceManager::CCLReduce(dev_type, nullptr, nullptr, 0, phi::ccl::CCLDataType::CCL_DATA_TYPE_FP32, phi::ccl::CCLReduceOp::SUM, comm, stream); phi::DeviceManager::CCLAllGather(dev_type, nullptr, nullptr, 0, phi::ccl::CCLDataType::CCL_DATA_TYPE_FP32, comm, stream); phi::DeviceManager::CCLReduceScatter( dev_type, nullptr, nullptr, 0, phi::ccl::CCLDataType::CCL_DATA_TYPE_FP32, phi::ccl::CCLReduceOp::SUM, comm, stream); phi::DeviceManager::CCLGroupStart(dev_type); phi::DeviceManager::CCLGroupEnd(dev_type); phi::DeviceManager::CCLSend(dev_type, nullptr, 0, phi::ccl::CCLDataType::CCL_DATA_TYPE_FP32, 0, comm, stream); phi::DeviceManager::CCLRecv(dev_type, nullptr, 0, phi::ccl::CCLDataType::CCL_DATA_TYPE_FP32, 0, comm, stream); } void TestBlasAPI(const paddle::platform::Place& place) { std::cout << "TestBlasAPI on " << place << std::endl; if (paddle::platform::is_custom_place(place) == false) { return; } auto device = phi::DeviceManager::GetDeviceWithPlace(place); phi::stream::Stream stream(place, nullptr); device->BlasAXPBY(stream, 0, 1., nullptr, 1., nullptr); paddle::framework::Variable var1; paddle::framework::Variable var2; std::vector src_data(10, 1.0); std::vector dst_data(10, 0.0); std::vector result; paddle::platform::CPUPlace src_place; for (unsigned int i = 0; i < 10; i++) { result.emplace_back(src_data[i] + dst_data[i]); } std::vector dims = {2, 5}; auto* src = var1.GetMutable(); auto* dst = var2.GetMutable(); src->Resize(phi::make_ddim(dims)); dst->Resize(phi::make_ddim(dims)); auto* src_mutable = src->mutable_data(place); auto* dst_mutable = dst->mutable_data(place); paddle::memory::Copy(place, src_mutable, src_place, src_data.data(), sizeof(float) * src_data.size()); paddle::memory::Copy(place, dst_mutable, src_place, dst_data.data(), sizeof(float) * dst_data.size()); paddle::imperative::TensorAdd(var1, &var2); paddle::framework::LoDTensor rlt; paddle::platform::CPUPlace rlt_place; paddle::framework::TensorCopySync(*dst, rlt_place, &rlt); } TEST(CustomDevice, Tensor) { InitDevice(); auto dev_types = phi::DeviceManager::GetAllDeviceTypes(); for (const auto& dev_type : dev_types) { std::cout << "Test on " << dev_type << std::endl; EXPECT_GT(static_cast(phi::DeviceManager::GetDeviceCount(dev_type)), 0); auto place = paddle::platform::PlaceHelper::CreatePlace(dev_type); TestDeviceInterface(place); TestTensorMutableData(place); TestTensorShareDataWith(place); TestTensorUtils(place); TestCustomCCL(place); TestBlasAPI(place); } } int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); }