提交 e6c14f7e 编写于 作者: L liaogang

ENH: Polish cpu info interface

上级 68ab1ef4
cc_library(cpu_info SRCS cpu_info.cc)
cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info gflags)
cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info gflags glog)
nv_library(gpu_info SRCS gpu_info.cc)
nv_test(cuda_test SRCS cuda_test.cu)
cc_library(place SRCS place.cc)
cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
......@@ -47,9 +47,21 @@ inline size_t CpuTotalPhysicalMemory() {
#endif
}
size_t CpuTotalMemory() {
size_t CpuMaxAllocSize() {
// For distributed systems, it requires configuring and limiting
// the fraction of memory to use.
return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory();
}
size_t CpuMinChunkSize() {
// Allow to allocate the minimum chunk size is 256 bytes.
return 1 << 8;
}
size_t CpuMaxChunkSize() {
// Allow to allocate the maximum chunk size is roughly 3% of CPU memory.
return CpuMaxAllocSize() / 32;
}
} // namespace platform
} // namespace paddle
......@@ -19,8 +19,14 @@ limitations under the License. */
namespace paddle {
namespace platform {
//! Get the total memory on host machine.
size_t CpuTotalMemory();
//! Get the maximum allocation size for a machine.
size_t CpuMaxAllocSize();
//! Get the minimum chunk size for buddy allocator.
size_t CpuMinChunkSize();
//! Get the maximum chunk size for buddy allocator.
size_t CpuMaxChunkSize();
} // namespace platform
} // namespace paddle
#include "paddle/platform/cpu_info.h"
#include "paddle/string/printf.h"
#include <ostream>
#include <sstream>
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
DECLARE_double(fraction_of_cpu_memory_to_use);
TEST(CpuMemoryUsage, Print) {
std::stringstream ss;
size_t mem_size = paddle::platform::CpuTotalMemory() / 1024 / 1024 / 1024;
ss << std::to_string(
static_cast<size_t>(FLAGS_fraction_of_cpu_memory_to_use * 100))
<< "% of CPU Memory Usage: " << mem_size << " GB";
std::cout << ss.str();
size_t memory_size = paddle::platform::CpuMaxAllocSize() / 1024 / 1024 / 1024;
float use_percent = FLAGS_fraction_of_cpu_memory_to_use * 100;
std::cout << paddle::string::Sprintf("\n%.2f %% of CPU Memory Usage: %d GB\n",
use_percent, memory_size)
<< std::endl;
}
#include <cuda_runtime.h>
#include <stdio.h>
#include "gtest/gtest.h"
#define CHECK_ERR(x) \
if (x != cudaSuccess) { \
fprintf(stderr, \
"%s in %s at line %d\n", \
cudaGetErrorString(err), \
__FILE__, \
__LINE__); \
exit(-1); \
}
__global__ void vecAdd(float *d_A, float *d_B, float *d_C, int n) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < n) {
d_C[i] = d_A[i] + d_B[i];
}
}
TEST(Cuda, Equality) {
int n = 10;
// Memory allocation for h_A, h_B and h_C (in the host)
float h_A[10] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0};
float h_B[10] = {0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0};
float h_C[10];
float *d_A, *d_B, *d_C;
cudaError_t err;
// Memory allocation for d_A, d_B and d_C (in the device)
err = cudaMalloc((void **)&d_A, sizeof(float) * n);
CHECK_ERR(err);
err = cudaMalloc((void **)&d_B, sizeof(float) * n);
CHECK_ERR(err);
err = cudaMalloc((void **)&d_C, sizeof(float) * n);
CHECK_ERR(err);
// Copying memory to device
err = cudaMemcpy(d_A, h_A, sizeof(float) * n, cudaMemcpyHostToDevice);
CHECK_ERR(err);
err = cudaMemcpy(d_B, h_B, sizeof(float) * n, cudaMemcpyHostToDevice);
CHECK_ERR(err);
// Calling the kernel
vecAdd<<<ceil(n / 256.0), 256>>>(d_A, d_B, d_C, n);
// Copying results back to host
err = cudaMemcpy(h_C, d_C, sizeof(float) * n, cudaMemcpyDeviceToHost);
CHECK_ERR(err);
EXPECT_EQ(h_C[0], 1.0);
for (int i = 1; i < n - 1; ++i) {
EXPECT_EQ(h_C[i], 11.0);
}
EXPECT_EQ(h_C[9], 1.0);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册