ENH: Polish cpu info interface

e6c14f7e · liaogang · 68ab1ef4 · e6c14f7e · e6c14f7e · e6c14f7e
5 changed files
--- a/paddle/platform/CMakeLists.txt
+++ b/paddle/platform/CMakeLists.txt
 cc_library(cpu_info SRCS cpu_info.cc)
-cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info gflags)
+cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info gflags glog)

 nv_library(gpu_info SRCS gpu_info.cc)
-nv_test(cuda_test SRCS cuda_test.cu)

 cc_library(place SRCS place.cc)
 cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
--- a/paddle/platform/cpu_info.cc
+++ b/paddle/platform/cpu_info.cc
@@ -47,9 +47,21 @@ inline size_t CpuTotalPhysicalMemory() {
 #endif
 }

-size_t CpuTotalMemory() {
+size_t CpuMaxAllocSize() {
+  // For distributed systems, the fraction of CPU memory to use must be
+  // configured and limited.
  return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory();
 }

+size_t CpuMinChunkSize() {
+  // The minimum chunk size that can be allocated is 256 bytes.
+  return 1 << 8;
+}
+
+size_t CpuMaxChunkSize() {
+  // The maximum chunk size is 1/32 (roughly 3%) of the maximum allocation size.
+  return CpuMaxAllocSize() / 32;
+}
+
 }  // namespace platform
 }  // namespace paddle
--- a/paddle/platform/cpu_info.h
+++ b/paddle/platform/cpu_info.h
@@ -19,8 +19,14 @@ limitations under the License. */
 namespace paddle {
 namespace platform {

-//! Get the total memory on host machine.
-size_t CpuTotalMemory();
+//! Get the maximum allocation size for a machine.
+size_t CpuMaxAllocSize();
+
+//! Get the minimum chunk size for buddy allocator.
+size_t CpuMinChunkSize();
+
+//! Get the maximum chunk size for buddy allocator.
+size_t CpuMaxChunkSize();

 }  // namespace platform
 }  // namespace paddle
--- a/paddle/platform/cpu_info_test.cc
+++ b/paddle/platform/cpu_info_test.cc
 #include "paddle/platform/cpu_info.h"
+#include "paddle/string/printf.h"

 #include <ostream>
 #include <sstream>

 #include "gflags/gflags.h"
+#include "glog/logging.h"
 #include "gtest/gtest.h"

 DECLARE_double(fraction_of_cpu_memory_to_use);

 TEST(CpuMemoryUsage, Print) {
  std::stringstream ss;
-  size_t mem_size = paddle::platform::CpuTotalMemory() / 1024 / 1024 / 1024;
-  ss << std::to_string(
-            static_cast<size_t>(FLAGS_fraction_of_cpu_memory_to_use * 100))
-     << "% of CPU Memory Usage: " << mem_size << " GB";
-  std::cout << ss.str();
+  size_t memory_size = paddle::platform::CpuMaxAllocSize() / 1024 / 1024 / 1024;
+  float use_percent = FLAGS_fraction_of_cpu_memory_to_use * 100;
+
+  std::cout << paddle::string::Sprintf("\n%.2f %% of CPU Memory Usage: %d GB\n",
+                                       use_percent, memory_size)
+            << std::endl;
 }
--- a/paddle/platform/cuda_test.cu
+++ b/paddle/platform/cuda_test.cu
-#include <cuda_runtime.h>
-#include <stdio.h>
-#include "gtest/gtest.h"
-
-#define CHECK_ERR(x)                 \
-  if (x != cudaSuccess) {            \
-    fprintf(stderr,                  \
-            "%s in %s at line %d\n", \
-            cudaGetErrorString(err), \
-            __FILE__,                \
-            __LINE__);               \
-    exit(-1);                        \
-  }
-
-__global__ void vecAdd(float *d_A, float *d_B, float *d_C, int n) {
-  int i = blockDim.x * blockIdx.x + threadIdx.x;
-  if (i < n) {
-    d_C[i] = d_A[i] + d_B[i];
-  }
-}
-
-TEST(Cuda, Equality) {
-  int n = 10;
-  // Memory allocation for h_A, h_B and h_C (in the host)
-  float h_A[10] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0};
-  float h_B[10] = {0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0};
-  float h_C[10];
-  float *d_A, *d_B, *d_C;
-  cudaError_t err;
-  // Memory allocation for d_A, d_B and d_C (in the device)
-  err = cudaMalloc((void **)&d_A, sizeof(float) * n);
-  CHECK_ERR(err);
-
-  err = cudaMalloc((void **)&d_B, sizeof(float) * n);
-  CHECK_ERR(err);
-
-  err = cudaMalloc((void **)&d_C, sizeof(float) * n);
-  CHECK_ERR(err);
-
-  // Copying memory to device
-  err = cudaMemcpy(d_A, h_A, sizeof(float) * n, cudaMemcpyHostToDevice);
-  CHECK_ERR(err);
-
-  err = cudaMemcpy(d_B, h_B, sizeof(float) * n, cudaMemcpyHostToDevice);
-  CHECK_ERR(err);
-
-  // Calling the kernel
-  vecAdd<<<ceil(n / 256.0), 256>>>(d_A, d_B, d_C, n);
-
-  // Copying results back to host
-  err = cudaMemcpy(h_C, d_C, sizeof(float) * n, cudaMemcpyDeviceToHost);
-  CHECK_ERR(err);
-
-  EXPECT_EQ(h_C[0], 1.0);
-  for (int i = 1; i < n - 1; ++i) {
-    EXPECT_EQ(h_C[i], 11.0);
-  }
-  EXPECT_EQ(h_C[9], 1.0);
-}