Commit db128c45 authored by Yi Wang

Pass cpu_allocator_test

Parent 67481ca8
@@ -71,7 +71,7 @@ if(ANDROID)
     "Disable RDMA when cross-compiling for Android" FORCE)
 endif(ANDROID)
 
-set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING
+set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
   "A path setting third party libraries download & build directories.")
 
 if (WITH_C_API AND WITH_PYTHON)
......
@@ -78,6 +78,10 @@
 #
 # cc_test(example_test SRCS example_test.cc DEPS example glog gflags)
 
+if(WITH_GPU)
+  add_definitions(-DPADDLE_WITH_GPU)
+endif()
+
 if(NOT APPLE)
   find_package(Threads REQUIRED)
   link_libraries(${CMAKE_THREAD_LIBS_INIT})
......
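WITH_GPU is only a CMake option, so by itself it is never visible to the C++ preprocessor; the new add_definitions(-DPADDLE_WITH_GPU) line is what turns the build option into a compile-time macro, and the rest of this commit renames the #ifdef guards accordingly. A minimal sketch of how a source file can then branch on it (illustrative only; kBuiltWithGPU is a made-up name, not part of the commit):

// Illustrative only: PADDLE_WITH_GPU is defined by CMake when WITH_GPU=ON.
#ifdef PADDLE_WITH_GPU
#include <cuda_runtime_api.h>            // only pulled in when the macro is defined
constexpr bool kBuiltWithGPU = true;
#else
constexpr bool kBuiltWithGPU = false;    // CPU-only build: no CUDA headers needed
#endif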
-cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)
+if(${WITH_GPU})
+  nv_test(cpu_allocator_test SRCS cpu_allocator_test.cc)  # nv_test links CUDA, but
+else(${WITH_GPU})
+  cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)  # cc_test doesn't.
+endif(${WITH_GPU})
@@ -17,6 +17,11 @@ limitations under the License. */
 #include <malloc.h>  // for malloc and free
 #include <stddef.h>  // for size_t
 
+#ifdef PADDLE_WITH_GPU
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#endif  // PADDLE_WITH_GPU
+
 namespace paddle {
 namespace memory {
 namespace detail {
@@ -40,9 +45,9 @@ public:
   void Free(void* p) { free(p); }
 };
 
-// If CMake macro WITH_GPU is OFF, C++ compiler won't generate the
+// If CMake macro PADDLE_WITH_GPU is OFF, C++ compiler won't generate the
 // following specialization that depends on the CUDA library.
-#ifdef WITH_GPU
+#ifdef PADDLE_WITH_GPU
 template <>
 class CPUAllocator<true> {
 public:
@@ -51,12 +56,12 @@ public:
     if (cudaMallocHost(&p, size) != cudaSuccess) {
       return NULL;
     }
-    return *p;
+    return p;
   }
 
   void Free(void* p) { cudaFreeHost(p); }
 };
-#endif  // WITH_GPU
+#endif  // PADDLE_WITH_GPU
 
 }  // namespace detail
 }  // namespace memory
......
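For context, here is a self-contained sketch of the allocator header as it reads after this patch, reconstructed from the hunks above (member layout and exact comments in the repository may differ). The primary template hands out ordinary pageable memory via malloc/free; the specialization, compiled only when PADDLE_WITH_GPU is defined, hands out pinned (staging) memory via cudaMallocHost/cudaFreeHost:

#include <malloc.h>  // for malloc and free
#include <stddef.h>  // for size_t

#ifdef PADDLE_WITH_GPU
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif  // PADDLE_WITH_GPU

namespace paddle {
namespace memory {
namespace detail {

// Primary template: ordinary pageable host memory.
template <bool staging>
class CPUAllocator {
 public:
  void* Alloc(size_t size) { return malloc(size); }
  void Free(void* p) { free(p); }
};

#ifdef PADDLE_WITH_GPU
// Staging specialization: pinned host memory that CUDA can DMA to and from.
template <>
class CPUAllocator<true> {
 public:
  void* Alloc(size_t size) {
    void* p;
    if (cudaMallocHost(&p, size) != cudaSuccess) {
      return NULL;  // e.g. no CUDA driver or device available
    }
    return p;
  }
  void Free(void* p) { cudaFreeHost(p); }
};
#endif  // PADDLE_WITH_GPU

}  // namespace detail
}  // namespace memory
}  // namespace paddle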
@@ -22,11 +22,17 @@ TEST(CPUAllocator, NonStaging) {
   a.Free(p);
 }
 
-#ifdef WITH_GPU
+#ifdef PADDLE_WITH_GPU
 TEST(CPUAllocator, Staging) {
   paddle::memory::detail::CPUAllocator<true> a;
-  void* p = a.Alloc(4096);
-  EXPECT_NE(p, nullptr);
-  a.Free(p);
+
+  int devices;
+  if (cudaGetDeviceCount(&devices) == cudaSuccess && devices > 0) {
+    void* p = a.Alloc(4096);
+    EXPECT_NE(p, nullptr);
+    a.Free(p);
+  } else {
+    EXPECT_EQ(a.Alloc(4096), nullptr);
+  }
 }
-#endif  // WITH_GPU
+#endif  // PADDLE_WITH_GPU
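The guard added here is why the commit message says cpu_allocator_test now passes: on a machine without a CUDA device, cudaGetDeviceCount reports the situation and the test asserts that pinned allocation fails, instead of unconditionally expecting a non-NULL pointer. A self-contained sketch of the resulting test file (the include path is assumed, and the NonStaging body is a guess that mirrors the Staging case; only its last two lines appear in the hunk context above):

#include "paddle/memory/detail/cpu_allocator.h"  // assumed header location
#include "gtest/gtest.h"

TEST(CPUAllocator, NonStaging) {
  paddle::memory::detail::CPUAllocator<false> a;
  void* p = a.Alloc(4096);
  EXPECT_NE(p, nullptr);
  a.Free(p);
}

#ifdef PADDLE_WITH_GPU
TEST(CPUAllocator, Staging) {
  paddle::memory::detail::CPUAllocator<true> a;

  int devices;
  if (cudaGetDeviceCount(&devices) == cudaSuccess && devices > 0) {
    // A usable device is present: pinned allocation should succeed.
    void* p = a.Alloc(4096);
    EXPECT_NE(p, nullptr);
    a.Free(p);
  } else {
    // No device or no driver: cudaMallocHost fails and Alloc returns NULL.
    EXPECT_EQ(a.Alloc(4096), nullptr);
  }
}
#endif  // PADDLE_WITH_GPU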
@@ -19,7 +19,11 @@ namespace memory {
 template <>
 void* Alloc<CPUPlace>(CPUPlace, size_t size) {
-  return GetCPUBuddyAllocator()->Alloc(size);
+  return GetCPUBuddyAllocator(false /*non-staging*/)->Alloc(size);
+}
+
+void* AllocStaging(CPUPlace, size_t size) {
+  return GetCPUBuddyAllocator(true /*staging*/)->Alloc(size);
 }
 
 template <>
@@ -29,9 +33,14 @@ void* Alloc<GPUPlace>(GPUPlace pl, size_t size) {
 template <>
 void Free<CPUPlace>(CPUPlace, void* p) {
-  return GetCPUBuddyAllocator()->Free(p);
+  return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p);
+}
+
+void FreeStaging(CPUPlace, void* p) {
+  return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p);
 }
 
+#ifdef PADDLE_WITH_GPU
 template <>
 void* Alloc<GPUPlace>(GPUPlace pl, void* p) {
   return GetGPUBuddyAllocator(pl.device)->Free(p);
@@ -46,6 +55,7 @@ template <>
 size_t Alloc<GPUPlace>(GPUPlace pl) {
   return GetGPUBuddyAllocator(pl.device)->Used();
 }
+#endif  // PADDLE_WITH_GPU
 
 }  // namespace memory
 }  // namespace paddle
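GetCPUBuddyAllocator itself is not part of this diff, so the following is only a plausible sketch, under the assumption that the new bool parameter selects between two lazily created singletons, one wrapping CPUAllocator<false> and one wrapping CPUAllocator<true>; the facade type and every name below are invented for illustration. Note that in such a design, pinned memory has to be returned through the staging allocator (so it reaches cudaFreeHost), which is why this sketch has FreeStaging-style callers pass true:

#include <stddef.h>  // for size_t
#include "paddle/memory/detail/cpu_allocator.h"  // assumed location, see the header sketch above

namespace paddle {
namespace memory {
namespace detail {

// Invented minimal facade so both CPUAllocator instantiations can be handed
// out through one runtime-selected function.  The real BuddyAllocator is more
// elaborate (it also tracks Used()).
class HostAllocatorFacade {
 public:
  virtual ~HostAllocatorFacade() {}
  virtual void* Alloc(size_t size) = 0;
  virtual void Free(void* p) = 0;
};

template <bool staging>
class HostAllocatorImpl : public HostAllocatorFacade {
 public:
  void* Alloc(size_t size) override { return a_.Alloc(size); }
  void Free(void* p) override { a_.Free(p); }

 private:
  CPUAllocator<staging> a_;
};

}  // namespace detail

// One singleton per kind of host memory; staging == true selects pinned memory.
detail::HostAllocatorFacade* GetCPUBuddyAllocator(bool staging) {
  static detail::HostAllocatorImpl<false> pageable;
  static detail::HostAllocatorImpl<true> pinned;
  if (staging) return &pinned;
  return &pageable;
}

}  // namespace memory
}  // namespace paddle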
@@ -19,9 +19,19 @@ limitations under the License. */
 namespace paddle {
 namespace memory {
 
-typename<typename paddle::framework::Place> void* Alloc(Place, size_t);
-typename<typename paddle::framework::Place> void Free(Place, void*);
-typename<typename paddle::framework::Place> size_t Used(Place);
+template <typename paddle::framework::Place>
+void* Alloc(Place, size_t);
+template <typename paddle::framework::Place>
+void Free(Place, void*);
+template <typename paddle::framework::Place>
+size_t Used(Place);
+
+// Staging memory means "pinned" host memory that can be mapped into
+// the CUDA memory space and accessed by the device rapidly.  Don't
+// allocate too much staging memory; otherwise system performance will
+// degrade because the OS cannot find enough swap memory space.
+void* AllocStaging(CPUPlace, size_t);
+void* FreeStaging(CPUPlace, size_t);
 
 }  // namespace memory
 }  // namespace paddle
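A short usage sketch of the new staging API for a host-to-device copy, assuming the declarations above. Note that this hunk declares FreeStaging(CPUPlace, size_t) while memory.cc defines FreeStaging(CPUPlace, void*); the sketch follows the definition. CopyToDevice and the qualified spelling paddle::framework::CPUPlace are assumptions made for illustration:

#include <stddef.h>  // for size_t
#include <cstring>   // for std::memcpy

#include "paddle/memory/memory.h"  // assumed header location

#ifdef PADDLE_WITH_GPU
#include <cuda_runtime_api.h>

// Stage a host buffer in pinned memory before copying it to the device, so
// the CUDA driver can DMA directly instead of staging the data internally.
void CopyToDevice(const void* src, void* dst_on_device, size_t n) {
  paddle::framework::CPUPlace cpu;  // assumed, following the Place parameter above
  void* pinned = paddle::memory::AllocStaging(cpu, n);
  std::memcpy(pinned, src, n);
  cudaMemcpy(dst_on_device, pinned, n, cudaMemcpyHostToDevice);
  paddle::memory::FreeStaging(cpu, pinned);
}
#endif  // PADDLE_WITH_GPU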