Unverified commit 3d567864, authored by qingqing01, committed by GitHub

Merge pull request #5247 from qingqing01/memory_alignment

Allocate aligned memory by posix_memalign.
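For context: malloc only guarantees the platform's default alignment, whereas posix_memalign lets the caller request a specific power-of-two boundary (the MKL-DNN headers recommend 4096-byte alignment for the buffers it touches), and the block is still released with free(). Below is a minimal standalone sketch of the same call pattern; it is illustrative only and not part of this PR.

#include <stdio.h>
#include <stdlib.h>

int main() {
  void* p = nullptr;
  // Ask for a 4096-byte aligned block (the boundary the MKL-DNN docs recommend).
  if (posix_memalign(&p, 4096ul, 1024) != 0) {
    fprintf(stderr, "failed to allocate aligned CPU memory\n");
    return 1;
  }
  printf("aligned pointer: %p\n", p);
  // Memory obtained from posix_memalign is released with plain free().
  free(p);
  return 0;
}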
@@ -41,7 +41,16 @@ void* CPUAllocator::Alloc(size_t& index, size_t size) {
   index = 0;  // unlock memory
-  void* p = malloc(size);
+  void* p;
+#ifdef PADDLE_USE_MKLDNN
+  // refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp
+  // memory alignment
+  PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096ul, size), 0);
+#else
+  PADDLE_ENFORCE_EQ(posix_memalign(&p, 32ul, size), 0);
+#endif
+  PADDLE_ENFORCE(p, "Fail to allocate CPU memory: size = %d .", size);
   if (p != nullptr) {
     if (FLAGS_use_pinned_memory) {
...
@@ -185,7 +185,7 @@ TEST_F(NCCLTester, ncclAllReduceOp) {
                  recv_tensor.numel() * sizeof(float),
                  static_cast<p::CUDADeviceContext *>(dev_ctxs[i])->stream());
-    for (size_t j = 0; j < f::product(kDims); ++j) {
+    for (int64_t j = 0; j < f::product(kDims); ++j) {
       ASSERT_NEAR(ct[j], result, 1e-5);
     }
   }
@@ -234,7 +234,7 @@ TEST_F(NCCLTester, ncclReduceOp) {
                  recv_tensor.numel() * sizeof(float),
                  static_cast<p::CUDADeviceContext *>(dev_ctxs[kRoot])->stream());
-  for (int j = 0; j < f::product(kDims); ++j) {
+  for (int64_t j = 0; j < f::product(kDims); ++j) {
     ASSERT_NEAR(ct[j], result, 1e-5);
   }
 }
@@ -282,7 +282,7 @@ TEST_F(NCCLTester, ncclBcastOp) {
                  recv_tensor.numel() * sizeof(float),
                  static_cast<p::CUDADeviceContext *>(dev_ctxs[idx])->stream());
-  for (size_t j = 0; j < f::product(kDims); ++j) {
+  for (int64_t j = 0; j < f::product(kDims); ++j) {
     ASSERT_NEAR(ct[j], result, 1e-5);
   }
 }
...
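Note on the three loop-index changes above: the element count f::product(kDims) is a signed 64-bit value, so an int64_t index keeps the comparison signed/signed instead of mixing size_t or int with int64_t (which draws -Wsign-compare warnings). A reduced sketch of the pattern follows, with stand-in names rather than the actual test harness.

#include <cstdint>
#include <cstdio>

// Stand-in for f::product(kDims): folds the dimensions into an int64_t count.
int64_t product(const int64_t* dims, int rank) {
  int64_t n = 1;
  for (int i = 0; i < rank; ++i) n *= dims[i];
  return n;
}

int main() {
  const int64_t kDims[] = {100, 100};
  // An int64_t index matches product()'s return type, so no signed/unsigned mix.
  for (int64_t j = 0; j < product(kDims, 2); ++j) {
    // the real test asserts ASSERT_NEAR(ct[j], result, 1e-5) here
  }
  std::printf("checked %lld elements\n",
              static_cast<long long>(product(kDims, 2)));
  return 0;
}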
@@ -36,7 +36,7 @@ class ReshapeOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(shape.size() > 0, "Attr(shape) shouldn't be empty.");
     auto x_dims = ctx->GetInputDim("X");
     // TODO(qiao) change batch_size
-    for (int i = 1; i < shape.size(); ++i) {
+    for (size_t i = 1; i < shape.size(); ++i) {
       PADDLE_ENFORCE(shape[i] > 0,
                      "Each dimension of shape "
                      "must be positiv except the first.");
...
@@ -34,7 +34,7 @@ TEST(SaveLoadOp, CPU) {
   tensor->set_lod(expect_lod);
   int* expect = tensor->mutable_data<int>(place);
-  for (size_t i = 0; i < paddle::framework::product(tensor->dims()); ++i) {
+  for (int64_t i = 0; i < tensor->numel(); ++i) {
     expect[i] = static_cast<int>(i);
   }
   paddle::framework::AttributeMap attrs;
@@ -50,7 +50,7 @@ TEST(SaveLoadOp, CPU) {
       "load", {}, {{"Out", {"out_var"}}}, attrs);
   load_op->Run(scope, ctx);
   int* actual = target->data<int>();
-  for (size_t i = 0; i < paddle::framework::product(tensor->dims()); ++i) {
+  for (int64_t i = 0; i < tensor->numel(); ++i) {
     EXPECT_EQ(expect[i], actual[i]);
   }
   auto& actual_lod = target->lod();
@@ -60,4 +60,4 @@ TEST(SaveLoadOp, CPU) {
       EXPECT_EQ(expect_lod[i][j], actual_lod[i][j]);
     }
   }
-}
\ No newline at end of file
+}
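Similarly, the save/load test now counts with an int64_t index up to tensor->numel(), the tensor's total element count, instead of recomputing paddle::framework::product(tensor->dims()) on every iteration. A simplified sketch of that iteration pattern follows; FakeTensor is a hypothetical stand-in, not the Paddle Tensor class.

#include <cstdint>
#include <vector>

// Hypothetical stand-in: numel() reports the total element count of the tensor.
struct FakeTensor {
  std::vector<int64_t> dims;
  int64_t numel() const {
    int64_t n = 1;
    for (int64_t d : dims) n *= d;
    return n;
  }
};

int main() {
  FakeTensor tensor{{3, 10}};
  std::vector<int> expect(static_cast<size_t>(tensor.numel()));
  // Mirrors the test body: fill with the index, counting with a signed 64-bit i.
  for (int64_t i = 0; i < tensor.numel(); ++i) {
    expect[static_cast<size_t>(i)] = static_cast<int>(i);
  }
  return 0;
}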