Use posix_memalign to allocate aligned memory, since some SIMD instructions...

Use posix_memalign to allocate aligned memory, since some SIMD instructions require aligned memory accesses.

Use posix_memalign to allocate aligned memory, since some SIMD instructions...
Use posix_memalign to allocate aligned memory, since some SIMD instructions require aligned memory accesses.
a328ae3b · dangqingqing · ef2f0ec9 · a328ae3b · a328ae3b · a328ae3b
3 changed files
--- a/paddle/memory/detail/system_allocator.cc
+++ b/paddle/memory/detail/system_allocator.cc
@@ -41,7 +41,16 @@ void* CPUAllocator::Alloc(size_t& index, size_t size) {

  index = 0;  // unlock memory

-  void* p = malloc(size);
+  void* p;
+
+#ifdef PADDLE_USE_MKLDNN
+  // refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp
+  // memory alignment
+  PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096ul, size), 0);
+#else
+  PADDLE_ENFORCE_EQ(posix_memalign(&p, 32ul, size), 0);
+#endif
+  PADDLE_ENFORCE(p, "Fail to allocate CPU memory: size = %d .", size);

  if (p != nullptr) {
    if (FLAGS_use_pinned_memory) {

--- a/paddle/operators/reshape_op.cc
+++ b/paddle/operators/reshape_op.cc
@@ -36,7 +36,7 @@ class ReshapeOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE(shape.size() > 0, "Attr(shape) shouldn't be empty.");
    auto x_dims = ctx->GetInputDim("X");
    // TODO(qiao) change batch_size
-    for (int i = 1; i < shape.size(); ++i) {
+    for (size_t i = 1; i < shape.size(); ++i) {
      PADDLE_ENFORCE(shape[i] > 0,
                     "Each dimension of shape "
                     "must be positiv except the first.");

--- a/paddle/operators/save_load_op_test.cc
+++ b/paddle/operators/save_load_op_test.cc
@@ -34,7 +34,7 @@ TEST(SaveLoadOp, CPU) {

  tensor->set_lod(expect_lod);
  int* expect = tensor->mutable_data<int>(place);
-  for (size_t i = 0; i < paddle::framework::product(tensor->dims()); ++i) {
+  for (int64_t i = 0; i < tensor->numel(); ++i) {
    expect[i] = static_cast<int>(i);
  }
  paddle::framework::AttributeMap attrs;
@@ -50,7 +50,7 @@ TEST(SaveLoadOp, CPU) {
      "load", {}, {{"Out", {"out_var"}}}, attrs);
  load_op->Run(scope, ctx);
  int* actual = target->data<int>();
-  for (size_t i = 0; i < paddle::framework::product(tensor->dims()); ++i) {
+  for (int64_t i = 0; i < tensor->numel(); ++i) {
    EXPECT_EQ(expect[i], actual[i]);
  }
  auto& actual_lod = target->lod();