diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/detail/sendrecvop_utils.cc index 16c612c45a37dd2ffd17f8d5f5946df30e9b3fe6..69fcffe9bc34006aef2e5a39227cf6d947e4615f 100644 --- a/paddle/fluid/operators/detail/sendrecvop_utils.cc +++ b/paddle/fluid/operators/detail/sendrecvop_utils.cc @@ -82,7 +82,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, platform::CPUPlace cpu; auto& gpu_dev_ctx = static_cast(ctx); - auto copy_size = tensor.memory_size(); + auto copy_size = tensor.numel() * framework::SizeOfType(tensor.type()); payload = memory::Alloc(cpu, copy_size); memory::Copy(cpu, payload, @@ -99,7 +99,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, } else { payload = tensor.data(); } - payload_size = tensor.memory_size(); + payload_size = tensor.numel() * framework::SizeOfType(tensor.type()); e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber, payload_size); } break; case framework::proto::VarType_Type_SELECTED_ROWS: { @@ -118,7 +118,8 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, platform::CPUPlace cpu; auto& gpu_dev_ctx = static_cast(ctx); - auto copy_size = tensor->memory_size(); + auto copy_size = + tensor->numel() * framework::SizeOfType(tensor->type()); payload = memory::Alloc(cpu, copy_size); memory::Copy(cpu, payload, boost::get(tensor->place()), @@ -133,7 +134,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, } else { payload = slr->mutable_value()->data(); } - payload_size = tensor->memory_size(); + payload_size = tensor->numel() * framework::SizeOfType(tensor->type()); e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber, payload_size); } break; default: diff --git a/paddle/fluid/operators/split_byref_op.h b/paddle/fluid/operators/split_byref_op.h index 9b54c7c74acb512b9493d603e83380b9a92ac91b..a3aad68ea736e223d3917607cca17f5cccfef630 100644 --- a/paddle/fluid/operators/split_byref_op.h +++ b/paddle/fluid/operators/split_byref_op.h @@ -32,7 +32,8 @@ class SplitByrefOpKernel : public framework::OpKernel { for (size_t i = 0; i < outs.size(); ++i) { // NOTE: no need to call mutable_data here to allocate memory. auto* out = outs[i]; - *out = std::move(in->Slice(row_offset, out->dims()[0])); + VLOG(3) << "spliting by ref: " << row_offset << " " << out->dims()[0]; + *out = std::move(in->Slice(row_offset, row_offset + out->dims()[0])); row_offset += out->dims()[0]; } }