未验证 提交 e5fe8935 编写于 作者: Y Yancey 提交者: GitHub

send_recv variables (#7161)

* send_recv variable

* delete unused logs

* fix ci failed

* update

* resize tensor before tensor copy

* add selectedrows unit test

* check rows
上级 7508d52b
...@@ -217,9 +217,10 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor, ...@@ -217,9 +217,10 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor,
SerializeToStream(os, static_cast<Tensor>(tensor), dev_ctx); SerializeToStream(os, static_cast<Tensor>(tensor), dev_ctx);
} }
void DeserializeFromStream(std::istream &is, LoDTensor *tensor) { void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
const platform::DeviceContext &dev_ctx) {
{ {
// the 1st field, unit32_t version for SelectedRows // the 1st field, unit32_t version for LoDTensor
uint32_t version; uint32_t version;
is.read(reinterpret_cast<char *>(&version), sizeof(version)); is.read(reinterpret_cast<char *>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
...@@ -240,7 +241,7 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor) { ...@@ -240,7 +241,7 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor) {
} }
} }
// the 3st filed, Tensor // the 3st filed, Tensor
DeserializeFromStream(is, static_cast<Tensor *>(tensor)); DeserializeFromStream(is, static_cast<Tensor *>(tensor), dev_ctx);
} }
} // namespace framework } // namespace framework
......
...@@ -208,7 +208,8 @@ void AppendLoD(LoD* lod, const LoD& lod_length); ...@@ -208,7 +208,8 @@ void AppendLoD(LoD* lod, const LoD& lod_length);
*/ */
void SerializeToStream(std::ostream& os, const LoDTensor& tensor, void SerializeToStream(std::ostream& os, const LoDTensor& tensor,
const platform::DeviceContext& dev_ctx); const platform::DeviceContext& dev_ctx);
void DeserializeFromStream(std::istream& is, LoDTensor* tensor); void DeserializeFromStream(std::istream& is, LoDTensor* tensor,
const platform::DeviceContext& dev_ctx);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -132,7 +132,7 @@ TEST_F(LoDTensorTester, SerializeAndDeserialize) { ...@@ -132,7 +132,7 @@ TEST_F(LoDTensorTester, SerializeAndDeserialize) {
std::ostringstream oss; std::ostringstream oss;
SerializeToStream(oss, lod_tensor_, cpu_ctx); SerializeToStream(oss, lod_tensor_, cpu_ctx);
std::istringstream iss(oss.str()); std::istringstream iss(oss.str());
DeserializeFromStream(iss, &dst_tensor); DeserializeFromStream(iss, &dst_tensor, cpu_ctx);
float* dst_ptr = dst_tensor.mutable_data<float>(platform::CPUPlace()); float* dst_ptr = dst_tensor.mutable_data<float>(platform::CPUPlace());
for (int i = 0; i < kLodTensorSize; ++i) { for (int i = 0; i < kLodTensorSize; ++i) {
EXPECT_EQ(dst_ptr[i], i); EXPECT_EQ(dst_ptr[i], i);
......
...@@ -37,8 +37,8 @@ void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows, ...@@ -37,8 +37,8 @@ void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows,
SerializeToStream(os, selected_rows.value(), dev_ctx); SerializeToStream(os, selected_rows.value(), dev_ctx);
} }
void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows) { void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows,
auto tensor = *selected_rows->mutable_value(); const platform::DeviceContext& dev_ctx) {
{ {
// the 1st field, unit32_t version for SelectedRows // the 1st field, unit32_t version for SelectedRows
uint32_t version; uint32_t version;
...@@ -62,7 +62,7 @@ void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows) { ...@@ -62,7 +62,7 @@ void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows) {
selected_rows->set_height(height); selected_rows->set_height(height);
} }
// the 4st field, tensor which contains the data // the 4st field, tensor which contains the data
DeserializeFromStream(is, &tensor); DeserializeFromStream(is, selected_rows->mutable_value(), dev_ctx);
} }
} // namespace framework } // namespace framework
......
...@@ -66,7 +66,8 @@ class SelectedRows { ...@@ -66,7 +66,8 @@ class SelectedRows {
*/ */
void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows, void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows,
const platform::DeviceContext& dev_ctx); const platform::DeviceContext& dev_ctx);
void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows); void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows,
const platform::DeviceContext& dev_ctx);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -51,10 +51,12 @@ TEST_F(SelectedRowsTester, SerializeAndDeseralize) { ...@@ -51,10 +51,12 @@ TEST_F(SelectedRowsTester, SerializeAndDeseralize) {
SerializeToStream(oss, *selected_rows_, cpu_ctx); SerializeToStream(oss, *selected_rows_, cpu_ctx);
std::istringstream iss(oss.str()); std::istringstream iss(oss.str());
DeserializeFromStream(iss, &dst_tensor); DeserializeFromStream(iss, &dst_tensor, cpu_ctx);
ASSERT_EQ(selected_rows_->rows(), dst_tensor.rows()); ASSERT_EQ(selected_rows_->rows(), dst_tensor.rows());
ASSERT_EQ(selected_rows_->height(), dst_tensor.height()); ASSERT_EQ(selected_rows_->height(), dst_tensor.height());
ASSERT_EQ(selected_rows_->value().dims(), dst_tensor.value().dims());
ASSERT_EQ(selected_rows_->GetCompleteDims(), dst_tensor.GetCompleteDims());
} }
} // namespace framework } // namespace framework
......
...@@ -270,7 +270,23 @@ inline void SerializeToStream(std::ostream& os, const Tensor& tensor, ...@@ -270,7 +270,23 @@ inline void SerializeToStream(std::ostream& os, const Tensor& tensor,
} }
} }
inline void DeserializeFromStream(std::istream& is, Tensor* tensor) { struct DeserializedDataFunctor {
DeserializedDataFunctor(void** buf, Tensor* tensor,
const platform::Place& place)
: buf_(buf), tensor_(tensor), place_(place) {}
template <typename T>
void operator()() {
*buf_ = tensor_->mutable_data<T>(place_);
}
void** buf_;
Tensor* tensor_;
platform::Place place_;
};
inline void DeserializeFromStream(std::istream& is, Tensor* tensor,
const platform::DeviceContext& dev_ctx) {
uint32_t version; uint32_t version;
is.read(reinterpret_cast<char*>(&version), sizeof(version)); is.read(reinterpret_cast<char*>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
...@@ -289,27 +305,28 @@ inline void DeserializeFromStream(std::istream& is, Tensor* tensor) { ...@@ -289,27 +305,28 @@ inline void DeserializeFromStream(std::istream& is, Tensor* tensor) {
dims.reserve(static_cast<size_t>(desc.dims().size())); dims.reserve(static_cast<size_t>(desc.dims().size()));
std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims)); std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
tensor->Resize(framework::make_ddim(dims)); tensor->Resize(framework::make_ddim(dims));
void* buf; void* buf;
platform::Place cpu = platform::CPUPlace(); auto ctx = platform::CPUDeviceContext();
// TODO(Yancey1989): use VisiterDataType instead of DataType switch if (platform::is_gpu_place(dev_ctx.GetPlace())) {
switch (desc.data_type()) { #ifdef PADDLE_WITH_CUDA
case proto::FP32: Tensor cpu_tensor;
buf = tensor->mutable_data<float>(cpu); cpu_tensor.Resize(framework::make_ddim(dims));
break; framework::VisitDataType(
case proto::FP64: desc.data_type(),
buf = tensor->mutable_data<double>(cpu); DeserializedDataFunctor(&buf, &cpu_tensor, ctx.GetPlace()));
break; is.read(static_cast<char*>(buf), cpu_tensor.memory_size());
case proto::INT32: auto cpu_place = new platform::CPUPlace();
buf = tensor->mutable_data<int>(cpu); framework::CopyFrom(cpu_tensor, *cpu_place, dev_ctx, tensor);
break; delete cpu_place;
case proto::INT64: #else
buf = tensor->mutable_data<int64_t>(cpu); PADDLE_THROW("Unexpected branch");
break; #endif
default: } else {
PADDLE_THROW("DataType %d not supported", desc.data_type()); framework::VisitDataType(
desc.data_type(),
DeserializedDataFunctor(&buf, tensor, ctx.GetPlace()));
is.read(static_cast<char*>(buf), tensor->memory_size());
} }
is.read(static_cast<char*>(buf), tensor->memory_size());
} }
} }
......
...@@ -270,11 +270,12 @@ TEST(Tensor, SerializeAndDeserialize) { ...@@ -270,11 +270,12 @@ TEST(Tensor, SerializeAndDeserialize) {
SerializeToStream(oss, src_tensor, cpu_ctx); SerializeToStream(oss, src_tensor, cpu_ctx);
std::istringstream iss(oss.str()); std::istringstream iss(oss.str());
DeserializeFromStream(iss, &dst_tensor); DeserializeFromStream(iss, &dst_tensor, cpu_ctx);
int* dst_ptr = dst_tensor.mutable_data<int>(platform::CPUPlace()); int* dst_ptr = dst_tensor.mutable_data<int>(platform::CPUPlace());
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
ASSERT_EQ(dst_ptr[i], array[i]); ASSERT_EQ(dst_ptr[i], array[i]);
} }
ASSERT_EQ(dst_tensor.dims(), src_tensor.dims());
delete place; delete place;
} }
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
...@@ -292,13 +293,12 @@ TEST(Tensor, SerializeAndDeserialize) { ...@@ -292,13 +293,12 @@ TEST(Tensor, SerializeAndDeserialize) {
SerializeToStream(oss, gpu_tensor, gpu_ctx); SerializeToStream(oss, gpu_tensor, gpu_ctx);
std::istringstream iss(oss.str()); std::istringstream iss(oss.str());
DeserializeFromStream(iss, &dst_tensor); DeserializeFromStream(iss, &dst_tensor, gpu_ctx);
int* dst_ptr = dst_tensor.mutable_data<int>(platform::CPUPlace()); int* dst_ptr = dst_tensor.mutable_data<int>(platform::CPUPlace());
for (int i = 0; i < 6; ++i) { for (int i = 0; i < 6; ++i) {
ASSERT_EQ(dst_ptr[i], array[i]); ASSERT_EQ(dst_ptr[i], array[i]);
} }
delete gpu_place; delete gpu_place;
} }
#endif #endif
......
...@@ -17,6 +17,8 @@ limitations under the License. */ ...@@ -17,6 +17,8 @@ limitations under the License. */
#include "paddle/framework/lod_rank_table.h" #include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor.h"
#include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/framework/variable.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -35,7 +37,7 @@ inline proto::VarDesc::VarType ToVarType(std::type_index type) { ...@@ -35,7 +37,7 @@ inline proto::VarDesc::VarType ToVarType(std::type_index type) {
} }
template <typename Visitor> template <typename Visitor>
inline void VisitVarType(const Variable& var, Visitor visitor) { inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
switch (ToVarType(var.Type())) { switch (ToVarType(var.Type())) {
case proto::VarDesc_VarType_LOD_TENSOR: case proto::VarDesc_VarType_LOD_TENSOR:
visitor(var.Get<framework::LoDTensor>()); visitor(var.Get<framework::LoDTensor>());
......
...@@ -21,14 +21,9 @@ namespace detail { ...@@ -21,14 +21,9 @@ namespace detail {
Status SendRecvServerImpl::SendVariable(ServerContext *context, Status SendRecvServerImpl::SendVariable(ServerContext *context,
const VariableMessage *in_var, const VariableMessage *in_var,
VoidMessage *out_var) { VoidMessage *out_var) {
// TODO(typhoonzero): support different variable types. MessageWithName msg_with_name =
std::istringstream iss(in_var->serialized()); std::make_pair(in_var->varname(), std::move(*in_var));
framework::LoDTensor t; var_recv_queue_.Push(std::move(msg_with_name));
framework::DeserializeFromStream(iss, &t);
TensorWithName tensor_with_name =
std::make_pair(in_var->varname(), std::move(t));
var_recv_queue_.Push(std::move(tensor_with_name));
return Status::OK; return Status::OK;
} }
...@@ -37,14 +32,8 @@ Status SendRecvServerImpl::GetVariable(ServerContext *context, ...@@ -37,14 +32,8 @@ Status SendRecvServerImpl::GetVariable(ServerContext *context,
VariableMessage *out_var) { VariableMessage *out_var) {
std::string get_var_name = in_var->varname(); std::string get_var_name = in_var->varname();
auto *var = scope_->FindVar(get_var_name); auto *var = scope_->FindVar(get_var_name);
auto tensor = var->Get<framework::LoDTensor>();
std::ostringstream oss;
framework::SerializeToStream(oss, tensor, platform::CPUDeviceContext());
std::string *varname = out_var->mutable_varname(); SerializeToMessage(get_var_name, var, platform::CPUDeviceContext(), out_var);
*varname = get_var_name;
std::string *serialized = out_var->mutable_serialized();
*serialized = oss.str();
return Status::OK; return Status::OK;
} }
......
...@@ -27,14 +27,8 @@ bool RPCClient::SendVariable(const framework::Scope& scope, ...@@ -27,14 +27,8 @@ bool RPCClient::SendVariable(const framework::Scope& scope,
auto ctx = platform::CPUDeviceContext(); auto ctx = platform::CPUDeviceContext();
auto* var = scope.FindVar(inname); auto* var = scope.FindVar(inname);
PADDLE_ENFORCE(var); PADDLE_ENFORCE(var);
// TODO(typhoonzero): support SelectedRows SerializeToMessage(inname, var, ctx, &msg);
PADDLE_ENFORCE(var->IsType<framework::LoDTensor>(),
"Only support LoDTensor, %s has wrong type", inname);
const framework::LoDTensor& tensor = var->Get<framework::LoDTensor>();
std::ostringstream oss;
framework::SerializeToStream(oss, tensor, ctx);
msg.set_varname(inname);
msg.set_serialized(oss.str());
Status status = stub_->SendVariable(&context, msg, &out_msg); Status status = stub_->SendVariable(&context, msg, &out_msg);
if (!status.ok()) { if (!status.ok()) {
LOG(ERROR) << "gRPC error: " << status.error_message(); LOG(ERROR) << "gRPC error: " << status.error_message();
...@@ -50,19 +44,15 @@ bool RPCClient::GetVariable(const framework::Scope& scope, ...@@ -50,19 +44,15 @@ bool RPCClient::GetVariable(const framework::Scope& scope,
call_msg.set_varname(outname); call_msg.set_varname(outname);
auto ctx = platform::CPUDeviceContext(); auto ctx = platform::CPUDeviceContext();
Status status = stub_->GetVariable(&context, call_msg, &ret_msg); Status status = stub_->GetVariable(&context, call_msg, &ret_msg);
auto* outvar = scope.FindVar(outname);
if (!status.ok()) { if (!status.ok()) {
LOG(ERROR) << "gRPC error: " << status.error_message(); LOG(ERROR) << "gRPC error: " << status.error_message();
return false; return false;
} }
std::istringstream iss(ret_msg.serialized()); std::istringstream iss(ret_msg.serialized());
DeserializeFromMessage(ret_msg, ctx, outvar);
framework::LoDTensor ret_tensor;
framework::DeserializeFromStream(iss, &ret_tensor);
auto* outvar = scope.FindVar(outname);
framework::LoDTensor* out_tensor = outvar->GetMutable<framework::LoDTensor>();
// FIXME(typhoonzero): do not copy.
framework::CopyFrom(ret_tensor, ctx.GetPlace(), ctx, out_tensor);
return true; return true;
} }
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. Licensed under
the Apache License, Version 2.0 (the "License"); you may not use this file
Licensed under the Apache License, Version 2.0 (the "License"); except in compliance with the License.
you may not use this file except in compliance with the License.
You may obtain a copy of the License at You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0 http://www.apache.org/licenses/LICENSE-2.0
...@@ -13,7 +12,6 @@ See the License for the specific language governing permissions and ...@@ -13,7 +12,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
syntax = "proto3"; syntax = "proto3";
package sendrecv; package sendrecv;
service SendRecvService { service SendRecvService {
...@@ -29,12 +27,18 @@ service SendRecvService { ...@@ -29,12 +27,18 @@ service SendRecvService {
// VariableMessage is serialized paddle variable message. // VariableMessage is serialized paddle variable message.
// It can be: // It can be:
// Tensor
// LoDTensor // LoDTensor
// SelectedRows // SelectedRows
enum VarType {
LOD_TENSOR = 0;
SELECTED_ROWS = 1;
}
message VariableMessage { message VariableMessage {
string varname = 1; string varname = 1;
bytes serialized = 2; // TODO(Yancey1989): reference framework::proto::VarDesc::VarType
VarType type = 2;
bytes serialized = 3;
} }
message VoidMessage {} message VoidMessage {}
...@@ -14,10 +14,10 @@ limitations under the License. */ ...@@ -14,10 +14,10 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/data_type.h"
#include "paddle/framework/lod_tensor.h" #include "paddle/framework/lod_tensor.h"
#include "paddle/framework/scope.h" #include "paddle/framework/scope.h"
#include "paddle/framework/selected_rows.h" #include "paddle/framework/selected_rows.h"
#include "paddle/framework/var_type.h"
#include "paddle/operators/detail/simple_block_queue.h" #include "paddle/operators/detail/simple_block_queue.h"
#include "paddle/operators/detail/send_recv.grpc.pb.h" #include "paddle/operators/detail/send_recv.grpc.pb.h"
...@@ -44,7 +44,7 @@ namespace paddle { ...@@ -44,7 +44,7 @@ namespace paddle {
namespace operators { namespace operators {
namespace detail { namespace detail {
typedef std::pair<std::string, framework::LoDTensor> TensorWithName; typedef std::pair<std::string, sendrecv::VariableMessage> MessageWithName;
class SendRecvServerImpl final : public SendRecvService::Service { class SendRecvServerImpl final : public SendRecvService::Service {
public: public:
...@@ -60,13 +60,13 @@ class SendRecvServerImpl final : public SendRecvService::Service { ...@@ -60,13 +60,13 @@ class SendRecvServerImpl final : public SendRecvService::Service {
void Done(); void Done();
void SetScope(framework::Scope *scope) { scope_ = scope; }; void SetScope(framework::Scope *scope) { scope_ = scope; };
const TensorWithName Get() { return this->var_recv_queue_.Pop(); } const MessageWithName Get() { return this->var_recv_queue_.Pop(); }
void Push(const TensorWithName &msg) { this->var_recv_queue_.Push(msg); } void Push(const MessageWithName &msg) { this->var_recv_queue_.Push(msg); }
private: private:
// received variable from RPC, operators fetch variable from this queue. // received variable from RPC, operators fetch variable from this queue.
SimpleBlockQueue<TensorWithName> var_recv_queue_; SimpleBlockQueue<MessageWithName> var_recv_queue_;
framework::Scope *scope_; framework::Scope *scope_;
// condition of the sub program // condition of the sub program
std::mutex mutex_; std::mutex mutex_;
...@@ -89,6 +89,53 @@ class RPCClient { ...@@ -89,6 +89,53 @@ class RPCClient {
std::unique_ptr<SendRecvService::Stub> stub_; std::unique_ptr<SendRecvService::Stub> stub_;
}; };
inline void SerializeToMessage(const std::string &name,
const framework::Variable *var,
const platform::DeviceContext &ctx,
VariableMessage *msg) {
msg->set_varname(name);
std::ostringstream oss;
switch (framework::ToVarType(var->Type())) {
case framework::proto::VarDesc_VarType_LOD_TENSOR:
msg->set_type(sendrecv::VarType::LOD_TENSOR);
framework::SerializeToStream(oss, var->Get<framework::LoDTensor>(), ctx);
break;
case framework::proto::VarDesc_VarType_SELECTED_ROWS:
msg->set_type(sendrecv::VarType::SELECTED_ROWS);
framework::SerializeToStream(oss, var->Get<framework::SelectedRows>(),
ctx);
break;
default: {
PADDLE_THROW("Serialize does not support type: %s",
typeid(var->Type()).name());
break;
}
}
msg->set_serialized(oss.str());
}
inline void DeserializeFromMessage(const VariableMessage &msg,
const platform::DeviceContext &ctx,
framework::Variable *var) {
using namespace paddle::framework::proto;
std::istringstream iss(msg.serialized());
switch (msg.type()) {
case sendrecv::VarType::LOD_TENSOR:
DeserializeFromStream(iss, var->GetMutable<framework::LoDTensor>(), ctx);
break;
case sendrecv::VarType::SELECTED_ROWS: {
DeserializeFromStream(iss, var->GetMutable<framework::SelectedRows>(),
ctx);
break;
}
default: {
PADDLE_THROW("Deserialize does not support type: %s",
typeid(var->Type()).name());
break;
}
}
}
} // namespace detail } // namespace detail
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -38,10 +38,10 @@ class LoadOp : public framework::OperatorBase { ...@@ -38,10 +38,10 @@ class LoadOp : public framework::OperatorBase {
out_var_name); out_var_name);
auto *tensor = out_var->GetMutable<framework::LoDTensor>(); auto *tensor = out_var->GetMutable<framework::LoDTensor>();
DeserializeFromStream(fin, tensor);
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
auto &dev_ctx = *pool.Get(place); auto &dev_ctx = *pool.Get(place);
DeserializeFromStream(fin, tensor, dev_ctx);
if (platform::is_gpu_place(place)) { if (platform::is_gpu_place(place)) {
// copy CPU to GPU // copy CPU to GPU
......
...@@ -60,7 +60,7 @@ class RecvOp : public framework::OperatorBase { ...@@ -60,7 +60,7 @@ class RecvOp : public framework::OperatorBase {
} }
void Stop() override { void Stop() override {
detail::TensorWithName term_msg; detail::MessageWithName term_msg;
term_msg.first = LISTEN_TERMINATE_MESSAGE; term_msg.first = LISTEN_TERMINATE_MESSAGE;
rpc_service_->Push(term_msg); rpc_service_->Push(term_msg);
rpc_server_->Shutdown(); rpc_server_->Shutdown();
...@@ -94,7 +94,7 @@ class RecvOp : public framework::OperatorBase { ...@@ -94,7 +94,7 @@ class RecvOp : public framework::OperatorBase {
// the gradient arrives, just add suffix 0~n then average the gradient. // the gradient arrives, just add suffix 0~n then average the gradient.
for (size_t i = 0; i < param_count * trainer_count; ++i) { for (size_t i = 0; i < param_count * trainer_count; ++i) {
// blocking get one var from client. // blocking get one var from client.
const detail::TensorWithName &v = rpc_service_->Get(); const detail::MessageWithName &v = rpc_service_->Get();
auto grad_var_name = v.first; auto grad_var_name = v.first;
if (grad_var_name == LISTEN_TERMINATE_MESSAGE) { if (grad_var_name == LISTEN_TERMINATE_MESSAGE) {
exit_flag = true; exit_flag = true;
...@@ -121,11 +121,10 @@ class RecvOp : public framework::OperatorBase { ...@@ -121,11 +121,10 @@ class RecvOp : public framework::OperatorBase {
} }
auto *var = recv_scope.Var(grad_var_name); auto *var = recv_scope.Var(grad_var_name);
auto *tensor = var->GetMutable<framework::LoDTensor>(); platform::DeviceContextPool &pool =
// FIXME(typhoonzero): do not copy platform::DeviceContextPool::Instance();
platform::DeviceContextPool &pool = platform::DeviceContextPool::Get(); auto &dev_ctx = *pool.Get(dev_place);
auto &dev_ctx = *pool.Borrow(dev_place); detail::DeserializeFromMessage(v.second, dev_ctx, var);
framework::CopyFrom(v.second, dev_place, dev_ctx, tensor);
} }
if (exit_flag) { if (exit_flag) {
break; break;
......
...@@ -20,22 +20,27 @@ limitations under the License. */ ...@@ -20,22 +20,27 @@ limitations under the License. */
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
#include "paddle/framework/program_desc.h" #include "paddle/framework/program_desc.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/math/selected_rows_functor.h"
#include "paddle/string/printf.h" #include "paddle/string/printf.h"
USE_NO_KERNEL_OP(send); USE_NO_KERNEL_OP(send);
USE_NO_KERNEL_OP(recv); USE_NO_KERNEL_OP(recv);
USE_OP(sum); USE_OP(sum);
namespace f = paddle::framework;
namespace p = paddle::platform;
namespace m = paddle::operators::math;
// global for simplicity. // global for simplicity.
std::unique_ptr<paddle::framework::OperatorBase> recv_op; std::unique_ptr<f::OperatorBase> recv_op;
void InitTensorsInScope(paddle::framework::Scope &scope, void InitTensorsInScope(f::Scope &scope, p::CPUPlace &place) {
paddle::platform::CPUPlace &place) { p::CPUDeviceContext ctx(place);
paddle::platform::CPUDeviceContext ctx(place);
for (int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
auto var_name = paddle::string::Sprintf("x%d", i); auto var_name = paddle::string::Sprintf("x%d", i);
auto var = scope.Var(var_name); auto var = scope.Var(var_name);
auto tensor = var->GetMutable<paddle::framework::LoDTensor>(); auto tensor = var->GetMutable<f::LoDTensor>();
tensor->Resize({10, 10}); tensor->Resize({10, 10});
float *expect = tensor->mutable_data<float>(place); float *expect = tensor->mutable_data<float>(place);
for (int64_t i = 0; i < tensor->numel(); ++i) { for (int64_t i = 0; i < tensor->numel(); ++i) {
...@@ -44,21 +49,53 @@ void InitTensorsInScope(paddle::framework::Scope &scope, ...@@ -44,21 +49,53 @@ void InitTensorsInScope(paddle::framework::Scope &scope,
} }
auto out_var = scope.Var("Out"); auto out_var = scope.Var("Out");
auto out_tensor = out_var->GetMutable<paddle::framework::LoDTensor>(); auto out_tensor = out_var->GetMutable<f::LoDTensor>();
out_tensor->Resize({10, 10}); out_tensor->Resize({10, 10});
out_tensor->mutable_data<float>(place); // allocate out_tensor->mutable_data<float>(place); // allocate
} }
void AddOp(const std::string &type, void InitSelectedRowsInScope(f::Scope &scope, p::CPUPlace &place) {
const paddle::framework::VariableNameMap &inputs, p::CPUDeviceContext ctx(place);
const paddle::framework::VariableNameMap &outputs, int64_t height = 10;
paddle::framework::AttributeMap attrs, int64_t row_numel = 10;
paddle::framework::BlockDesc *block) { m::SetConstant<p::CPUDeviceContext, float> set_one;
// init x0
std::vector<int64_t> rows0{0, 4, 7};
auto x0_var = scope.Var("x0");
auto x0 = x0_var->GetMutable<f::SelectedRows>();
x0->set_rows(rows0);
x0->set_height(height);
auto x0_value = x0->mutable_value();
x0_value->mutable_data<float>(
f::make_ddim({static_cast<int64_t>(rows0.size()), row_numel}), place);
set_one(ctx, x0_value, 1.0);
// init x1
std::vector<int64_t> rows1{2, 9};
auto x1_var = scope.Var("x1");
auto x1 = x1_var->GetMutable<f::SelectedRows>();
x1->set_rows(rows1);
x1->set_height(height);
auto x1_value = x1->mutable_value();
x1_value->mutable_data<float>(
f::make_ddim({static_cast<int64_t>(rows1.size()), row_numel}), place);
set_one(ctx, x1_value, 1.0);
auto out_var = scope.Var("Out");
auto out = out_var->GetMutable<f::SelectedRows>();
auto out_value = out->mutable_value();
out->set_height(height);
out_value->mutable_data<float>(f::make_ddim({5, 10}), place);
}
void AddOp(const std::string &type, const f::VariableNameMap &inputs,
const f::VariableNameMap &outputs, f::AttributeMap attrs,
f::BlockDesc *block) {
// insert output // insert output
for (auto kv : outputs) { for (auto kv : outputs) {
for (auto v : kv.second) { for (auto v : kv.second) {
auto var = block->Var(v); auto var = block->Var(v);
var->SetDataType(paddle::framework::proto::DataType::FP32); var->SetDataType(f::proto::DataType::FP32);
} }
} }
...@@ -74,58 +111,99 @@ void AddOp(const std::string &type, ...@@ -74,58 +111,99 @@ void AddOp(const std::string &type,
op->SetAttrMap(attrs); op->SetAttrMap(attrs);
} }
void StartServerNet() { void StartServerNet(bool is_sparse) {
paddle::framework::Scope scope; f::Scope scope;
paddle::platform::CPUPlace place; p::CPUPlace place;
InitTensorsInScope(scope, place); if (is_sparse) {
InitSelectedRowsInScope(scope, place);
} else {
InitTensorsInScope(scope, place);
}
// sub program run in recv_op, for simple test we use sum // sub program run in recv_op, for simple test we use sum
paddle::framework::ProgramDesc program; f::ProgramDesc program;
paddle::framework::BlockDesc *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
// X for server side tensors, RX for received tensers, must be of same shape. // X for server side tensors, RX for received tensers, must be of same shape.
AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"x0"}}}, {}, block); AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, block);
paddle::framework::AttributeMap attrs; f::AttributeMap attrs;
attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); attrs.insert({"endpoint", std::string("127.0.0.1:6174")});
attrs.insert({"ParamList", std::vector<std::string>({"x0"})}); attrs.insert({"ParamList", std::vector<std::string>({"Out"})});
attrs.insert({"GradList", std::vector<std::string>({"x1"})}); attrs.insert({"GradList", std::vector<std::string>({"x1"})});
std::string program_proto; std::string program_proto;
PADDLE_ENFORCE(program.Proto()->SerializeToString(&program_proto)); PADDLE_ENFORCE(program.Proto()->SerializeToString(&program_proto));
attrs.insert({"OptimizeProgram", program_proto}); attrs.insert({"OptimizeProgram", program_proto});
recv_op = paddle::framework::OpRegistry::CreateOp("recv", {{"RX", {"x1"}}}, recv_op = f::OpRegistry::CreateOp("recv", {{"RX", {"x1"}}}, {}, attrs);
{}, attrs);
recv_op->Run(scope, place); recv_op->Run(scope, place);
} }
TEST(SendRecvOp, CPU) { TEST(SendRecvOp, CPUDense) {
std::thread server_thread(StartServerNet); std::thread server_thread(StartServerNet, false);
sleep(5); // wait server to start sleep(3); // wait server to start
// local net // local net
paddle::framework::Scope scope; f::Scope scope;
paddle::platform::CPUPlace place; p::CPUPlace place;
InitTensorsInScope(scope, place); InitTensorsInScope(scope, place);
paddle::framework::AttributeMap attrs; f::AttributeMap attrs;
attrs.insert({"endpoints", std::vector<std::string>({"127.0.0.1:6174"})}); attrs.insert({"endpoints", std::vector<std::string>({"127.0.0.1:6174"})});
attrs.insert({"epmap", std::vector<std::string>({"127.0.0.1:6174"})}); attrs.insert({"epmap", std::vector<std::string>({"127.0.0.1:6174"})});
auto send_op = paddle::framework::OpRegistry::CreateOp( auto send_op = f::OpRegistry::CreateOp("send", {{"X", {"x1"}}},
"send", {{"X", {"x1"}}}, {{"Out", {"x0"}}}, attrs); {{"Out", {"Out"}}}, attrs);
send_op->Run(scope, place); send_op->Run(scope, place);
auto in_var = scope.Var("x1"); auto in_var = scope.Var("x1");
auto tensor = in_var->GetMutable<paddle::framework::LoDTensor>(); auto tensor = in_var->GetMutable<f::LoDTensor>();
float *expected = tensor->data<float>(); float *expected = tensor->data<float>();
auto out_var = scope.Var("x0"); auto out_var = scope.Var("Out");
auto target = out_var->GetMutable<paddle::framework::LoDTensor>(); auto target = out_var->GetMutable<f::LoDTensor>();
// x1 * 2 == x0 // x1 * 2 == x0
EXPECT_NE(target->memory_size(), size_t(0)); EXPECT_NE(target->memory_size(), size_t(0));
float *actual = target->data<float>(); float *actual = target->data<float>();
for (int64_t i = 0; i < target->numel(); ++i) { for (int64_t i = 0; i < target->numel(); ++i) {
EXPECT_EQ(expected[i] * 2, actual[i]); EXPECT_EQ(expected[i] * 2, actual[i]);
} }
recv_op->Stop();
server_thread.join();
recv_op.reset(nullptr);
}
TEST(SendRecvOp, CPUSparse) {
std::thread server_thread(StartServerNet, true);
sleep(3); // wait server to start
// local net
f::Scope scope;
p::CPUPlace place;
p::CPUDeviceContext ctx(place);
InitSelectedRowsInScope(scope, place);
f::AttributeMap attrs;
attrs.insert({"endpoints", std::vector<std::string>({"127.0.0.1:6174"})});
attrs.insert({"epmap", std::vector<std::string>({"127.0.0.1:6174"})});
auto send_op = f::OpRegistry::CreateOp("send", {{"X", {"x1"}}},
{{"Out", {"Out"}}}, attrs);
send_op->Run(scope, place);
auto x0 = scope.Var("x0")->GetMutable<f::SelectedRows>();
auto x1 = scope.Var("x1")->GetMutable<f::SelectedRows>();
auto out = scope.Var("Out")->GetMutable<f::SelectedRows>();
auto actual = out->mutable_value();
std::unique_ptr<f::SelectedRows> expect{new f::SelectedRows()};
auto expect_value = expect->mutable_value();
expect_value->mutable_data<float>(f::make_ddim({5, 10}), place);
m::SelectedRowsAdd<p::CPUDeviceContext, float> add_functor;
add_functor(ctx, *x0, *x1, expect.get());
EXPECT_EQ(actual->numel(), expect_value->numel());
EXPECT_EQ(out->rows().size(), x0->rows().size() + x1->rows().size());
for (int64_t i = 0; i < expect_value->numel(); ++i) {
EXPECT_EQ(expect_value->mutable_data<float>(place)[i],
actual->mutable_data<float>(place)[i]);
}
recv_op->Stop(); recv_op->Stop();
server_thread.join(); server_thread.join();
// recv_op.reset(); recv_op.reset();
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册