提交 e805cfcb 编写于 作者: Y Yancey1989

Fix failing unit test

上级 b1b7af40
...@@ -160,10 +160,12 @@ This operator will recv tensor from send_op ...@@ -160,10 +160,12 @@ This operator will recv tensor from send_op
"Serialized ProgramDesc string for recv to run."); "Serialized ProgramDesc string for recv to run.");
AddAttr<std::vector<std::string>>( AddAttr<std::vector<std::string>>(
"ParamList", "type list of string", "ParamList", "type list of string",
"grad->param name mapping to find which param to optimize."); "grad->param name mapping to find which param to optimize.")
.SetDefault({});
AddAttr<std::vector<std::string>>( AddAttr<std::vector<std::string>>(
"GradList", "type list of string", "GradList", "type list of string",
"grad->param name mapping to find which param to optimize."); "grad->param name mapping to find which param to optimize.")
.SetDefault({});
AddAttr<int>("Trainers", "type int", AddAttr<int>("Trainers", "type int",
"Number of trainers in the current cluster job") "Number of trainers in the current cluster job")
.SetDefault(1); .SetDefault(1);
......
...@@ -16,12 +16,14 @@ ...@@ -16,12 +16,14 @@
// a RemoteOptimizer. // a RemoteOptimizer.
#include <unistd.h> #include <unistd.h>
#include <string>
#include <thread> #include <thread>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
#include "paddle/framework/program_desc.h" #include "paddle/framework/program_desc.h"
#include "paddle/string/printf.h"
USE_NO_KERNEL_OP(send); USE_NO_KERNEL_OP(send);
USE_NO_KERNEL_OP(recv); USE_NO_KERNEL_OP(recv);
...@@ -33,18 +35,21 @@ std::unique_ptr<paddle::framework::OperatorBase> recv_op; ...@@ -33,18 +35,21 @@ std::unique_ptr<paddle::framework::OperatorBase> recv_op;
void InitTensorsInScope(paddle::framework::Scope &scope, void InitTensorsInScope(paddle::framework::Scope &scope,
paddle::platform::CPUPlace &place) { paddle::platform::CPUPlace &place) {
paddle::platform::CPUDeviceContext ctx(place); paddle::platform::CPUDeviceContext ctx(place);
auto var = scope.Var("X"); for (int i = 0; i < 2; ++i) {
auto tensor = var->GetMutable<paddle::framework::LoDTensor>(); auto var_name = paddle::string::Sprintf("x%d", i);
tensor->Resize({10, 10}); auto var = scope.Var(var_name);
float *expect = tensor->mutable_data<float>(place); auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
for (int64_t i = 0; i < tensor->numel(); ++i) { tensor->Resize({10, 10});
expect[i] = static_cast<float>(i); float *expect = tensor->mutable_data<float>(place);
for (int64_t i = 0; i < tensor->numel(); ++i) {
expect[i] = static_cast<float>(i);
}
} }
auto out_var = scope.Var("Out"); auto out_var = scope.Var("Out");
auto out_tensor = out_var->GetMutable<paddle::framework::LoDTensor>(); auto out_tensor = out_var->GetMutable<paddle::framework::LoDTensor>();
out_tensor->Resize({10, 10}); out_tensor->Resize({10, 10});
tensor->mutable_data<float>(place); // allocate out_tensor->mutable_data<float>(place); // allocate
} }
void AddOp(const std::string &type, void AddOp(const std::string &type,
...@@ -81,7 +86,7 @@ void StartServerNet() { ...@@ -81,7 +86,7 @@ void StartServerNet() {
paddle::framework::ProgramDescBind program; paddle::framework::ProgramDescBind program;
paddle::framework::BlockDescBind *block = program.MutableBlock(0); paddle::framework::BlockDescBind *block = program.MutableBlock(0);
// X for server side tensors, RX for received tensors, must be of same shape. // X for server side tensors, RX for received tensors, must be of same shape.
AddOp("sum", {{"X", {"X", "RX"}}}, {{"Out", {"Out"}}}, {}, block); AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, block);
paddle::framework::AttributeMap attrs; paddle::framework::AttributeMap attrs;
attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); attrs.insert({"endpoint", std::string("127.0.0.1:6174")});
...@@ -89,8 +94,8 @@ void StartServerNet() { ...@@ -89,8 +94,8 @@ void StartServerNet() {
PADDLE_ENFORCE(program.Proto()->SerializeToString(&program_proto)); PADDLE_ENFORCE(program.Proto()->SerializeToString(&program_proto));
attrs.insert({"OptimizeProgram", program_proto}); attrs.insert({"OptimizeProgram", program_proto});
recv_op = paddle::framework::OpRegistry::CreateOp("recv", {{"RX", {"RX"}}}, recv_op = paddle::framework::OpRegistry::CreateOp(
{{"Out", {"Out"}}}, attrs); "recv", {{"RX", {"x0", "x1"}}}, {{"Out", {"Out"}}}, attrs);
paddle::platform::CPUDeviceContext ctx(place); paddle::platform::CPUDeviceContext ctx(place);
recv_op->Run(scope, ctx); recv_op->Run(scope, ctx);
} }
...@@ -107,11 +112,11 @@ TEST(SendRecvOp, CPU) { ...@@ -107,11 +112,11 @@ TEST(SendRecvOp, CPU) {
attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); attrs.insert({"endpoint", std::string("127.0.0.1:6174")});
auto send_op = paddle::framework::OpRegistry::CreateOp( auto send_op = paddle::framework::OpRegistry::CreateOp(
"send", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs); "send", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, attrs);
paddle::platform::CPUDeviceContext ctx(place); paddle::platform::CPUDeviceContext ctx(place);
send_op->Run(scope, ctx); send_op->Run(scope, ctx);
auto in_var = scope.Var("X"); auto in_var = scope.Var("x0");
auto tensor = in_var->GetMutable<paddle::framework::LoDTensor>(); auto tensor = in_var->GetMutable<paddle::framework::LoDTensor>();
float *expected = tensor->data<float>(); float *expected = tensor->data<float>();
......
...@@ -39,14 +39,16 @@ train_reader = paddle.batch( ...@@ -39,14 +39,16 @@ train_reader = paddle.batch(
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
t = fluid.DistributeTranspiler() t = fluid.DistributeTranspiler()
t.transpile(optimize_ops, params_grads, pservers="127.0.0.1:6174", trainers=1) pserver_endpoints = os.getenv("PSERVERS")
training_role = os.getenv("TRAINING_ROLE",
"TRAINER") # get the training role: trainer/pserver
t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=1)
pserver_endpoint = os.getenv("PSERVER") if training_role == "PSERVER":
if pserver_endpoint: pserver_prog = t.get_pserver_program(pserver_endpoints, optimize_ops)
pserver_prog = t.get_pserver_program(pserver_endpoint, optimize_ops)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
exe.run(pserver_prog) exe.run(pserver_prog)
else: elif training_role == "TRAINER":
feeder = fluid.DataFeeder(feed_list=[images, label], place=place) feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
...@@ -64,5 +66,7 @@ else: ...@@ -64,5 +66,7 @@ else:
pass_acc = accuracy.eval(exe) pass_acc = accuracy.eval(exe)
print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc)) print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
else:
print("environment var TRAINER_ROLE should be TRAINER os PSERVER")
exit(1) exit(1)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册