diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc
index 094084458e8e17cf67231f6ac932189122c2b035..71848581934515fafbe9935022bba3023d1d8971 100644
--- a/paddle/operators/recv_op.cc
+++ b/paddle/operators/recv_op.cc
@@ -160,10 +160,12 @@ This operator will recv tensor from send_op
         "Serialized ProgramDesc string for recv to run.");
     AddAttr<std::vector<std::string>>(
         "ParamList", "type list of string",
-        "grad->param name mapping to find which param to optimize.");
+        "grad->param name mapping to find which param to optimize.")
+        .SetDefault({});
     AddAttr<std::vector<std::string>>(
         "GradList", "type list of string",
-        "grad->param name mapping to find which param to optimize.");
+        "grad->param name mapping to find which param to optimize.")
+        .SetDefault({});
     AddAttr<int>("Trainers", "type int",
                  "Number of trainers in the current cluster job")
         .SetDefault(1);
diff --git a/paddle/operators/send_recv_op_test.cc b/paddle/operators/send_recv_op_test.cc
index 3e2e2051afacb748877e3b0c3dec8d6662ac4e72..1715b05c2ca5ebcdb0dc3bd8217ab0cb27800d4e 100644
--- a/paddle/operators/send_recv_op_test.cc
+++ b/paddle/operators/send_recv_op_test.cc
@@ -16,12 +16,14 @@
 // a RemoteOptimizer.
 
 #include <unistd.h>
+#include <string>
 #include <thread>
 
 #include "gtest/gtest.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/framework/operator.h"
 #include "paddle/framework/program_desc.h"
+#include "paddle/string/printf.h"
 
 USE_NO_KERNEL_OP(send);
 USE_NO_KERNEL_OP(recv);
@@ -33,18 +35,21 @@
 std::unique_ptr<paddle::framework::OperatorBase> recv_op;
 
 void InitTensorsInScope(paddle::framework::Scope &scope,
                         paddle::platform::CPUPlace &place) {
   paddle::platform::CPUDeviceContext ctx(place);
-  auto var = scope.Var("X");
-  auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
-  tensor->Resize({10, 10});
-  float *expect = tensor->mutable_data<float>(place);
-  for (int64_t i = 0; i < tensor->numel(); ++i) {
-    expect[i] = static_cast<float>(i);
+  for (int i = 0; i < 2; ++i) {
+    auto var_name = paddle::string::Sprintf("x%d", i);
+    auto var = scope.Var(var_name);
+    auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
+    tensor->Resize({10, 10});
+    float *expect = tensor->mutable_data<float>(place);
+    for (int64_t i = 0; i < tensor->numel(); ++i) {
+      expect[i] = static_cast<float>(i);
+    }
   }
   auto out_var = scope.Var("Out");
   auto out_tensor = out_var->GetMutable<paddle::framework::LoDTensor>();
   out_tensor->Resize({10, 10});
-  tensor->mutable_data<float>(place);  // allocate
+  out_tensor->mutable_data<float>(place);  // allocate
 }
 
 void AddOp(const std::string &type,
@@ -81,7 +86,7 @@ void StartServerNet() {
   paddle::framework::ProgramDescBind program;
   paddle::framework::BlockDescBind *block = program.MutableBlock(0);
   // X for server side tensors, RX for received tensers, must be of same shape.
-  AddOp("sum", {{"X", {"X", "RX"}}}, {{"Out", {"Out"}}}, {}, block);
+  AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, block);
 
   paddle::framework::AttributeMap attrs;
   attrs.insert({"endpoint", std::string("127.0.0.1:6174")});
@@ -89,8 +94,8 @@ void StartServerNet() {
   PADDLE_ENFORCE(program.Proto()->SerializeToString(&program_proto));
 
   attrs.insert({"OptimizeProgram", program_proto});
-  recv_op = paddle::framework::OpRegistry::CreateOp("recv", {{"RX", {"RX"}}},
-                                                    {{"Out", {"Out"}}}, attrs);
+  recv_op = paddle::framework::OpRegistry::CreateOp(
+      "recv", {{"RX", {"x0", "x1"}}}, {{"Out", {"Out"}}}, attrs);
   paddle::platform::CPUDeviceContext ctx(place);
   recv_op->Run(scope, ctx);
 }
@@ -107,11 +112,11 @@ TEST(SendRecvOp, CPU) {
   attrs.insert({"endpoint", std::string("127.0.0.1:6174")});
 
   auto send_op = paddle::framework::OpRegistry::CreateOp(
-      "send", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs);
+      "send", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, attrs);
   paddle::platform::CPUDeviceContext ctx(place);
   send_op->Run(scope, ctx);
 
-  auto in_var = scope.Var("X");
+  auto in_var = scope.Var("x0");
   auto tensor = in_var->GetMutable<paddle::framework::LoDTensor>();
   float *expected = tensor->data<float>();
diff --git a/python/paddle/v2/fluid/tests/book/notest_recognize_digits_conv_dist.py b/python/paddle/v2/fluid/tests/book/notest_recognize_digits_conv_dist.py
index c7f4f2212f33678438ae57b426a91608a8edf6f1..2680502efb91061be37a77fbe5b451960fdd15f7 100644
--- a/python/paddle/v2/fluid/tests/book/notest_recognize_digits_conv_dist.py
+++ b/python/paddle/v2/fluid/tests/book/notest_recognize_digits_conv_dist.py
@@ -39,14 +39,16 @@ train_reader = paddle.batch(
 place = fluid.CPUPlace()
 exe = fluid.Executor(place)
 t = fluid.DistributeTranspiler()
-t.transpile(optimize_ops, params_grads, pservers="127.0.0.1:6174", trainers=1)
+pserver_endpoints = os.getenv("PSERVERS")
+training_role = os.getenv("TRAINING_ROLE",
+                          "TRAINER")  # get the training role: trainer/pserver
+t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=1)
 
-pserver_endpoint = os.getenv("PSERVER")
-if pserver_endpoint:
-    pserver_prog = t.get_pserver_program(pserver_endpoint, optimize_ops)
+if training_role == "PSERVER":
+    pserver_prog = t.get_pserver_program(pserver_endpoints, optimize_ops)
     exe.run(fluid.default_startup_program())
     exe.run(pserver_prog)
-else:
+elif training_role == "TRAINER":
     feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
     exe.run(fluid.default_startup_program())
 
@@ -64,5 +66,7 @@ else:
 
         pass_acc = accuracy.eval(exe)
         print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
+else:
+    print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
 
 exit(1)