diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 38b89b9eb108d73c3374360a81c6ed28502bfdc5..b3c217518b5414ba86bc07b17e6539c3a89fc85b 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -227,6 +227,10 @@ set_source_files_properties( COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor) +# FIXME(typhoonzero): use gtest to get benchmark result +if(WITH_PROFILER) + cc_test(test_send_recv_benchmark SRCS send_recv_op_benchmark.cc DEPS send_op recv_op sum_op executor) +endif() endif() op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc index c69e416e10f2a9ced1f1b22c39235e4c9338e77c..ca1c091920f4984786dcf6a58a097fca783b6463 100644 --- a/paddle/operators/recv_op.cc +++ b/paddle/operators/recv_op.cc @@ -38,7 +38,7 @@ void RunServer(Server **rpc_server, builder.RegisterService(service.get()); std::unique_ptr server(builder.BuildAndStart()); *rpc_server = server.get(); - LOG(INFO) << "Server listening on " << server_address << std::endl; + LOG(INFO) << "Server listening on " << server_address; server->Wait(); } diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc index a3059847f2d420359b347e3a5d514d8a3829a4e2..0f76545743a294ea3ab4f7f01dd8a684587485d2 100644 --- a/paddle/operators/send_op.cc +++ b/paddle/operators/send_op.cc @@ -41,6 +41,7 @@ class SendOp : public framework::OperatorBase { // TODO(typhoonzero): how to call InitVariables } } + void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const override { auto iname = Input("X"); diff --git a/paddle/operators/send_recv_op_benchmark.cc b/paddle/operators/send_recv_op_benchmark.cc new file mode 100644 index 0000000000000000000000000000000000000000..69131989f080e5f6bb302943a7f2386d577160d6 --- /dev/null +++ b/paddle/operators/send_recv_op_benchmark.cc @@ -0,0 +1,121 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +// TODO(typhoonzero): add python bindings for this test as +// a RemoteOptimizer. + +#include +#include +#include + +#include "gtest/gtest.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" +#include "paddle/framework/program_desc.h" + +USE_NO_KERNEL_OP(send); +USE_NO_KERNEL_OP(recv); +USE_OP(sum); + +// global for simplicity. +std::unique_ptr recv_op; +int benchmark_count = 100000; +int mat_size = 10; + +void InitTensorsInScope(paddle::framework::Scope &scope, + paddle::platform::CPUPlace &place) { + paddle::platform::CPUDeviceContext ctx(place); + auto var = scope.Var("X"); + auto tensor = var->GetMutable(); + tensor->Resize({mat_size, mat_size}); + float *expect = tensor->mutable_data(place); + for (int64_t i = 0; i < tensor->numel(); ++i) { + expect[i] = static_cast(i) / 1000.0f; + } + + auto out_var = scope.Var("Out"); + auto out_tensor = out_var->GetMutable(); + out_tensor->Resize({mat_size, mat_size}); + tensor->mutable_data(place); // allocate +} + +void AddOp(const std::string &type, + const paddle::framework::VariableNameMap &inputs, + const paddle::framework::VariableNameMap &outputs, + paddle::framework::AttributeMap attrs, + paddle::framework::BlockDescBind *block) { + // insert output + for (auto kv : outputs) { + for (auto v : kv.second) { + auto var = block->Var(v); + var->SetDataType(paddle::framework::DataType::FP32); + } + } + + // insert op + auto op = block->AppendOp(); + op->SetType(type); + for (auto &kv : inputs) { + op->SetInput(kv.first, kv.second); + } + for (auto &kv : outputs) { + op->SetOutput(kv.first, kv.second); + } + op->SetAttrMap(attrs); +} + +void StartServerNet() { + paddle::framework::Scope scope; + paddle::platform::CPUPlace place; + InitTensorsInScope(scope, place); + + // sub program run in recv_op, for simple test we use sum + paddle::framework::ProgramDescBind program; + paddle::framework::BlockDescBind *block = program.MutableBlock(0); + // X for server side tensors, RX for received tensers, must be of same shape. + AddOp("sum", {{"X", {"X", "RX"}}}, {{"Out", {"Out"}}}, {}, block); + + paddle::framework::AttributeMap attrs; + attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); + attrs.insert({"OptimizeBlock", block}); + recv_op = paddle::framework::OpRegistry::CreateOp("recv", {{"RX", {"RX"}}}, + {{"Out", {"Out"}}}, attrs); + paddle::platform::CPUDeviceContext ctx(place); + for (int i = 0; i < benchmark_count; ++i) { + recv_op->Run(scope, ctx); + } +} + +TEST(SendRecvBenchmark, CPU) { + std::thread server_thread(StartServerNet); + sleep(5); // wait server to start + // local net + paddle::framework::Scope scope; + paddle::platform::CPUPlace place; + InitTensorsInScope(scope, place); + + paddle::framework::AttributeMap attrs; + attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); + + auto send_op = paddle::framework::OpRegistry::CreateOp( + "send", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs); + paddle::platform::CPUDeviceContext ctx(place); + + for (int i = 0; i < benchmark_count; ++i) { + send_op->Run(scope, ctx); + } + + recv_op.reset(); // dtor can shutdown and join server thread. + server_thread.join(); +} diff --git a/paddle/operators/send_recv_op_test.cc b/paddle/operators/send_recv_op_test.cc index ac03eb3752e7cd31dd80f4caa39dc0625f0409d5..42cbc50ca9008ccb4b130f2cb8462b182dd61a5c 100644 --- a/paddle/operators/send_recv_op_test.cc +++ b/paddle/operators/send_recv_op_test.cc @@ -16,6 +16,7 @@ // a RemoteOptimizer. #include +#include #include #include "gtest/gtest.h" @@ -38,7 +39,7 @@ void InitTensorsInScope(paddle::framework::Scope &scope, tensor->Resize({10, 10}); float *expect = tensor->mutable_data(place); for (int64_t i = 0; i < tensor->numel(); ++i) { - expect[i] = static_cast(i); + expect[i] = static_cast(i) / 1000.0f; } auto out_var = scope.Var("Out"); @@ -89,7 +90,11 @@ void StartServerNet() { recv_op = paddle::framework::OpRegistry::CreateOp("recv", {{"RX", {"RX"}}}, {{"Out", {"Out"}}}, attrs); paddle::platform::CPUDeviceContext ctx(place); - recv_op->Run(scope, ctx); + while (1) { + recv_op->Run(scope, ctx); + // run once + break; + } } TEST(SendRecvOp, CPU) {