diff --git a/benchmark/fluid/fluid_benchmark.py b/benchmark/fluid/fluid_benchmark.py index 902dca209fcc07cc8048acda84e917f0f6c6af68..aa70783ecd68be543b2d5aabee96a5b09bd72e6a 100644 --- a/benchmark/fluid/fluid_benchmark.py +++ b/benchmark/fluid/fluid_benchmark.py @@ -180,7 +180,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, print_train_time(start_time, time.time(), num_samples) print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses))), # evaluation - if not args.no_test and batch_acc: + if not args.no_test and batch_acc and not args.use_reader_op: pass_test_acc = test(exe, infer_prog, test_reader, feeder, batch_acc) print(", Test Accuracy: %f" % pass_test_acc) @@ -277,11 +277,12 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_id += 1 print_train_time(start_time, time.time(), num_samples) - if not args.no_test and batch_acc: + if not args.no_test and batch_acc and not args.use_reader_op: + # we have not implement record io for test + # skip test when use args.use_reader_op test_acc = test(startup_exe, infer_prog, test_reader, feeder, batch_acc) print("Pass: %d, Test Accuracy: %f\n" % (pass_id, test_acc)) - exit(0) def print_arguments(args): diff --git a/benchmark/fluid/models/resnet.py b/benchmark/fluid/models/resnet.py index 2ee2b5be09bfcc2e7fcec7eb2f80e28e4e75ab3d..9ed1093c54a501cc93dbbf9c3651fe70914ce26b 100644 --- a/benchmark/fluid/models/resnet.py +++ b/benchmark/fluid/models/resnet.py @@ -199,7 +199,10 @@ def get_model(args): batched_train_reader = paddle.batch( paddle.reader.shuffle( train_reader, buf_size=5120), - batch_size=args.batch_size * args.gpus) - batched_test_reader = paddle.batch(train_reader, batch_size=args.batch_size) + batch_size=args.batch_size * args.gpus, + drop_last=True) + batched_test_reader = paddle.batch( + train_reader, batch_size=args.batch_size, drop_last=True) - return avg_cost, inference_program, optimizer, batched_train_reader, batched_test_reader, batch_acc + return avg_cost, inference_program, optimizer, batched_train_reader,\ + batched_test_reader, batch_acc diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 89d7a62fe9aca3a71ad34b976a186a80174bfd5e..6a8b15a6b60a2e5635dc78fc877f0c8da9a2a998 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -118,6 +118,10 @@ endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}") +if(WITH_DISTRIBUTE) + add_definitions(-DPADDLE_WITH_DISTRIBUTE) +endif() + if(WITH_GOLANG) # we need to symlink Paddle directory into GOPATH. If we # don't do it and we have code that depends on Paddle, go diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 4271e4c1bb6bc7b83f2633191ea2d464f4f56c4c..6bc770580640f242cfce6a9838f00210f785010a 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -83,8 +83,13 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor) cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog) -cc_library(executor SRCS executor.cc DEPS op_registry device_context scope -framework_proto glog lod_rank_table feed_fetch_method) +if(WITH_DISTRIBUTE) + cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr) + set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") + set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) +else() + cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method) +endif() cc_library(parallel_executor SRCS parallel_executor.cc DEPS ssa_graph_builder_factory threaded_ssa_graph_executor scope_buffered_ssa_graph_executor) diff --git a/paddle/fluid/framework/details/ssa_graph_checker.h b/paddle/fluid/framework/details/ssa_graph_checker.h index 542c4a172891ba9d3621918986089b2e400b6ae8..304b221e7e4c414a0ab562a1b99836d3b7c02efb 100644 --- a/paddle/fluid/framework/details/ssa_graph_checker.h +++ b/paddle/fluid/framework/details/ssa_graph_checker.h @@ -19,7 +19,7 @@ namespace paddle { namespace framework { namespace details { -class SSAGraph; +struct SSAGraph; class SSAGraghBuilderWithChecker : public SSAGraphBuilder { public: diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index d4d6c34108b9f1e457d8eb0c36d10339b03330bd..4a6f53cba1f46214dbff3058b221f878ecf46613 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -20,6 +20,9 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/reader.h" +#ifdef PADDLE_WITH_DISTRIBUTE +#include "paddle/fluid/operators/detail/grpc_client.h" +#endif #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" @@ -44,6 +47,14 @@ ExecutorPrepareContext::~ExecutorPrepareContext() { Executor::Executor(const platform::Place& place) : place_(place) {} +#ifdef PADDLE_WITH_DISTRIBUTE +void Executor::Complete() { + ::paddle::operators::detail::RPCClient::GetInstance< + ::paddle::operators::detail::GRPCClient>() + ->SendComplete(); +} +#endif + void InitializeVariable(Variable* var, proto::VarType::Type var_type) { if (var_type == proto::VarType::LOD_TENSOR) { var->GetMutable(); diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h index e6f9c3d31c18f762ef2de269977e0642a79fb174..67a0761dac2a9adcdd0ce2b218c4aa505d688d56 100644 --- a/paddle/fluid/framework/executor.h +++ b/paddle/fluid/framework/executor.h @@ -44,6 +44,13 @@ class Executor { explicit Executor(const platform::Place& place); +#ifdef PADDLE_WITH_DISTRIBUTE + /* + * Sending signal to pserver to mark current trainer stop. + */ + void Complete(); +#endif + /* @Brief * Runtime evaluation of the given ProgramDesc under certain Scope * diff --git a/paddle/fluid/framework/reader.h b/paddle/fluid/framework/reader.h index 3a413941df964c8d9454fafc6030c377c10f9fb1..64d4ceab624312ed366d7e835072899f1f033a88 100644 --- a/paddle/fluid/framework/reader.h +++ b/paddle/fluid/framework/reader.h @@ -35,14 +35,15 @@ class ReaderBase { class DecoratedReader : public ReaderBase { public: - explicit DecoratedReader(ReaderBase* reader) : ReaderBase(), reader_(reader) { + explicit DecoratedReader(const std::shared_ptr& reader) + : ReaderBase(), reader_(reader) { PADDLE_ENFORCE_NOT_NULL(reader_); } void ReInit() override { reader_->ReInit(); } protected: - ReaderBase* reader_; + std::shared_ptr reader_; }; class FileReader : public ReaderBase { @@ -64,7 +65,7 @@ class ReaderHolder { public: void Reset(ReaderBase* reader) { reader_.reset(reader); } - ReaderBase* Get() const { return reader_.get(); } + std::shared_ptr Get() const { return reader_; } void ReadNext(std::vector* out) { PADDLE_ENFORCE_NOT_NULL(reader_); @@ -76,7 +77,7 @@ class ReaderHolder { } private: - std::unique_ptr reader_; + std::shared_ptr reader_; }; } // namespace framework diff --git a/paddle/fluid/operators/detail/grpc_client.cc b/paddle/fluid/operators/detail/grpc_client.cc index 6b8373b1509c898e6ae70a18833df39a4898714a..02ffe3651e1deefcf6981c3d304d64b9a01661bf 100644 --- a/paddle/fluid/operators/detail/grpc_client.cc +++ b/paddle/fluid/operators/detail/grpc_client.cc @@ -34,6 +34,12 @@ void GRPCClient::InitEventLoop() { client_thread_.reset(new std::thread(std::bind(&GRPCClient::Proceed, this))); } +void GRPCClient::SendComplete() { + for (auto& it : channels_) { + this->AsyncSendComplete(it.first); + } +} + GRPCClient::~GRPCClient() { Wait(); cq_.Shutdown(); @@ -210,6 +216,19 @@ void GRPCClient::AsyncSendFetchBarrier(const std::string& ep, req_count_++; } +void GRPCClient::AsyncSendComplete(const std::string& ep, int64_t time_out) { + const auto ch = GetChannel(ep); + + BatchBarrierProcessor* s = new BatchBarrierProcessor(ch); + s->Prepare(time_out); + + sendrecv::VariableMessage req; + req.set_varname(COMPLETE_MESSAGE); + auto rpc = s->stub_->AsyncSendVariable(s->context_.get(), req, &cq_); + rpc->Finish(&s->reply_, &s->status_, reinterpret_cast(s)); + req_count_++; +} + void GRPCClient::Wait() { std::unique_lock lk(sync_mutex_); sync_cond_.wait(lk, [this] { return req_count_ == 0; }); diff --git a/paddle/fluid/operators/detail/grpc_client.h b/paddle/fluid/operators/detail/grpc_client.h index 8db73f875e3e2048386e91f6b5efb29b4ee7e193..44000c028b499d9ad1a0e0dd40a5e287cd61d143 100644 --- a/paddle/fluid/operators/detail/grpc_client.h +++ b/paddle/fluid/operators/detail/grpc_client.h @@ -195,6 +195,8 @@ class GRPCClient : public RPCClient { void Wait() override; + void SendComplete() override; + protected: void InitImpl() override; @@ -204,6 +206,9 @@ class GRPCClient : public RPCClient { void Proceed(); + void AsyncSendComplete(const std::string& ep, + int64_t time_out = RPCClient::rpc_time_out); + std::shared_ptr GetChannel(const std::string& ep); private: diff --git a/paddle/fluid/operators/detail/grpc_server.cc b/paddle/fluid/operators/detail/grpc_server.cc index 18651544a1c0207127be335c37fe85c6e24dc16e..2d34f85838c34f1dfe43d2130e127d0258072fa7 100644 --- a/paddle/fluid/operators/detail/grpc_server.cc +++ b/paddle/fluid/operators/detail/grpc_server.cc @@ -162,16 +162,18 @@ class RequestPrefetch final : public RequestBase { void Process() override { // prefetch process... - std::string varname = request_->OutVarname(); - VLOG(3) << "RequestPrefetch " << varname; + std::string in_var_name = request_->Varname(); + std::string out_var_name = request_->OutVarname(); + VLOG(3) << "RequestPrefetch, in_var_name: " << in_var_name + << " out_var_name: " << out_var_name; auto scope = request_->GetMutableLocalScope(); - auto invar = scope->FindVar(varname); - framework::Variable* outvar = nullptr; + auto invar = scope->FindVar(in_var_name); + framework::Variable* outvar = scope->FindVar(out_var_name); - request_handler_->Handle(varname, scope, invar, &outvar); + request_handler_->Handle(in_var_name, scope, invar, &outvar, out_var_name); - SerializeToByteBuffer(varname, outvar, *request_handler_->dev_ctx(), + SerializeToByteBuffer(out_var_name, outvar, *request_handler_->dev_ctx(), &reply_); Finish(reply_, &responder_); } @@ -287,7 +289,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name, } else if (rpc_name == kRequestPrefetch) { b = new RequestPrefetch(&service_, cq.get(), handler, req_id); } else { - PADDLE_ENFORCE(false, "not surpported rpc"); + PADDLE_ENFORCE(false, "not supported rpc"); } reqs[req_id] = b; diff --git a/paddle/fluid/operators/detail/request_handler.h b/paddle/fluid/operators/detail/request_handler.h index fa979024e37f435b918568a1c5e603f8962f9172..a2d08747d59220d30a5b8fd56074fd2739ae3bab 100644 --- a/paddle/fluid/operators/detail/request_handler.h +++ b/paddle/fluid/operators/detail/request_handler.h @@ -40,6 +40,7 @@ constexpr char kRequestPrefetch[] = "RequestPrefetch"; #define LISTEN_TERMINATE_MESSAGE "TERMINATE@RECV" #define BATCH_BARRIER_MESSAGE "BATCH_BARRIER@RECV" #define FETCH_BARRIER_MESSAGE "FETCH_BARRIER@RECV" +#define COMPLETE_MESSAGE "COMPLETE@RECV" class RPCServer; @@ -60,9 +61,12 @@ class RequestHandler { void SetDevCtx(const platform::DeviceContext* dev_ctx) { dev_ctx_ = dev_ctx; } void SetProgram(framework::ProgramDesc* program) { program_ = program; } void SetExecutor(framework::Executor* executor) { executor_ = executor; } + + // Used for dist lookup table prefetch void SetPrefetchPreparedCtx( - std::unique_ptr prepared) { - prefetch_ctx_.reset(prepared.release()); + std::unordered_map< + std::string, std::shared_ptr>* g) { + prefetch_var_name_to_prepared_ctx_ = g; } // Used for async. @@ -78,9 +82,6 @@ class RequestHandler { bool sync_mode() { return sync_mode_; } framework::Scope* scope() { return scope_; } const platform::DeviceContext* dev_ctx() { return dev_ctx_; } - framework::ExecutorPrepareContext* prefetch_ctx() { - return prefetch_ctx_.get(); - } framework::ProgramDesc* program() { return program_; } framework::Executor* executor() { return executor_; } @@ -99,8 +100,8 @@ class RequestHandler { // *request_handler_->dev_ctx(), &reply_); // } virtual bool Handle(const std::string& varname, framework::Scope* scope, - framework::Variable* var, - framework::Variable** outvar) = 0; + framework::Variable* var, framework::Variable** outvar, + const std::string& out_var_name = "") = 0; protected: const bool sync_mode_; @@ -109,12 +110,17 @@ class RequestHandler { framework::Executor* executor_; framework::Scope* scope_; framework::ProgramDesc* program_; - std::unique_ptr prefetch_ctx_; + + // used for distribute lookup table prefetch + std::unordered_map>* + prefetch_var_name_to_prepared_ctx_; // Used for async. std::unordered_map>* grad_to_prepared_ctx_; + RPCServer* rpc_server_; }; diff --git a/paddle/fluid/operators/detail/request_handler_impl.cc b/paddle/fluid/operators/detail/request_handler_impl.cc index 5f1a346e93b1a0239af77b86d10782d67c403e23..7425bee798cd9ba0af8cd777a6db63862c8a4031 100644 --- a/paddle/fluid/operators/detail/request_handler_impl.cc +++ b/paddle/fluid/operators/detail/request_handler_impl.cc @@ -30,7 +30,8 @@ namespace detail { bool RequestSendHandler::Handle(const std::string& varname, framework::Scope* scope, framework::Variable* invar, - framework::Variable** outvar) { + framework::Variable** outvar, + const std::string& out_var_name) { VLOG(4) << "RequestSendHandler:" << varname; // Async @@ -49,6 +50,9 @@ bool RequestSendHandler::Handle(const std::string& varname, if (varname == BATCH_BARRIER_MESSAGE) { VLOG(3) << "sync: recv batch barrier message"; rpc_server_->IncreaseBatchBarrier(kRequestSend); + } else if (varname == COMPLETE_MESSAGE) { + VLOG(3) << "sync: recv complete message"; + rpc_server_->DecreaseClientNum(); } else { VLOG(3) << "sync: received var_name: " << varname; if (sync_mode_) { @@ -79,7 +83,8 @@ void RequestSendHandler::ResetSparseVarRecorder() { bool RequestGetHandler::Handle(const std::string& varname, framework::Scope* scope, framework::Variable* invar, - framework::Variable** outvar) { + framework::Variable** outvar, + const std::string& out_var_name) { VLOG(4) << "RequestGetHandler:" << varname; if (varname != FETCH_BARRIER_MESSAGE) { @@ -102,13 +107,14 @@ bool RequestGetHandler::Handle(const std::string& varname, bool RequestPrefetchHandler::Handle(const std::string& varname, framework::Scope* scope, framework::Variable* invar, - framework::Variable** outvar) { + framework::Variable** outvar, + const std::string& out_var_name) { VLOG(4) << "RequestPrefetchHandler " << varname; - auto var_desc = program_->Block(0).FindVar(varname); - *outvar = scope->FindVar(varname); + auto var_desc = program_->Block(0).FindVar(out_var_name); InitializeVariable(*outvar, var_desc->GetType()); - executor_->RunPreparedContext(prefetch_ctx_.get(), scope); + executor_->RunPreparedContext( + (*prefetch_var_name_to_prepared_ctx_)[varname].get(), scope); return true; } diff --git a/paddle/fluid/operators/detail/request_handler_impl.h b/paddle/fluid/operators/detail/request_handler_impl.h index c392267cfaeb7e94e6a23d6445c09b756e7e58b1..3f77c09a9598b431d747f1b824615e49d939098e 100644 --- a/paddle/fluid/operators/detail/request_handler_impl.h +++ b/paddle/fluid/operators/detail/request_handler_impl.h @@ -39,7 +39,8 @@ class RequestSendHandler final : public RequestHandler { explicit RequestSendHandler(bool sync_mode) : RequestHandler(sync_mode) {} virtual ~RequestSendHandler() {} bool Handle(const std::string& varname, framework::Scope* scope, - framework::Variable* var, framework::Variable** outvar) override; + framework::Variable* var, framework::Variable** outvar, + const std::string& out_var_name = "") override; void ResetSparseVarRecorder(); private: @@ -52,7 +53,8 @@ class RequestGetHandler final : public RequestHandler { explicit RequestGetHandler(bool sync_mode) : RequestHandler(sync_mode) {} virtual ~RequestGetHandler() {} bool Handle(const std::string& varname, framework::Scope* scope, - framework::Variable* var, framework::Variable** outvar) override; + framework::Variable* var, framework::Variable** outvar, + const std::string& out_var_name = "") override; }; class RequestPrefetchHandler final : public RequestHandler { @@ -60,7 +62,8 @@ class RequestPrefetchHandler final : public RequestHandler { explicit RequestPrefetchHandler(bool sync_mode) : RequestHandler(sync_mode) {} virtual ~RequestPrefetchHandler() {} bool Handle(const std::string& varname, framework::Scope* scope, - framework::Variable* var, framework::Variable** outvar) override; + framework::Variable* var, framework::Variable** outvar, + const std::string& out_var_name = "") override; }; } // namespace detail diff --git a/paddle/fluid/operators/detail/rpc_client.h b/paddle/fluid/operators/detail/rpc_client.h index 7e76ac0348574d4090793b191be0ff3ff8666b37..47c6ffb4fd7a002fc0bd8053fb3314a2fbf18fd3 100644 --- a/paddle/fluid/operators/detail/rpc_client.h +++ b/paddle/fluid/operators/detail/rpc_client.h @@ -53,6 +53,11 @@ class RPCClient { virtual void AsyncSendFetchBarrier(const std::string& ep, int64_t time_out = rpc_time_out) = 0; + // SendComplete tells all the server that current trainer have no more data + // to train, so that the pserver can reduce it's barrier count, and continue + // to train with other trainers. + virtual void SendComplete() = 0; + virtual void Wait() = 0; static constexpr int64_t rpc_time_out = 120 * 1000; diff --git a/paddle/fluid/operators/detail/rpc_server.cc b/paddle/fluid/operators/detail/rpc_server.cc index 448763372a8c224cc68319a4a444915896b68234..cd0fe96e2301ee3304fe9a2967df58b9f7072d8d 100644 --- a/paddle/fluid/operators/detail/rpc_server.cc +++ b/paddle/fluid/operators/detail/rpc_server.cc @@ -43,7 +43,7 @@ void RPCServer::SavePort() const { void RPCServer::WaitBarrier(const std::string& rpc_name) { std::unique_lock lock(this->mutex_); - barrier_cond_.wait(lock, [=] { + barrier_cond_.wait(lock, [this, &rpc_name] { return (barrier_counter_[rpc_name] >= client_num_ || exit_flag_.load()); }); @@ -53,19 +53,23 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) { void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) { VLOG(3) << "RPCServer begin IncreaseBatchBarrier " << rpc_name; int b = 0; - { - std::unique_lock lock(mutex_); - b = ++barrier_counter_[rpc_name]; - } - - VLOG(3) << "RPCServer IncreaseBatchBarrier " << rpc_name - << ", barrier_count:" << b << ", fan_in" << client_num_; - + std::unique_lock lock(mutex_); + b = ++barrier_counter_[rpc_name]; if (b >= client_num_) { + lock.unlock(); barrier_cond_.notify_all(); + lock.lock(); } } +void RPCServer::DecreaseClientNum() { + { + std::unique_lock lock(mutex_); + client_num_--; + } + barrier_cond_.notify_all(); +} + void RPCServer::ResetBarrierCounter() { VLOG(3) << "RPCServer ResetBarrierCounter "; std::unique_lock lock(mutex_); diff --git a/paddle/fluid/operators/detail/rpc_server.h b/paddle/fluid/operators/detail/rpc_server.h index f809c13c726ac2f1c60e8cf84848c4138f631b44..2e3342428cb56c34abaca655d5906668cda8f140 100644 --- a/paddle/fluid/operators/detail/rpc_server.h +++ b/paddle/fluid/operators/detail/rpc_server.h @@ -60,7 +60,7 @@ class RPCServer { void SetCond(const std::string& rpc_name); void WaitCond(const std::string& rpc_name); void IncreaseBatchBarrier(const std::string rpc_name); - + void DecreaseClientNum(); void ResetBarrierCounter(); protected: @@ -79,8 +79,7 @@ class RPCServer { std::string bind_address_; std::atomic exit_flag_; int selected_port_; - - const int client_num_; + int client_num_; std::unordered_map rpc_call_map_; std::unordered_map rpc_thread_num_; diff --git a/paddle/fluid/operators/detail/rpc_server_test.cc b/paddle/fluid/operators/detail/rpc_server_test.cc index f49274a7b53d04f82718887aabaffd5d33053dfe..463a7b80cfac280de5afe91ee85caaaf074cef32 100644 --- a/paddle/fluid/operators/detail/rpc_server_test.cc +++ b/paddle/fluid/operators/detail/rpc_server_test.cc @@ -98,11 +98,17 @@ void StartServer() { framework::Executor exe(place); platform::CPUDeviceContext ctx(place); auto* block = AppendPrefetchBlcok(&program); - auto prepared = exe.Prepare(program, block->ID()); + std::string in_var_name("ids"); + std::vector prefetch_block_ids{block->ID()}; + auto prepared = exe.Prepare(program, prefetch_block_ids); InitTensorsOnServer(&scope, &place, 10); + std::unordered_map> + prefetch_var_name_to_prepared; + prefetch_var_name_to_prepared[in_var_name] = prepared[0]; g_req_handler->SetProgram(&program); - g_req_handler->SetPrefetchPreparedCtx(std::move(prepared)); + g_req_handler->SetPrefetchPreparedCtx(&prefetch_var_name_to_prepared); g_req_handler->SetDevCtx(&ctx); g_req_handler->SetScope(&scope); g_req_handler->SetExecutor(&exe); diff --git a/paddle/fluid/operators/elementwise_op.h b/paddle/fluid/operators/elementwise_op.h index 0803a6035d342fefdae69297461fc78abbf18414..12364fff96c03c5f9dff23c7c00ceedd043803a6 100644 --- a/paddle/fluid/operators/elementwise_op.h +++ b/paddle/fluid/operators/elementwise_op.h @@ -66,40 +66,41 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault(-1) .EqualGreaterThan(-1); AddComment(string::Sprintf(R"DOC( -Limited Elementwise %s Operator. +Limited Elementwise %s Operator The equation is: $$%s$$ -$X$ is a tensor of any dimension and the dimensions of tensor $Y$ must be -smaller than or equal to the dimensions of $X$. +- $X$: a tensor of any dimension. +- $Y$: a tensor whose dimensions must be less than or equal to the dimensions of $X$. There are two cases for this operator: -1. The shape of $Y$ is same with $X$; -2. The shape of $Y$ is a congiguous subsequencet of $X$. The trailing dimensions - of size 1 for $Y$ will be ignored for the consideration of subsequence. +1. The shape of $Y$ is the same with $X$. +2. The shape of $Y$ is a continuous subsequence of $X$. For case 2: -$Y$ will be broadcasted to match the shape of $X$ and axis should be -set to index of the start dimension to broadcast $Y$ onto $X$. +1. Broadcast $Y$ to match the shape of $X$, where $axis$ is the start dimension index + for broadcasting $Y$ onto $X$. +2. If $axis$ is -1 (default), $axis = rank(X) - rank(Y)$. +3. The trailing dimensions of size 1 for $Y$ will be ignored for the consideration of + subsequence, such as shape(Y) = (2, 1) => (2). -If axis is -1, it is treated as axis=rank(X)-rank(Y). +For example: -For example .. code-block:: python shape(X) = (2, 3, 4, 5), shape(Y) = (,) shape(X) = (2, 3, 4, 5), shape(Y) = (5,) - shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5) + shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5), with axis=-1(default) or axis=2 shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1 shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0 shape(X) = (2, 3, 4, 5), shape(Y) = (2, 1), with axis=0 -Either of the inputs $X$ and $Y$ or none can carry the LoD (Level of Details) -information. However, the output only shares the LoD information with input $X$. +The inputs $X$ and $Y$ can carry the different LoD information. +But the output only shares the LoD information with the input $X$. )DOC", GetName(), GetEquation())); diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index 76106004404bb0bb108cca003869f07c43bbf62f..4d12278799f66f2fb92b7580ba0c43e845aa4d3a 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -96,19 +96,22 @@ static int64_t GetTimestamp() { return tp.tv_sec * 1000 + tp.tv_usec / 1000; } -void ListenAndServOp::RunSyncLoop(framework::Executor *executor, - framework::ProgramDesc *program, - framework::Scope *recv_scope, - framework::BlockDesc *prefetch_block) const { +void ListenAndServOp::RunSyncLoop( + framework::Executor *executor, framework::ProgramDesc *program, + framework::Scope *recv_scope, + const std::vector &prefetch_block_id_list) const { size_t num_blocks = program->Size(); PADDLE_ENFORCE_GE(num_blocks, 2, "server program should have at least 2 blocks"); - std::vector block_list; - for (size_t blkid = 1; blkid < num_blocks; ++blkid) { - block_list.push_back(blkid); + std::vector optimize_block_id_list; + for (int blkid = 1; blkid < num_blocks; ++blkid) { + if (std::find(prefetch_block_id_list.begin(), prefetch_block_id_list.end(), + blkid) == prefetch_block_id_list.end()) { + optimize_block_id_list.push_back(blkid); + } } - auto optimize_prepared = executor->Prepare(*program, block_list); + auto optimize_prepared = executor->Prepare(*program, optimize_block_id_list); // Insert placeholder for block0 which holds current op itself. optimize_prepared.insert( optimize_prepared.begin(), @@ -135,16 +138,17 @@ void ListenAndServOp::RunSyncLoop(framework::Executor *executor, std::vector parallel_blkids; parallel_blkids.push_back(1); double ts = GetTimestamp(); - for (size_t blkid = 2; blkid < num_blocks; ++blkid) { - if (blkid != static_cast(prefetch_block->ID())) { - if (program->Block(blkid).Parent() != last_parent_blkid) { - ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, - program, recv_scope); - parallel_blkids.clear(); - last_parent_blkid = program->Block(blkid).Parent(); - } - parallel_blkids.push_back(blkid); + for (size_t i = 1; i < optimize_block_id_list.size(); ++i) { + // skip the first optimize block because it is already in the + // parallel_blkids. + int blkid = optimize_block_id_list[i]; + if (program->Block(blkid).Parent() != last_parent_blkid) { + ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, + program, recv_scope); + parallel_blkids.clear(); + last_parent_blkid = program->Block(blkid).Parent(); } + parallel_blkids.push_back(blkid); } ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program, recv_scope); @@ -210,18 +214,19 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, } // while(true) } -static void FillRequestCtx(detail::RequestHandler *h, framework::Scope *scope, - platform::DeviceContext *dev_ctx, - framework::Executor *executor, - framework::ProgramDesc *program, - framework::ExecutorPrepareContext *prefetch_ctx, - detail::RPCServer *rpc_server) { +static void FillRequestCtx( + detail::RequestHandler *h, framework::Scope *scope, + platform::DeviceContext *dev_ctx, framework::Executor *executor, + framework::ProgramDesc *program, + std::unordered_map> + *prefetch_ctx, + detail::RPCServer *rpc_server) { h->SetScope(scope); h->SetDevCtx(dev_ctx); h->SetExecutor(executor); h->SetProgram(program); - h->SetPrefetchPreparedCtx( - std::unique_ptr(prefetch_ctx)); + h->SetPrefetchPreparedCtx(prefetch_ctx); h->SetRPCServer(rpc_server); } @@ -255,17 +260,42 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, request_prefetch_handler_.get()); auto *optimize_block = Attr(kOptimizeBlock); - auto *prefetch_block = Attr(kPrefetchBlock); auto *program = optimize_block->Program(); framework::Executor executor(dev_place); // prepare for prefetch - VLOG(3) << "prefetch block id is " << prefetch_block->ID(); - auto prefetch_prepared = executor.Prepare(*program, prefetch_block->ID()); + std::vector prefetch_block_id_list; + std::unordered_map block_id_to_prefetch_var_name; + + auto prefetch_var_name_to_block_id_str = + Attr>(kPrefetchVarNameToBlockId); + for (const auto &prefetch_var_name_and_id : + prefetch_var_name_to_block_id_str) { + std::vector pieces; + split(prefetch_var_name_and_id, ':', &pieces); + VLOG(3) << "after split, prefetch_var = " << pieces[0] + << ", id=" << pieces[1]; + PADDLE_ENFORCE_EQ(pieces.size(), 2); + + int block_id = std::stoi(pieces[1]); + prefetch_block_id_list.push_back(block_id); + block_id_to_prefetch_var_name[block_id] = pieces[0]; + } + + auto prefetch_prepared = executor.Prepare(*program, prefetch_block_id_list); + + std::unordered_map> + prefetch_var_name_to_prepared_ctx; + for (size_t i = 0; i < prefetch_block_id_list.size(); ++i) { + auto block_id = prefetch_block_id_list[i]; + auto prefetch_var_name = block_id_to_prefetch_var_name[block_id]; + prefetch_var_name_to_prepared_ctx[prefetch_var_name] = prefetch_prepared[i]; + } auto f = std::bind(FillRequestCtx, std::placeholders::_1, &recv_scope, - &dev_ctx, &executor, program, prefetch_prepared.release(), - rpc_service_.get()); + &dev_ctx, &executor, program, + &prefetch_var_name_to_prepared_ctx, rpc_service_.get()); f(request_send_handler_.get()); f(request_get_handler_.get()); @@ -283,7 +313,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, // Write to a file of server selected port for python use. SavePort(); if (sync_mode) { - RunSyncLoop(&executor, program, &recv_scope, prefetch_block); + RunSyncLoop(&executor, program, &recv_scope, prefetch_block_id_list); } else { RunAsyncLoop(&executor, program); } @@ -309,8 +339,9 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("sync_mode", "if works at sync_mode or not").SetDefault(true); AddAttr(kOptimizeBlock, "BlockID to run on server side."); - AddAttr(kPrefetchBlock, - "prefetch block to run on server side."); + AddAttr>(kPrefetchVarNameToBlockId, + "prefetch blocks to run on server side.") + .SetDefault({}); AddAttr("Fanin", "How many clients send to this server.") .SetDefault(1); } diff --git a/paddle/fluid/operators/listen_and_serv_op.h b/paddle/fluid/operators/listen_and_serv_op.h index 87952cb0e683596b2b0395890b6e25b15f74d7e2..46c3a19e20b3f2dd970a672bb99f98e83d3e25bf 100644 --- a/paddle/fluid/operators/listen_and_serv_op.h +++ b/paddle/fluid/operators/listen_and_serv_op.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include +#include #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -30,7 +31,7 @@ namespace paddle { namespace operators { constexpr char kOptimizeBlock[] = "OptimizeBlock"; -constexpr char kPrefetchBlock[] = "PrefetchBlock"; +constexpr char kPrefetchVarNameToBlockId[] = "prefetch_var_name_to_block_id"; void RunServer(std::shared_ptr service); @@ -46,7 +47,7 @@ class ListenAndServOp : public framework::OperatorBase { void RunSyncLoop(framework::Executor* executor, framework::ProgramDesc* program, framework::Scope* recv_scope, - framework::BlockDesc* prefetch_block) const; + const std::vector& prefetch_block_id_list) const; void RunAsyncLoop(framework::Executor* executor, framework::ProgramDesc* program) const; diff --git a/paddle/fluid/operators/reader/create_batch_reader_op.cc b/paddle/fluid/operators/reader/create_batch_reader_op.cc index 4cc7cbc6e89b0712faf9ad9c51480bce00da15f5..ecbae3894d551186f53625a6cc9cfdb36adc8d2d 100644 --- a/paddle/fluid/operators/reader/create_batch_reader_op.cc +++ b/paddle/fluid/operators/reader/create_batch_reader_op.cc @@ -20,7 +20,7 @@ namespace reader { class BatchReader : public framework::DecoratedReader { public: - BatchReader(ReaderBase* reader, int batch_size) + BatchReader(const std::shared_ptr& reader, int batch_size) : DecoratedReader(reader), batch_size_(batch_size) { buffer_.reserve(batch_size_); } diff --git a/paddle/fluid/operators/reader/create_custom_reader_op.cc b/paddle/fluid/operators/reader/create_custom_reader_op.cc index 331224a59899b4a7d517ca4f7141fb5b8f4f5168..0a02fcdeaa5a6de97d59ddce4f58ad945aa2572a 100644 --- a/paddle/fluid/operators/reader/create_custom_reader_op.cc +++ b/paddle/fluid/operators/reader/create_custom_reader_op.cc @@ -22,7 +22,8 @@ namespace reader { class CustomReader : public framework::DecoratedReader { public: - CustomReader(ReaderBase* reader, const framework::BlockDesc& sub_block, + CustomReader(const std::shared_ptr& reader, + const framework::BlockDesc& sub_block, const std::vector& source_var_names, const std::vector& sink_var_names) : DecoratedReader(reader), diff --git a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc index bc830a2b72e657f79f4c94e24428d38ff2b7c42e..5f35b9b3eac1d9aab8662833c6e39d12f11a0087 100644 --- a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc +++ b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc @@ -34,7 +34,8 @@ static constexpr size_t kChannelSize = 1; // kCacheSize - 2 class DoubleBufferReader : public framework::DecoratedReader { public: explicit DoubleBufferReader( - ReaderBase* reader, platform::Place target_place = platform::CPUPlace()) + const std::shared_ptr& reader, + platform::Place target_place = platform::CPUPlace()) : DecoratedReader(reader), place_(target_place) { cpu_tensor_cache_.resize(kCacheSize); gpu_tensor_cache_.resize(kCacheSize); diff --git a/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc b/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc index 249b0b7c6dbc8b8104bce95562e6e9b2a28c77f8..19b54110b9aeece33b8d6c73612ae0e12dbfafbd 100644 --- a/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc +++ b/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc @@ -21,7 +21,7 @@ namespace reader { class MultiPassReader : public framework::DecoratedReader { public: - MultiPassReader(ReaderBase* reader, int pass_num) + MultiPassReader(const std::shared_ptr& reader, int pass_num) : DecoratedReader(reader), pass_num_(pass_num), pass_count_(0) {} void ReadNext(std::vector* out) override { diff --git a/paddle/fluid/operators/reader/create_shuffle_reader_op.cc b/paddle/fluid/operators/reader/create_shuffle_reader_op.cc index fd233be945932eee9f9a3c0c578a43d5b7cc83aa..57e8e21214b7c99e52550fe51a67c9b5201cb46f 100644 --- a/paddle/fluid/operators/reader/create_shuffle_reader_op.cc +++ b/paddle/fluid/operators/reader/create_shuffle_reader_op.cc @@ -23,7 +23,8 @@ namespace reader { class ShuffleReader : public framework::DecoratedReader { public: - ShuffleReader(ReaderBase* reader, size_t buffer_size, size_t seed = 0) + ShuffleReader(const std::shared_ptr& reader, size_t buffer_size, + size_t seed = 0) : DecoratedReader(reader), buffer_size_(buffer_size), seed_(seed) { VLOG(10) << "Create shuffle reader of " << reader_; if (seed_ == 0) { diff --git a/paddle/fluid/operators/reader/create_threaded_reader_op.cc b/paddle/fluid/operators/reader/create_threaded_reader_op.cc index 1db70f3e9699dba604569c36dc35025dfe2c94fe..3798015146f4ffb085aa82e23ca3f1fb3c5cf5a4 100644 --- a/paddle/fluid/operators/reader/create_threaded_reader_op.cc +++ b/paddle/fluid/operators/reader/create_threaded_reader_op.cc @@ -21,7 +21,8 @@ namespace reader { class ThreadedReader : public framework::DecoratedReader { public: - explicit ThreadedReader(ReaderBase* reader) : DecoratedReader(reader) {} + explicit ThreadedReader(const std::shared_ptr& reader) + : DecoratedReader(reader) {} void ReadNext(std::vector* out) override { std::lock_guard lock(mutex_); diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc index 4fc9aae8e36e9b43d65fab0b92ec3a2549057128..c202eed354c5f1a91e93e1c3919d1bfebc1bc401 100644 --- a/paddle/fluid/platform/cpu_info.cc +++ b/paddle/fluid/platform/cpu_info.cc @@ -21,12 +21,17 @@ limitations under the License. */ #include #endif +#include #include "gflags/gflags.h" DEFINE_double(fraction_of_cpu_memory_to_use, 1, "Default use 100% of CPU memory for PaddlePaddle," "reserve the rest for page tables, etc"); +DEFINE_uint64( + initial_cpu_memory_in_mb, 500, + "Default initial 500MB of CPU memory for PaddlePaddle, in MD unit."); + DEFINE_double( fraction_of_cuda_pinned_memory_to_use, 0.5, "Default use 50% of CPU memory as the pinned_memory for PaddlePaddle," @@ -54,7 +59,9 @@ inline size_t CpuTotalPhysicalMemory() { size_t CpuMaxAllocSize() { // For distributed systems, it requires configuring and limiting // the fraction of memory to use. - return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory(); + return std::min(static_cast(FLAGS_fraction_of_cpu_memory_to_use * + CpuTotalPhysicalMemory()), + FLAGS_initial_cpu_memory_in_mb * 1 << 20); } size_t CpuMinChunkSize() { diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index c88fbef63cf26c671246b15ea9872da0e7a92c1a..bd5c613f8cf794df5dfeb7517ed4350f9b3b6099 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -413,6 +413,9 @@ All parameter, weight, gradient are variables in Paddle. py::class_(m, "Executor") .def(py::init()) +#ifdef PADDLE_WITH_DISTRIBUTE + .def("complete", &Executor::Complete) +#endif .def("run", (void (Executor::*)(const ProgramDesc &, Scope *, int, bool, bool)) & Executor::Run); diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 6719a4d7eca696a0ec7d742740dc7f3979e988ce..1a010ab3ac619d3377f6f7f2ae3d90dfa6042384 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -All layers just related to the neural network. +All layers just related to the neural network. """ from ..layer_helper import LayerHelper @@ -95,7 +95,6 @@ def fc(input, num_flatten_dims=1, param_attr=None, bias_attr=None, - use_cudnn=False, use_mkldnn=False, act=None, is_test=False, @@ -222,6 +221,7 @@ def embedding(input, have two elements which indicate the size of the dictionary of embeddings and the size of each embedding vector respectively. is_sparse(bool): The flag indicating whether to use sparse update. + is_distributed (bool): Whether to run lookup table from remote parameter server. padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup. Otherwise the given :attr:`padding_idx` indicates padding the output with zeros whenever lookup encounters it in :attr:`input`. If @@ -654,8 +654,9 @@ def dynamic_gru(input, :attr:`False`. gate_activation(str): The activation for update gate and reset gate. Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid". - activation(str): The activation for candidate hidden state. + candidate_activation(str): The activation for candidate hidden state. Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh". + h_0 (Variable): The hidden output of the first time step. Returns: Variable: The hidden state of GRU. The shape is :math:`(T \\times D)`, \ @@ -873,6 +874,13 @@ def cos_sim(X, Y): """ This function performs the cosine similarity between two tensors X and Y and returns that as the output. + + Args: + X (Variable): The input X. + Y (Variable): The input Y. + + Returns: + Variable: the output of cosine(X, Y). """ helper = LayerHelper('cos_sim', **locals()) out = helper.create_tmp_variable(dtype=X.dtype) @@ -899,15 +907,15 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None): unchanged. Args: - x(variable): The input tensor. - dropout_prob(float): Probability of setting units to zero. - is_test(bool): A flag indicating whether it is in test phrase or not. - seed(int): A Python integer used to create random seeds. If this - parameter is set to None, a random seed is used. - NOTE: If an integer seed is given, always the same output - units will be dropped. DO NOT use a fixed seed in training. - name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + x (Variable): The input tensor. + dropout_prob (float): Probability of setting units to zero. + is_test (bool): A flag indicating whether it is in test phrase or not. + seed (int): A Python integer used to create random seeds. If this + parameter is set to None, a random seed is used. + NOTE: If an integer seed is given, always the same output + units will be dropped. DO NOT use a fixed seed in training. + name (str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: Variable: A tensor variable. @@ -1029,8 +1037,8 @@ def square_error_cost(input, label): * :math:`Out`: Output value, same shape with :math:`X`. Args: - input(Variable): Input tensor, has predictions. - label(Variable): Label tensor, has target labels. + input (Variable): Input tensor, has predictions. + label (Variable): Label tensor, has target labels. Returns: Variable: The tensor variable storing the element-wise squared error \ @@ -1059,6 +1067,7 @@ def square_error_cost(input, label): return square_out +@templatedoc() def chunk_eval(input, label, chunk_scheme, @@ -1067,6 +1076,18 @@ def chunk_eval(input, """ This function computes and outputs the precision, recall and F1-score of chunk detection. + + Args: + input (Variable): prediction output of the network. + label (Variable): label of the test data set. + chunk_scheme (str): ${chunk_scheme_comment} + num_chunk_types (int): ${num_chunk_types_comment} + excluded_chunk_types (list): ${excluded_chunk_types_comment} + + Returns: + tuple: tuple containing: (precision, recall, f1_score, + num_infer_chunks, num_label_chunks, + num_correct_chunks) """ helper = LayerHelper("chunk_eval", **locals()) @@ -1099,6 +1120,7 @@ def chunk_eval(input, num_correct_chunks) +@templatedoc() def sequence_conv(input, num_filters, filter_size=3, @@ -1111,6 +1133,19 @@ def sequence_conv(input, This function creates the op for sequence_conv, using the inputs and other convolutional configurations for the filters and stride as given in the input parameters to the function. + + Args: + input (Variable): ${x_comment} + num_filters (int): number of filters. + filter_size (int): the filter size (H and W). + filter_stride (int): stride of the filter. + padding (bool): if True, add paddings. + bias_attr (ParamAttr|None): attributes for bias + param_attr (ParamAttr|None): attributes for parameter + act (str): the activation type + + Returns: + Variable: output of sequence_conv """ # FIXME(dzh) : want to unify the argument of python layer @@ -1225,33 +1260,34 @@ def conv2d(input, W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1 Args: - input(Variable): The input image with [N, C, H, W] format. - num_filters(int): The number of filter. It is as same as the output - image channel. - filter_size(int|tuple|None): The filter size. If filter_size is a tuple, - it must contain two integers, (filter_size_H, filter_size_W). - Otherwise, the filter will be a square. - stride(int|tuple): The stride size. If stride is a tuple, it must - contain two integers, (stride_H, stride_W). Otherwise, the - stride_H = stride_W = stride. Default: stride = 1. - padding(int|tuple): The padding size. If padding is a tuple, it must - contain two integers, (padding_H, padding_W). Otherwise, the - padding_H = padding_W = padding. Default: padding = 0. - dilation(int|tuple): The dilation size. If dilation is a tuple, it must - contain two integers, (dilation_H, dilation_W). Otherwise, the - dilation_H = dilation_W = dilation. Default: dilation = 1. - groups(int): The groups number of the Conv2d Layer. According to grouped - convolution in Alex Krizhevsky's Deep CNN paper: when group=2, - the first half of the filters is only connected to the first half - of the input channels, while the second half of the filters is only - connected to the second half of the input channels. Default: groups=1 - param_attr(ParamAttr): The parameters to the Conv2d Layer. Default: None - bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None - use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True - act(str): Activation type. Default: None - name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + input (Variable): The input image with [N, C, H, W] format. + num_filters(int): The number of filter. It is as same as the output + image channel. + filter_size (int|tuple|None): The filter size. If filter_size is a tuple, + it must contain two integers, (filter_size_H, filter_size_W). + Otherwise, the filter will be a square. + stride (int|tuple): The stride size. If stride is a tuple, it must + contain two integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. Default: stride = 1. + padding (int|tuple): The padding size. If padding is a tuple, it must + contain two integers, (padding_H, padding_W). Otherwise, the + padding_H = padding_W = padding. Default: padding = 0. + dilation (int|tuple): The dilation size. If dilation is a tuple, it must + contain two integers, (dilation_H, dilation_W). Otherwise, the + dilation_H = dilation_W = dilation. Default: dilation = 1. + groups (int): The groups number of the Conv2d Layer. According to grouped + convolution in Alex Krizhevsky's Deep CNN paper: when group=2, + the first half of the filters is only connected to the first half + of the input channels, while the second half of the filters is only + connected to the second half of the input channels. Default: groups=1 + param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None + bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None + use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + use_mkldnn (bool): Use mkldnn kernels or not. + act (str): Activation type. Default: None + name (str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: Variable: The tensor variable storing the convolution and \ @@ -1409,7 +1445,7 @@ def sequence_pool(input, pool_type): def sequence_first_step(input): """ - This funciton get the first step of sequence. + This function gets the first step of sequence. .. code-block:: text @@ -1442,7 +1478,7 @@ def sequence_first_step(input): def sequence_last_step(input): """ - This funciton get the last step of sequence. + This function gets the last step of sequence. .. code-block:: text @@ -1486,6 +1522,22 @@ def pool2d(input, """ This function adds the operator for pooling in 2 dimensions, using the pooling configurations mentioned in input parameters. + + Args: + input (Variable): ${input_comment} + pool_size (int): ${ksize_comment} + pool_type (str): ${pooling_type_comment} + pool_stride (int): stride of the pooling layer. + pool_padding (int): padding size. + global_pooling (bool): ${global_pooling_comment} + use_cudnn (bool): ${use_cudnn_comment} + ceil_mode (bool): ${ceil_mode_comment} + use_mkldnn (bool): ${use_mkldnn_comment} + name (str): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Variable: output of pool2d layer. """ if pool_type not in ["max", "avg"]: raise ValueError( @@ -1589,7 +1641,6 @@ def batch_norm(input, hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w') hidden2 = fluid.layers.batch_norm(input=hidden1) - """ helper = LayerHelper('batch_norm', **locals()) dtype = helper.input_dtype() @@ -1717,6 +1768,7 @@ def layer_norm(input, bias_attr(ParamAttr|None): The parameter attribute for the learnable bias :math:`b`. act(str): Activation to be applied to the output of layer normalizaiton. + name (str): The name of this layer. It is optional. Returns: Variable: A tensor variable with the same shape as the input. @@ -1768,6 +1820,17 @@ def layer_norm(input, def beam_search_decode(ids, scores, name=None): + """ + ${beam_search_decode} + + Args: + ids (Variable): ${ids_comment} + scores (Variable): ${scores_comment} + name (str): The name of this layer. It is optional. + + Returns: + tuple: a tuple of two output variable: sentence_ids, sentence_scores + """ helper = LayerHelper('beam_search_decode', **locals()) sentence_ids = helper.create_tmp_variable(dtype=ids.dtype) sentence_scores = helper.create_tmp_variable(dtype=ids.dtype) @@ -1843,46 +1906,46 @@ def conv2d_transpose(input, W_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 Args: - input(Variable): The input image with [N, C, H, W] format. - num_filters(int): The number of the filter. It is as same as the output - image channel. - output_size(int|tuple|None): The output image size. If output size is a - tuple, it must contain two integers, (image_H, image_W). This - parameter only works when filter_size is None. - filter_size(int|tuple|None): The filter size. If filter_size is a tuple, - it must contain two integers, (filter_size_H, filter_size_W). - Otherwise, the filter will be a square. None if use output size to - calculate filter_size. - padding(int|tuple): The padding size. If padding is a tuple, it must - contain two integers, (padding_H, padding_W). Otherwise, the - padding_H = padding_W = padding. Default: padding = 0. - stride(int|tuple): The stride size. If stride is a tuple, it must - contain two integers, (stride_H, stride_W). Otherwise, the - stride_H = stride_W = stride. Default: stride = 1. - dilation(int|tuple): The dilation size. If dilation is a tuple, it must - contain two integers, (dilation_H, dilation_W). Otherwise, the - dilation_H = dilation_W = dilation. Default: dilation = 1. - groups(int): The groups number of the Conv2d transpose layer. Inspired by - grouped convolution in Alex Krizhevsky's Deep CNN paper, in which - when group=2, the first half of the filters is only connected to the - first half of the input channels, while the second half of the - filters is only connected to the second half of the input channels. - Default: groups=1 - param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer. - Default: None - bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None - use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True - act(str): Activation type. Default: None - name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + input(Variable): The input image with [N, C, H, W] format. + num_filters(int): The number of the filter. It is as same as the output + image channel. + output_size(int|tuple|None): The output image size. If output size is a + tuple, it must contain two integers, (image_H, image_W). This + parameter only works when filter_size is None. + filter_size(int|tuple|None): The filter size. If filter_size is a tuple, + it must contain two integers, (filter_size_H, filter_size_W). + Otherwise, the filter will be a square. None if use output size to + calculate filter_size. + padding(int|tuple): The padding size. If padding is a tuple, it must + contain two integers, (padding_H, padding_W). Otherwise, the + padding_H = padding_W = padding. Default: padding = 0. + stride(int|tuple): The stride size. If stride is a tuple, it must + contain two integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. Default: stride = 1. + dilation(int|tuple): The dilation size. If dilation is a tuple, it must + contain two integers, (dilation_H, dilation_W). Otherwise, the + dilation_H = dilation_W = dilation. Default: dilation = 1. + groups(int): The groups number of the Conv2d transpose layer. Inspired by + grouped convolution in Alex Krizhevsky's Deep CNN paper, in which + when group=2, the first half of the filters is only connected to the + first half of the input channels, while the second half of the + filters is only connected to the second half of the input channels. + Default: groups=1 + param_attr(ParamAttr): The parameters to the Conv2d_transpose Layer. + Default: None + bias_attr(ParamAttr): Bias parameter for the Conv2d layer. Default: None + use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + act(str): Activation type. Default: None + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: - Variable: The tensor variable storing the convolution transpose result. + Variable: The tensor variable storing the convolution transpose result. Raises: - ValueError: If the shapes of input, filter_size, stride, padding and - groups mismatch. + ValueError: If the shapes of input, filter_size, stride, padding and + groups mismatch. Examples: .. code-block:: python @@ -2019,6 +2082,17 @@ def sequence_expand(x, y, ref_level=-1, name=None): def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0): ''' This function implements the beam search algorithm. + + Args: + pre_ids (Variable): ${pre_ids_comment} + ids (Variable): ${ids_comment} + scores (Variable): ${scores_comment} + beam_size (int): ${beam_size_comment} + end_id (int): ${end_id_comment} + level (int): ${level_comment} + + Returns: + tuple: a tuple of beam_search output variables: selected_ids, selected_scores ''' helper = LayerHelper('beam_search', **locals()) score_type = scores.dtype @@ -2521,14 +2595,14 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): slice along dimension `axis`. Args: - x(Variable|list): The input tensor to l2_normalize layer. - axis(int): The axis on which to apply normalization. If `axis < 0`, - the dimension to normalization is rank(X) + axis. -1 is the - last dimension. - epsilon(float): The epsilon value is used to avoid division by zero, - the defalut value is 1e-10. - name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + x(Variable|list): The input tensor to l2_normalize layer. + axis(int): The axis on which to apply normalization. If `axis < 0`, + the dimension to normalization is rank(X) + axis. -1 is the + last dimension. + epsilon(float): The epsilon value is used to avoid division by zero, + the defalut value is 1e-10. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: @@ -2741,16 +2815,13 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None, the edit distance will be divided by the length of reference string. Args: - input(Variable): The indices for hypothesis strings. - label(Variable): The indices for reference strings. - normalized(bool): Indicated whether to normalize the edit distance by the length of reference string. - ignored_tokens(list of int): Tokens that should be removed before calculating edit distance. + name (str): The name of this layer. It is optional. Returns: Variable: sequence-to-sequence edit distance in shape [batch_size, 1]. @@ -2840,10 +2911,10 @@ def ctc_greedy_decoder(input, blank, name=None): where Lp is the sum of all input sequences' length and num_classes is the true number of classes. (not including the blank label). - blank(int): the blank label index of Connectionist Temporal Classification (CTC) loss, which is in thehalf-opened interval [0, num_classes + 1). + name (str): The name of this layer. It is optional. Returns: Variable: CTC greedy decode result. If all the sequences in result were @@ -2880,23 +2951,23 @@ def warpctc(input, label, blank=0, norm_by_times=False): input tensor. Args: - input(Variable): (LodTensor, default: LoDTensor), - the unscaled probabilities of variable-length sequences, - which is a 2-D Tensor with LoD information. - It's shape is [Lp, num_classes + 1], where Lp is the sum of all input - sequences' length and num_classes is the true number of classes. - (not including the blank label). - label(Variable): (LodTensor, default: LoDTensor), the ground truth - of variable-length sequence, which is a 2-D Tensor with LoD - information. It is of the shape [Lg, 1], where Lg is th sum of - all labels' length. - blank: (int, default: 0), the blank label index of Connectionist - Temporal Classification (CTC) loss, which is in the - half-opened interval [0, num_classes + 1). - norm_by_times: (bool, default: false), whether to normalize - the gradients by the number of time-step, which is also the - sequence's length. There is no need to normalize the gradients - if warpctc layer was follewed by a mean_op. + input(Variable): (LodTensor, default: LoDTensor), + the unscaled probabilities of variable-length sequences, + which is a 2-D Tensor with LoD information. + It's shape is [Lp, num_classes + 1], where Lp is the sum of all input + sequences' length and num_classes is the true number of classes. + (not including the blank label). + label(Variable): (LodTensor, default: LoDTensor), the ground truth + of variable-length sequence, which is a 2-D Tensor with LoD + information. It is of the shape [Lg, 1], where Lg is th sum of + all labels' length. + blank (int): default 0, the blank label index of Connectionist + Temporal Classification (CTC) loss, which is in the + half-opened interval [0, num_classes + 1). + norm_by_times (bool): default false, whether to normalize + the gradients by the number of time-step, which is also the + sequence's length. There is no need to normalize the gradients + if warpctc layer was follewed by a mean_op. Returns: Variable: The Connectionist Temporal Classification (CTC) loss, @@ -2955,9 +3026,9 @@ def sequence_reshape(input, new_dim): no remainder for each sequence. Args: - input (Variable): (LodTensor, default: LoDTensor), a 2-D LoDTensor - with shape being [N, M] where M for dimension. - new_dim (int): New dimension which the input LoDTensor is reshaped to. + input (Variable): (LodTensor, default: LoDTensor), a 2-D LoDTensor + with shape being [N, M] where M for dimension. + new_dim (int): New dimension which the input LoDTensor is reshaped to. Returns: Variable: Reshaped LoDTensor according to new dimension. @@ -2979,7 +3050,10 @@ def sequence_reshape(input, new_dim): return out -@autodoc() +# FIXME(wuyi): let docstring_checker.py understand @autodoc. +# For now, the comments in c++ use types like Tensor, but in python side +# the type is often "Variable", and arguments may vary. +@templatedoc(op_type="nce") def nce(input, label, num_total_classes, @@ -2987,6 +3061,21 @@ def nce(input, param_attr=None, bias_attr=None, num_neg_samples=None): + """ + ${comment} + + Args: + input (Variable): input variable. + label (Variable): label. + num_total_classes (int):${num_total_classes_comment} + sample_weight (int): ${sample_weight_comment} + param_attr (ParamAttr|None): attributes for parameter + bias_attr (ParamAttr|None): attributes for bias + num_neg_samples (int): ${num_neg_samples_comment} + + Returns: + Variable: output of nce layer. + """ helper = LayerHelper('nce', **locals()) assert isinstance(input, Variable) dim = input.shape[1] @@ -3044,8 +3133,9 @@ def transpose(x, perm, name=None): perm[i]-th dimension of `input`. Args: - input (Variable): (Tensor), A Tensor. - perm (list): A permutation of the dimensions of `input`. + x (Variable): The input Tensor. + perm (list): A permutation of the dimensions of `input`. + name (str): The name of this layer. It is optional. Returns: Variable: A transposed Tensor. @@ -3278,9 +3368,9 @@ def multiplex(inputs, index): row of the matrix, then `O[i]` is equal to :math:`I_{ID[i]}[i]`. Args: - inputs (list): A list of variables to gather from. All variables have the + inputs (list): A list of variables to gather from. All variables have the same shape and the rank is at least 2. - index (Variable): Tensor, index variable which is a 2-D tensor + index (Variable): Tensor, index variable which is a 2-D tensor with shape [M, 1] where M is the batch size. Returns: @@ -3479,7 +3569,8 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1): begin(int): The first value of this counter. step(int): The increment step between each execution. - Returns(Variable): The global run counter. + Returns: + Variable: The global run counter. """ helper = LayerHelper('global_step_counter') if counter_name is None: @@ -3540,7 +3631,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): the corresponding dimension of x. Args: - input(variable): The input tensor. + x(variable): The input tensor. shape(list): The new shape. At most one dimension of the new shape can be -1. actual_shape(variable): An optional input. If provided, reshape @@ -3552,8 +3643,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): inplace(bool): If this flag is set true, a new output tensor is created whose data is copied from input x, otherwise the output shares data with input without copying. + name (str): The name of this layer. It is optional. - Returns(variable): The output tensor. + Returns: + Variable: The output tensor. Examples: .. code-block:: python @@ -4074,7 +4167,6 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): name(str|None): The output variable name. Returns: - ${out_comment}. """ @@ -4093,6 +4185,7 @@ def image_resize_short(input, out_short_len, resample='BILINEAR'): This is a 4-D tensor of the shape (num_batches, channels, in_h, in_w). out_short_len(int): The length of output images' short edge. + resample (str): resample method, default: BILINEAR. Returns: out (Variable): The output is a 4-D tensor of the shape diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 5b299a0f29d2f17f7ecf3509fee341712f3c9d70..2480d4e76a1b5fd76b7dc8299c2f8fcae967145e 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -515,35 +515,38 @@ class DistributeTranspiler: grad_to_block_id, None) # process distributed lookup_table - prefetch_block = None + prefetch_var_name_to_block_id = [] if self.has_distributed_lookup_table: pserver_index = self.pserver_endpoints.index(endpoint) table_opt_block = self._create_table_optimize_block( pserver_index, pserver_program, pre_block_idx, grad_to_block_id) - prefetch_block = self._create_prefetch_block( + prefetch_var_name_to_block_id = self._create_prefetch_block( pserver_index, pserver_program, table_opt_block) # NOTE: if has_distributed_lookup_table is False, then prefetch_block will # not be executed, so it's safe to use optimize_block to hold the place if self.has_distributed_lookup_table: - assert prefetch_block is not None + assert len(prefetch_var_name_to_block_id) > 0 else: - assert prefetch_block is None - prefetch_block = pserver_program.global_block() + assert len(prefetch_var_name_to_block_id) == 0 + + attrs = { + "OptimizeBlock": pserver_program.block(1), + "endpoint": endpoint, + "Fanin": self.trainer_num, + "sync_mode": self.sync_mode, + "grad_to_block_id": grad_to_block_id + } + if len(prefetch_var_name_to_block_id) > 0: + attrs['prefetch_var_name_to_block_id'] \ + = prefetch_var_name_to_block_id # step5 append the listen_and_serv op pserver_program.global_block().append_op( type="listen_and_serv", inputs={'X': recv_inputs}, outputs={}, - attrs={ - "OptimizeBlock": pserver_program.block(1), - "endpoint": endpoint, - "Fanin": self.trainer_num, - "PrefetchBlock": prefetch_block, - "sync_mode": self.sync_mode, - "grad_to_block_id": grad_to_block_id - }) + attrs=attrs) pserver_program.sync_with_cpp() return pserver_program @@ -608,8 +611,15 @@ class DistributeTranspiler: def _replace_lookup_table_op_with_prefetch(self, program, pserver_endpoints): # 1. replace lookup_table_op with split_ids_op -> prefetch_op -> sum_op - self.prefetch_input_vars = None - self.prefetch_output_vars = None + # self.all_prefetch_input_vars = + # [[var0_prefetch_in_pserver0, var0_prefetch_in_pserver1] + # [var1_prefetch_in_pserver0, var1_prefetch_in_pserver1]] + self.all_prefetch_input_vars = [] + + # self.all_prefetch_input_vars = + # [[var0_prefetch_in_pserver0, var0_prefetch_in_pserver1] + # [var1_prefetch_in_pserver0, var1_prefetch_in_pserver1]] + self.all_prefetch_output_vars = [] continue_search_lookup_table_op = True while continue_search_lookup_table_op: @@ -623,18 +633,19 @@ class DistributeTranspiler: ids_name = op.input("Ids") out_name = op.output("Out") - if self.prefetch_input_vars is None: - ids_var = program.global_block().vars[ids_name[0]] - self.prefetch_input_vars = self.create_splited_vars( - source_var=ids_var, - block=program.global_block(), - tag="_prefetch_in_") - if self.prefetch_output_vars is None: - out_var = program.global_block().vars[out_name[0]] - self.prefetch_output_vars = self.create_splited_vars( - source_var=out_var, - block=program.global_block(), - tag="_prefetch_out_") + ids_var = program.global_block().vars[ids_name[0]] + prefetch_input_vars = self.create_splited_vars( + source_var=ids_var, + block=program.global_block(), + tag="_prefetch_in_") + self.all_prefetch_input_vars.append(prefetch_input_vars) + + out_var = program.global_block().vars[out_name[0]] + prefetch_output_vars = self.create_splited_vars( + source_var=out_var, + block=program.global_block(), + tag="_prefetch_out_") + self.all_prefetch_output_vars.append(prefetch_output_vars) # insert split_ids_op program.global_block().insert_op( @@ -646,14 +657,14 @@ class DistributeTranspiler: for varname in ids_name ] }, - outputs={"Out": self.prefetch_input_vars}) + outputs={"Out": prefetch_input_vars}) # insert prefetch_op program.global_block().insert_op( index=op_index + 1, type="prefetch", - inputs={'X': self.prefetch_input_vars}, - outputs={"Out": self.prefetch_output_vars}, + inputs={'X': prefetch_input_vars}, + outputs={"Out": prefetch_output_vars}, attrs={ "epmap": pserver_endpoints, RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE @@ -663,7 +674,7 @@ class DistributeTranspiler: program.global_block().insert_op( index=op_index + 2, type="concat", - inputs={'X': self.prefetch_output_vars}, + inputs={'X': prefetch_output_vars}, outputs={ "Out": [ program.global_block().vars[varname] @@ -709,30 +720,34 @@ class DistributeTranspiler: optimize_block): # STEP: create prefetch block table_var = pserver_program.global_block().vars[self.table_name] - prefetch_block = pserver_program.create_block(optimize_block.idx) - trainer_ids = self.prefetch_input_vars[pserver_index] - pserver_ids = pserver_program.global_block().create_var( - name=trainer_ids.name, - type=trainer_ids.type, - shape=trainer_ids.shape, - dtype=trainer_ids.dtype) - trainer_out = self.prefetch_output_vars[pserver_index] - pserver_out = pserver_program.global_block().create_var( - name=trainer_out.name, - type=trainer_out.type, - shape=trainer_out.shape, - dtype=trainer_out.dtype) - prefetch_block.append_op( - type="lookup_sparse_table", - inputs={'Ids': pserver_ids, - "W": table_var}, - outputs={"Out": pserver_out}, - attrs={ - "is_sparse": True, # has no effect on lookup_table op - "is_distributed": True, - "padding_idx": -1 - }) - return prefetch_block + prefetch_var_name_to_block_id = [] + for index in range(len(self.all_prefetch_input_vars)): + prefetch_block = pserver_program.create_block(optimize_block.idx) + trainer_ids = self.all_prefetch_input_vars[index][pserver_index] + pserver_ids = pserver_program.global_block().create_var( + name=trainer_ids.name, + type=trainer_ids.type, + shape=trainer_ids.shape, + dtype=trainer_ids.dtype) + trainer_out = self.all_prefetch_output_vars[index][pserver_index] + pserver_out = pserver_program.global_block().create_var( + name=trainer_out.name, + type=trainer_out.type, + shape=trainer_out.shape, + dtype=trainer_out.dtype) + prefetch_block.append_op( + type="lookup_sparse_table", + inputs={'Ids': pserver_ids, + "W": table_var}, + outputs={"Out": pserver_out}, + attrs={ + "is_sparse": True, # has no effect on lookup_table op + "is_distributed": True, + "padding_idx": -1 + }) + prefetch_var_name_to_block_id.append(trainer_ids.name + ":" + str( + prefetch_block.idx)) + return prefetch_var_name_to_block_id def _create_table_optimize_block(self, pserver_index, pserver_program, pre_block_idx, grad_to_block_id): diff --git a/tools/codestyle/docstring_checker.py b/tools/codestyle/docstring_checker.py index 48100e5bf989520043b5ca372b02883faea8a9fd..54a690462699651d3e14f9b24383df01a9740336 100644 --- a/tools/codestyle/docstring_checker.py +++ b/tools/codestyle/docstring_checker.py @@ -126,9 +126,10 @@ class DocstringChecker(BaseChecker): 'W9002': ('Doc string does not end with "." period', symbol + "-end-with", 'Used when a doc string does not end with a period'), - 'W9003': ('All args with their types must be mentioned in doc string', - symbol + "-with-all-args", - 'Used when not all arguments are in the doc string '), + 'W9003': + ('All args with their types must be mentioned in doc string %s', + symbol + "-with-all-args", + 'Used when not all arguments are in the doc string '), 'W9005': ('Missing docstring or docstring is too short', symbol + "-missing", 'Add docstring longer >=10'), 'W9006': ('Docstring indent error, use 4 space for indent', @@ -178,6 +179,8 @@ class DocstringChecker(BaseChecker): self.indent_style(node) def missing_doc_string(self, node): + if node.name.startswith("__") or node.name.startswith("_"): + return True if node.tolineno - node.fromlineno <= 10: return True @@ -199,12 +202,16 @@ class DocstringChecker(BaseChecker): doc = node.doc lines = doc.splitlines() + line_num = 0 for l in lines: + if line_num == 0: + continue cur_indent = len(l) - len(l.lstrip()) if cur_indent % indent != 0: self.add_message('W9006', node=node, line=node.fromlineno) return False + line_num += 1 return True @@ -320,15 +327,19 @@ class DocstringChecker(BaseChecker): return True parsed_args = doc.args + args_not_documented = set(args) - set(parsed_args) if len(args) > 0 and len(parsed_args) <= 0: - print "debug:parsed args: ", parsed_args - self.add_message('W9003', node=node, line=node.fromlineno) + self.add_message( + 'W9003', + node=node, + line=node.fromlineno, + args=list(args_not_documented)) return False for t in args: if t not in parsed_args: - print t, " with (type) not in ", parsed_args - self.add_message('W9003', node=node, line=node.fromlineno) + self.add_message( + 'W9003', node=node, line=node.fromlineno, args=[t, ]) return False return True diff --git a/tools/codestyle/pylint_pre_commit.hook b/tools/codestyle/pylint_pre_commit.hook index e7c92ba671e0eb778b2ab5447bea7c4b14fe761b..150a3f5666bd39d30b7e6518e58a14fb5fe2f14b 100755 --- a/tools/codestyle/pylint_pre_commit.hook +++ b/tools/codestyle/pylint_pre_commit.hook @@ -7,13 +7,13 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" export PYTHONPATH=$DIR:$PYTHONPATH # The trick to remove deleted files: https://stackoverflow.com/a/2413151 -for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do +for file in $(git diff --name-status | awk '$1 != "D" {print $2}'); do pylint --disable=all --load-plugins=docstring_checker \ --enable=doc-string-one-line,doc-string-end-with,doc-string-with-all-args,doc-string-triple-quotes,doc-string-missing,doc-string-indent-error,doc-string-with-returns,doc-string-with-raises $file; TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); done -#exit $TOTAL_ERRORS +exit $TOTAL_ERRORS #For now, just warning: -exit 0 +#exit 0