diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 56b00c76952cc2d3b076916027c97b9cf64ef9bc..9cdf8f691fe87416f687830d9d89efeeeab8763b 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -198,7 +198,7 @@ if(WITH_PSLIB)
             trainer_factory.cc trainer.cc device_worker.cc hogwild_worker.cc
             downpour_worker.cc pull_dense_worker.cc device_worker_factory.cc
             DEPS op_registry device_context scope framework_proto
-            trainer_desc_proto glog lod_rank_table fleet_wrapper
+            trainer_desc_proto glog lod_rank_table fleet_wrapper lodtensor_printer
             feed_fetch_method graph_to_program_pass async_executor_proto
             variable_helper pslib_brpc pslib timer)
 else()
@@ -207,7 +207,7 @@
             trainer_factory.cc trainer.cc device_worker.cc hogwild_worker.cc
             downpour_worker.cc pull_dense_worker.cc device_worker_factory.cc
             DEPS op_registry device_context scope framework_proto
-            trainer_desc_proto glog lod_rank_table fleet_wrapper
+            trainer_desc_proto glog lod_rank_table fleet_wrapper lodtensor_printer
             feed_fetch_method graph_to_program_pass async_executor_proto
             variable_helper timer)
 endif(WITH_PSLIB)
diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc
index 59d8151f1e8471e4934022f2d0d05b8fb0d197f2..67770f77c2fc3b4b7f6141f7813c5f55b9c94796 100644
--- a/paddle/fluid/framework/async_executor.cc
+++ b/paddle/fluid/framework/async_executor.cc
@@ -155,7 +155,6 @@ void AsyncExecutor::RunFromFile(const ProgramDesc& main_program,
   VLOG(3) << "start to run from files in async_executor";
   VLOG(3) << "Drop current scope kids";
   root_scope_->DropKids();
-  return;
 }
diff --git a/paddle/fluid/framework/data_feed.h b/paddle/fluid/framework/data_feed.h
index 7cc6919703680c359b89075777e97676f5253c57..b027c71e97c30a17730d838b83706d199bf879eb 100644
--- a/paddle/fluid/framework/data_feed.h
+++ b/paddle/fluid/framework/data_feed.h
@@ -235,6 +235,9 @@ class MultiSlotDataFeed
                                    int index);
   virtual bool ParseOneInstance(std::vector<MultiSlotType>* instance);
   virtual void PutToFeedVec(const std::vector<MultiSlotType>& ins_vec);
+
+ private:
+  BatchGenerator batch_gen_;
 };
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/device_worker.h b/paddle/fluid/framework/device_worker.h
index c9997b5ee3b1f27fca380c53db27ab6576305271..db3b68adcc80dfc6f1b39daac72136c8bfb15156 100644
--- a/paddle/fluid/framework/device_worker.h
+++ b/paddle/fluid/framework/device_worker.h
@@ -95,6 +95,7 @@ class DeviceWorker {
   virtual void Initialize(const TrainerDesc& desc) = 0;
   virtual void SetDeviceIndex(int tid) = 0;
   virtual void TrainFiles() = 0;
+  virtual void PrintFetchVars(int batch_cnt) = 0;
   virtual void TrainFilesWithProfiler() = 0;
   virtual void CreateDeviceResource(const ProgramDesc& main_prog) = 0;
   // will make this zero copy in the future
@@ -118,6 +119,7 @@ class CPUWorkerBase : public DeviceWorker {
   virtual void SetDeviceIndex(int tid) { thread_id_ = tid; }
   virtual void TrainFiles() = 0;
   virtual void TrainFilesWithProfiler() {}
+  virtual void PrintFetchVars(int batch_cnt) {}
   virtual void CreateDeviceResource(const ProgramDesc& main_prog) {}

  protected:
@@ -128,9 +130,10 @@
 class HogwildWorker : public CPUWorkerBase {
  public:
   HogwildWorker() {}
   virtual ~HogwildWorker() {}
-  virtual void Initialize(const TrainerDesc& desc) {}
+  virtual void Initialize(const TrainerDesc& desc);
   virtual void TrainFiles();
   virtual void TrainFilesWithProfiler();
+  virtual void PrintFetchVars(int batch_cnt);
   virtual void CreateDeviceResource(const ProgramDesc& main_prog);
   virtual void BindingDataFeedMemory();
@@ -142,6 +145,7 @@ class HogwildWorker : public CPUWorkerBase {
   Scope* thread_scope_;
   std::vector<std::string> fetch_var_names_;
   std::vector<std::vector<float>> fetch_values_;
+  int batch_cnt_per_print_;
 };

 class DownpourWorker : public HogwildWorker {
diff --git a/paddle/fluid/framework/downpour_worker.cc b/paddle/fluid/framework/downpour_worker.cc
index 238bf03815343d7935c3c35237d3a40c2fee8560..7da8db67dc0253a98abea9590de5cb54e82b32e2 100644
--- a/paddle/fluid/framework/downpour_worker.cc
+++ b/paddle/fluid/framework/downpour_worker.cc
@@ -57,8 +57,14 @@ void DownpourWorker::Initialize(const TrainerDesc& desc) {
   for (size_t i = 0; i < param_.skip_ops_size(); ++i) {
     skip_ops_[i] = param_.skip_ops(i);
   }
-  skip_ops_.resize(param_.skip_ops_size());
+  fetch_var_names_.resize(desc.fetch_var_names_size());
+  for (size_t i = 0; i < desc.fetch_var_names_size(); ++i) {
+    fetch_var_names_[i] = desc.fetch_var_names(i);
+  }
+
+  batch_cnt_per_print_ = static_cast<int>(desc.batch_per_print());
+  skip_ops_.resize(param_.skip_ops_size());
   fleet_ptr_ = FleetWrapper::GetInstance();
 }
diff --git a/paddle/fluid/framework/hogwild_worker.cc b/paddle/fluid/framework/hogwild_worker.cc
index 9b603d9f13cf82a666b3b6a3ea477c8d82484151..148557a95427389ad28db586de13bbc689f3313e 100644
--- a/paddle/fluid/framework/hogwild_worker.cc
+++ b/paddle/fluid/framework/hogwild_worker.cc
@@ -15,10 +15,19 @@ limitations under the License. */
 #include "paddle/fluid/framework/device_worker.h"
 #include "paddle/fluid/framework/device_worker_factory.h"
 #include "paddle/fluid/platform/cpu_helper.h"
+#include "paddle/fluid/platform/lodtensor_printer.h"

 namespace paddle {
 namespace framework {

+void HogwildWorker::Initialize(const TrainerDesc& desc) {
+  fetch_var_names_.resize(desc.fetch_var_names_size());
+  for (size_t i = 0; i < desc.fetch_var_names_size(); ++i) {
+    fetch_var_names_[i] = desc.fetch_var_names(i);
+  }
+  batch_cnt_per_print_ = static_cast<int>(desc.batch_per_print());
+}
+
 void HogwildWorker::CreateThreadOperators(const ProgramDesc& program) {
   auto& block = program.Block(0);
   op_names_.clear();
@@ -129,5 +138,16 @@ void HogwildWorker::TrainFiles() {
   }
 }

+void HogwildWorker::PrintFetchVars(int batch_cnt) {
+  if (thread_id_ == 0) {
+    if (batch_cnt > 0 && batch_cnt % batch_cnt_per_print_ == 0) {
+      int fetch_var_num = fetch_var_names_.size();
+      for (int i = 0; i < fetch_var_num; ++i) {
+        platform::PrintVar(thread_scope_, fetch_var_names_[i], "None");
+      }
+    }
+  }
+}
+
 }  // end namespace framework
 }  // end namespace paddle
diff --git a/paddle/fluid/framework/trainer_desc.proto b/paddle/fluid/framework/trainer_desc.proto
index 035cdb3d80862e85612f8775a77f5649dc85bdfc..72034ebee75bf8deaeaf8a926e45ff57f0524346 100644
--- a/paddle/fluid/framework/trainer_desc.proto
+++ b/paddle/fluid/framework/trainer_desc.proto
@@ -28,6 +28,8 @@ message TrainerDesc {
   // if we need to binding cpu
   optional bool binding_cpu = 4 [ default = false ];
   repeated string filelist = 5;
+  repeated string fetch_var_names = 6;
+  optional int32 batch_per_print = 7 [ default = 100 ];

   // device worker parameters
   optional HogwildWorkerParameter hogwild_param = 101;
diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt
index c3db59563f3ae77acd860216b34d2cfb4f8b6560..ba1968e07616c9bba2db59d2a79fc95fa5394953 100644
--- a/paddle/fluid/platform/CMakeLists.txt
+++ b/paddle/fluid/platform/CMakeLists.txt
@@ -90,6 +90,9 @@
 nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context)
 cc_library(timer SRCS timer.cc)
 cc_test(timer_test SRCS timer_test.cc DEPS timer)
+cc_library(lodtensor_printer SRCS lodtensor_printer.cc)
+cc_test(lodtensor_printer_test SRCS lodtensor_printer_test.cc DEPS lodtensor_printer)
+
 cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS})
 if(WITH_GPU)
   nv_library(profiler SRCS profiler.cc profiler.cu DEPS device_tracer gpu_info enforce)
diff --git a/paddle/fluid/platform/lodtensor_printer.cc b/paddle/fluid/platform/lodtensor_printer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5bfbcdeecfbeeceab790853d3b5db81a91a9ca64
--- /dev/null
+++ b/paddle/fluid/platform/lodtensor_printer.cc
@@ -0,0 +1,71 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/platform/lodtensor_printer.h"
+#include <iostream>
+#include <sstream>
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/lod_tensor_array.h"
+#include "paddle/fluid/framework/variable.h"
+
+namespace paddle {
+namespace platform {
+
+template <typename T>
+void print_lod_tensor(const std::string& var_name,
+                      const framework::LoDTensor& lod_tensor,
+                      const std::string& print_info) {
+  auto inspect = lod_tensor.data<T>();
+  auto element_num = lod_tensor.numel();
+
+  std::ostringstream sstream;
+  sstream << "user info: " << print_info << "\t";
+  sstream << "var name: " << var_name << "\t";
+  sstream << "numel: " << element_num << "\t";
+  sstream << "value: [" << inspect[0];
+  for (int j = 1; j < element_num; ++j) {
+    sstream << " " << inspect[j];
+  }
+  sstream << "]";
+
+  std::cout << sstream.str() << std::endl;
+}
+
+void PrintVar(framework::Scope* scope, const std::string& var_name,
+              const std::string& print_info) {
+  framework::Variable* var = scope->FindVar(var_name);
+  if (var == nullptr) {
+    VLOG(1) << "Variable Name " << var_name << " does not exist in your scope";
+    return;
+  }
+  framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>();
+  if (tensor == nullptr) {
+    VLOG(1) << "tensor of variable " << var_name << " is not a LoDTensor";
+    return;
+  }
+
+#define PrintLoDTensorCallback(cpp_type, proto_type)             \
+  do {                                                           \
+    if (tensor->type() == proto_type) {                          \
+      print_lod_tensor<cpp_type>(var_name, *tensor, print_info); \
+      return;                                                    \
+    }                                                            \
+  } while (0)
+
+  _ForEachDataType_(PrintLoDTensorCallback);
+  VLOG(1) << "PrintVar: unrecognized data type:" << tensor->type();
+}
+
+}  // end namespace platform
+}  // end namespace paddle
diff --git a/paddle/fluid/platform/lodtensor_printer.h b/paddle/fluid/platform/lodtensor_printer.h
new file mode 100644
index 0000000000000000000000000000000000000000..e070e3540c996a0fe248a3b9312c18d948395426
--- /dev/null
+++ b/paddle/fluid/platform/lodtensor_printer.h
@@ -0,0 +1,24 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <string>
+#include "paddle/fluid/framework/scope.h"
+
+namespace paddle {
+namespace platform {
+void PrintVar(framework::Scope* scope, const std::string& var_name,
+              const std::string& print_info);
+}  // end namespace platform
+}  // end namespace paddle
diff --git a/paddle/fluid/platform/lodtensor_printer_test.cc b/paddle/fluid/platform/lodtensor_printer_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..248237b0c9694e33f24db87bb92ad3c52fb3b546
--- /dev/null
+++ b/paddle/fluid/platform/lodtensor_printer_test.cc
@@ -0,0 +1,60 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "paddle/fluid/platform/lodtensor_printer.h"
+#include <unistd.h>
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/framework/variable.h"
+#include "paddle/fluid/platform/place.h"
+#include "paddle/fluid/platform/timer.h"
+
+TEST(LodTensorPrinter, PrintVar) {
+  paddle::framework::Scope scope;
+  // Printing a variable that is not in the scope should be a harmless no-op.
+  paddle::platform::PrintVar(&scope, "NotAVar", "test");
+
+  // Create and initialize the variable, then print its contents.
+  paddle::framework::Variable* v = scope.Var("NotAVar");
+  paddle::framework::LoDTensor* tensor =
+      v->GetMutable<paddle::framework::LoDTensor>();
+  tensor->Resize({2, 3});
+  float* data = tensor->mutable_data<float>(paddle::platform::CPUPlace());
+  for (int i = 0; i < tensor->numel(); ++i) {
+    data[i] = static_cast<float>(i);
+  }
+  paddle::platform::PrintVar(&scope, "NotAVar", "test");
+}
+
+TEST(Timer, Start) {
+  paddle::platform::Timer timeline;
+  timeline.Start();
+  sleep(3);
+  timeline.Pause();
+}
+
+TEST(Timer, Pause) {
+  paddle::platform::Timer timeline;
+  timeline.Start();
+  sleep(3);
+  timeline.Pause();
+}
+
+TEST(Timer, Resume) {
+  paddle::platform::Timer timeline;
+  timeline.Start();
+  sleep(3);
+  timeline.Pause();
+  timeline.Resume();
+}
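For reference, the standalone sketch below (not part of the patch) illustrates how the pieces added above are meant to combine: TrainerDesc.fetch_var_names and TrainerDesc.batch_per_print configure which variables are printed and how often, HogwildWorker::PrintFetchVars() restricts printing to thread 0 every batch_per_print batches, and platform::PrintVar()/print_lod_tensor() emit the "user info / var name / numel / value" line with print_info hard-coded to "None". The variable names "loss" and "auc", the dummy tensor values, and the FormatVar helper are hypothetical and exist only to make the sketch self-contained; it compiles without Paddle.

// Standalone illustration of the print cadence and output format.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Mirrors the single output line print_lod_tensor() writes per variable.
std::string FormatVar(const std::string& var_name,
                      const std::vector<float>& values,
                      const std::string& print_info) {
  std::ostringstream sstream;
  sstream << "user info: " << print_info << "\t";
  sstream << "var name: " << var_name << "\t";
  sstream << "numel: " << values.size() << "\t";
  sstream << "value: [";
  for (size_t j = 0; j < values.size(); ++j) {
    if (j > 0) sstream << " ";
    sstream << values[j];
  }
  sstream << "]";
  return sstream.str();
}

int main() {
  const int batch_per_print = 100;  // TrainerDesc.batch_per_print (default 100)
  const int thread_id = 0;          // only thread 0 prints in PrintFetchVars()
  // Hypothetical fetch targets; in a real job they come from
  // TrainerDesc.fetch_var_names.
  const std::vector<std::string> fetch_var_names = {"loss", "auc"};

  for (int batch_cnt = 1; batch_cnt <= 300; ++batch_cnt) {
    // ... one mini-batch of training would run here ...
    if (thread_id == 0 && batch_cnt % batch_per_print == 0) {
      // The worker calls platform::PrintVar(thread_scope_, name, "None"),
      // which reads the LoDTensor from the thread scope; here we substitute
      // dummy values.
      for (const auto& name : fetch_var_names) {
        std::cout << FormatVar(name, {0.5f, 0.25f}, "None") << std::endl;
      }
    }
  }
  return 0;
}

With batch_per_print left at its default of 100, the loop prints both variables at batches 100, 200, and 300, which is the cadence a training thread configured through TrainerDesc would see.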