提交 0693b414 编写于 作者: T tensor-tang

Merge remote-tracking branch 'ups/develop' into refine

...@@ -87,7 +87,7 @@ cc_library(executor SRCS executor.cc DEPS op_registry device_context scope ...@@ -87,7 +87,7 @@ cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
framework_proto glog lod_rank_table feed_fetch_method) framework_proto glog lod_rank_table feed_fetch_method)
cc_library(parallel_executor SRCS parallel_executor.cc DEPS multi_devices_graph_builder threaded_ssa_graph_executor scope_buffered_ssa_graph_executor) cc_library(parallel_executor SRCS parallel_executor.cc DEPS graph_builder_factory threaded_ssa_graph_executor scope_buffered_ssa_graph_executor)
cc_library(prune SRCS prune.cc DEPS framework_proto) cc_library(prune SRCS prune.cc DEPS framework_proto)
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
......
...@@ -7,6 +7,7 @@ cc_library(rpc_op_handle SRCS rpc_op_handle.cc DEPS framework_proto scope place ...@@ -7,6 +7,7 @@ cc_library(rpc_op_handle SRCS rpc_op_handle.cc DEPS framework_proto scope place
cc_library(ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base) cc_library(ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base)
cc_library(ssa_graph_builder SRCS ssa_graph_builder.cc DEPS ssa_graph) cc_library(ssa_graph_builder SRCS ssa_graph_builder.cc DEPS ssa_graph)
cc_library(ssa_graph_printer SRCS ssa_graph_printer.cc DEPS ssa_graph_builder)
cc_library(variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_rows) cc_library(variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_rows)
...@@ -28,6 +29,9 @@ cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope d ...@@ -28,6 +29,9 @@ cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope d
cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
scale_loss_grad_op_handle rpc_op_handle ${multi_devices_graph_builder_deps} reduce_op_handle broadcast_op_handle) scale_loss_grad_op_handle rpc_op_handle ${multi_devices_graph_builder_deps} reduce_op_handle broadcast_op_handle)
cc_library(graph_builder_factory SRCS graph_builder_factory.cc DEPS multi_devices_graph_builder ssa_graph_printer)
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph framework_proto) cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph framework_proto)
cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
simple_threadpool device_context) simple_threadpool device_context)
......
...@@ -59,8 +59,8 @@ struct BroadcastOpHandle : public OpHandleBase { ...@@ -59,8 +59,8 @@ struct BroadcastOpHandle : public OpHandleBase {
void RunImpl() override; void RunImpl() override;
private: private:
const std::vector<Scope *> &local_scopes_; std::vector<Scope *> local_scopes_;
const std::vector<platform::Place> &places_; std::vector<platform::Place> places_;
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
const platform::NCCLContextMap *nccl_ctxs_; const platform::NCCLContextMap *nccl_ctxs_;
#endif #endif
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#pragma once #pragma once
#include <string>
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
...@@ -29,6 +31,8 @@ struct BuildStrategy { ...@@ -29,6 +31,8 @@ struct BuildStrategy {
ReduceStrategy reduce_{ReduceStrategy::kAllReduce}; ReduceStrategy reduce_{ReduceStrategy::kAllReduce};
GradientScaleStrategy gradient_scale_{GradientScaleStrategy::kCoeffNumDevice}; GradientScaleStrategy gradient_scale_{GradientScaleStrategy::kCoeffNumDevice};
std::string debug_graphviz_path_{""};
}; };
} // namespace details } // namespace details
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/graph_builder_factory.h"
#include <fstream>
#include "paddle/fluid/framework/details/multi_devices_graph_builder.h"
#include "paddle/fluid/framework/details/ssa_graph_printer.h"
namespace paddle {
namespace framework {
namespace details {
std::unique_ptr<SSAGraphBuilder> SSAGraphBuilderFactory::Create() {
std::unique_ptr<SSAGraphBuilder> res(
#ifdef PADDLE_WITH_CUDA
new MultiDevSSAGraphBuilder(places_, loss_var_name_, param_names_,
local_scopes_, nccl_ctxs_, strategy_)
#else
new MultiDevSSAGraphBuilder(places_, loss_var_name_, param_names_,
local_scopes_, strategy_)
#endif
); // NOLINT
if (!strategy_.debug_graphviz_path_.empty()) {
std::unique_ptr<std::ostream> fout(
new std::ofstream(strategy_.debug_graphviz_path_));
PADDLE_ENFORCE(fout->good());
std::unique_ptr<GraphvizSSAGraphPrinter> graphviz_printer(
new GraphvizSSAGraphPrinter());
res.reset(new SSAGraghBuilderWithPrinter(
std::move(fout), std::move(graphviz_printer), std::move(res)));
}
return res;
}
} // namespace details
} // namespace framework
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/ssa_graph_builder.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/nccl_helper.h"
#endif
namespace paddle {
namespace framework {
class Scope;
namespace details {
// Collects everything needed to construct an SSAGraphBuilder and creates it on
// demand through Create(). All constructor arguments are copied into the
// factory.
class SSAGraphBuilderFactory {
 public:
  // places:        devices forwarded to the created builder.
  // loss_var_name: name of the loss variable forwarded to the builder.
  // param_names:   parameter names forwarded to the builder.
  // local_scopes:  scopes forwarded to the builder (pointers are copied, the
  //                scopes themselves are not owned).
  // strategy:      build options, including the optional graphviz debug path.
  SSAGraphBuilderFactory(const std::vector<platform::Place>& places,
                         const std::string& loss_var_name,
                         const std::unordered_set<std::string>& param_names,
                         const std::vector<Scope*>& local_scopes,
                         const BuildStrategy& strategy)
      : places_(places),
        loss_var_name_(loss_var_name),
        param_names_(param_names),
        local_scopes_(local_scopes),
        strategy_(strategy) {}

#ifdef PADDLE_WITH_CUDA
  // Sets the NCCL context map handed to the builder in CUDA builds. The map
  // is not owned by the factory.
  void SetNCCLContextMap(platform::NCCLContextMap* nccl_ctxs) {
    nccl_ctxs_ = nccl_ctxs;
  }
#endif

  // Creates the builder; defined out of line.
  std::unique_ptr<SSAGraphBuilder> Create();

 private:
  std::vector<platform::Place> places_;
  std::string loss_var_name_;
  std::unordered_set<std::string> param_names_;
  std::vector<Scope*> local_scopes_;
  BuildStrategy strategy_;

#ifdef PADDLE_WITH_CUDA
  // Starts as nullptr so that Create() never forwards an indeterminate
  // pointer when SetNCCLContextMap() was not called.
  platform::NCCLContextMap* nccl_ctxs_{nullptr};
#endif
};
} // namespace details
} // namespace framework
} // namespace paddle
...@@ -30,10 +30,6 @@ ...@@ -30,10 +30,6 @@
#include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h" #include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h"
#endif #endif
// Output path for the SSA graph dump. The help text now states the real
// default and separates the two clauses with a space.
DEFINE_string(ssa_graph_path, "/tmp/ssa_graph.dot",
              "the ssa graph path only print with GLOG_v=10, "
              "default /tmp/ssa_graph.dot");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
namespace details { namespace details {
...@@ -277,11 +273,6 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build( ...@@ -277,11 +273,6 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
*/ */
AddOutputToLeafOps(&result); AddOutputToLeafOps(&result);
if (VLOG_IS_ON(10)) {
std::ofstream fout(FLAGS_ssa_graph_path);
PrintGraphviz(*graph, fout);
}
return std::unique_ptr<SSAGraph>(graph); return std::unique_ptr<SSAGraph>(graph);
} }
......
...@@ -41,8 +41,8 @@ struct NCCLAllReduceOpHandle : public OpHandleBase { ...@@ -41,8 +41,8 @@ struct NCCLAllReduceOpHandle : public OpHandleBase {
void RunImpl() override; void RunImpl() override;
private: private:
const std::vector<Scope *> &local_scopes_; std::vector<Scope *> local_scopes_;
const std::vector<platform::Place> &places_; std::vector<platform::Place> places_;
const platform::NCCLContextMap &nccl_ctxs_; const platform::NCCLContextMap &nccl_ctxs_;
}; };
......
...@@ -32,8 +32,8 @@ namespace framework { ...@@ -32,8 +32,8 @@ namespace framework {
namespace details { namespace details {
struct ReduceOpHandle : public OpHandleBase { struct ReduceOpHandle : public OpHandleBase {
const std::vector<Scope *> &local_scopes_; std::vector<Scope *> local_scopes_;
const std::vector<platform::Place> &places_; std::vector<platform::Place> places_;
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
const platform::NCCLContextMap *nccl_ctxs_; const platform::NCCLContextMap *nccl_ctxs_;
......
...@@ -73,64 +73,6 @@ void SSAGraphBuilder::CreateOpOutput(SSAGraph *graph, OpHandleBase *op_handle, ...@@ -73,64 +73,6 @@ void SSAGraphBuilder::CreateOpOutput(SSAGraph *graph, OpHandleBase *op_handle,
op_handle->AddOutput(var); op_handle->AddOutput(var);
} }
// Calls `callback` once per variable handle of `graph`: first every handle
// stored under graph.vars_, then every handle in graph.dep_vars_.
template <typename Callback>
void IterAllVar(const SSAGraph &graph, Callback callback) {
  for (const auto &var_map : graph.vars_) {
    for (const auto &entry : var_map) {
      const auto &handles = entry.second;
      for (const auto &handle : handles) {
        callback(*handle);
      }
    }
  }

  for (const auto &dep_var : graph.dep_vars_) {
    callback(*dep_var);
  }
}
// Writes `graph` to `sout` as a Graphviz "digraph G".
//
// Every variable handle visited by IterAllVar receives a sequential id
// ("var_<id>"); VarHandle nodes are labelled with name, place and version,
// DummyVarHandle nodes with "dummy". Every op becomes a rectangular node
// ("op_<id>") with one edge per input and per output.
//
// Lines end with '\n' instead of std::endl so the stream is not flushed per
// line; a single flush after the closing brace makes the whole graph visible.
void SSAGraphBuilder::PrintGraphviz(const SSAGraph &graph, std::ostream &sout) {
  size_t var_id = 0;
  std::unordered_map<const VarHandleBase *, size_t> vars;

  sout << "digraph G {\n";

  IterAllVar(graph, [&](const VarHandleBase &var) {
    auto *var_ptr = &var;
    auto *var_handle_ptr = dynamic_cast<const VarHandle *>(var_ptr);
    auto *dummy_ptr = dynamic_cast<const DummyVarHandle *>(var_ptr);

    // An id is assigned to every handle, even one that gets no node line.
    size_t cur_var_id = var_id++;
    vars[var_ptr] = cur_var_id;

    if (var_handle_ptr) {
      sout << "var_" << cur_var_id << " [label=\"" << var_handle_ptr->name_
           << "\\n"
           << var_handle_ptr->place_ << "\\n"
           << var_handle_ptr->version_ << "\"]" << '\n';
    } else if (dummy_ptr) {
      sout << "var_" << cur_var_id << " [label=\"dummy\"]" << '\n';
    }
  });

  size_t op_id = 0;
  for (auto &op : graph.ops_) {
    std::string op_name = "op_" + std::to_string(op_id++);
    sout << op_name << " [label=\"" << op->Name() << "\", shape=rect]"
         << '\n';

    for (auto in : op->Inputs()) {
      std::string var_name = "var_" + std::to_string(vars[in]);
      sout << var_name << " -> " << op_name << '\n';
    }

    for (auto out : op->Outputs()) {
      std::string var_name = "var_" + std::to_string(vars[out]);
      sout << op_name << " -> " << var_name << '\n';
    }
  }

  sout << "}\n";
  sout.flush();
}
void SSAGraphBuilder::AddOutputToLeafOps(SSAGraph *graph) { void SSAGraphBuilder::AddOutputToLeafOps(SSAGraph *graph) {
for (auto &op : graph->ops_) { for (auto &op : graph->ops_) {
if (!op->Outputs().empty()) { if (!op->Outputs().empty()) {
......
...@@ -55,8 +55,6 @@ class SSAGraphBuilder { ...@@ -55,8 +55,6 @@ class SSAGraphBuilder {
const platform::Place &place, size_t place_offset); const platform::Place &place, size_t place_offset);
static void AddOutputToLeafOps(SSAGraph *graph); static void AddOutputToLeafOps(SSAGraph *graph);
static void PrintGraphviz(const SSAGraph &graph, std::ostream &sout);
}; };
} // namespace details } // namespace details
} // namespace framework } // namespace framework
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/ssa_graph_printer.h"
#include <string>
#include "paddle/fluid/framework/details/ssa_graph.h"
namespace paddle {
namespace framework {
namespace details {
// Calls `callback` once per variable handle of `graph`: first every handle
// stored under graph.vars_, then every handle in graph.dep_vars_.
template <typename Callback>
static inline void IterAllVar(const SSAGraph &graph, Callback callback) {
  for (const auto &var_map : graph.vars_) {
    for (const auto &entry : var_map) {
      const auto &handles = entry.second;
      for (const auto &handle : handles) {
        callback(*handle);
      }
    }
  }

  for (const auto &dep_var : graph.dep_vars_) {
    callback(*dep_var);
  }
}
// Writes `graph` to `sout` as a Graphviz "digraph G".
//
// Every variable handle visited by IterAllVar receives a sequential id
// ("var_<id>"); VarHandle nodes are labelled with name, place and version,
// DummyVarHandle nodes with "dummy". Every op becomes a rectangular node
// ("op_<id>") with one edge per input and per output.
//
// Lines end with '\n' instead of std::endl so the stream is not flushed per
// line; a single flush after the closing brace makes the whole graph visible.
void GraphvizSSAGraphPrinter::Print(const SSAGraph &graph,
                                    std::ostream &sout) const {
  size_t var_id = 0;
  std::unordered_map<const VarHandleBase *, size_t> vars;

  sout << "digraph G {\n";

  IterAllVar(graph, [&](const VarHandleBase &var) {
    auto *var_ptr = &var;
    auto *var_handle_ptr = dynamic_cast<const VarHandle *>(var_ptr);
    auto *dummy_ptr = dynamic_cast<const DummyVarHandle *>(var_ptr);

    // An id is assigned to every handle, even one that gets no node line.
    size_t cur_var_id = var_id++;
    vars[var_ptr] = cur_var_id;

    if (var_handle_ptr) {
      sout << "var_" << cur_var_id << " [label=\"" << var_handle_ptr->name_
           << "\\n"
           << var_handle_ptr->place_ << "\\n"
           << var_handle_ptr->version_ << "\"]" << '\n';
    } else if (dummy_ptr) {
      sout << "var_" << cur_var_id << " [label=\"dummy\"]" << '\n';
    }
  });

  size_t op_id = 0;
  for (auto &op : graph.ops_) {
    std::string op_name = "op_" + std::to_string(op_id++);
    sout << op_name << " [label=\"" << op->Name() << "\", shape=rect]"
         << '\n';

    for (auto in : op->Inputs()) {
      std::string var_name = "var_" + std::to_string(vars[in]);
      sout << var_name << " -> " << op_name << '\n';
    }

    for (auto out : op->Outputs()) {
      std::string var_name = "var_" + std::to_string(vars[out]);
      sout << op_name << " -> " << var_name << '\n';
    }
  }

  sout << "}\n";
  sout.flush();
}
} // namespace details
} // namespace framework
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iosfwd>
#include "paddle/fluid/framework/details/ssa_graph_builder.h"
namespace paddle {
namespace framework {
namespace details {
class SSAGraph;

// Abstract interface for objects that write a rendering of an SSAGraph to an
// output stream.
class SSAGraphPrinter {
 public:
  virtual ~SSAGraphPrinter() = default;

  // Writes `graph` to `sout`; the format is chosen by the implementation.
  virtual void Print(const SSAGraph& graph, std::ostream& sout) const = 0;
};
// SSAGraphPrinter implementation that emits the graph in Graphviz "digraph"
// syntax. Print() is declared here and defined out of line.
class GraphvizSSAGraphPrinter : public SSAGraphPrinter {
public:
// Writes `graph` to `sout`.
void Print(const SSAGraph& graph, std::ostream& sout) const override;
};
class SSAGraghBuilderWithPrinter : public SSAGraphBuilder {
public:
SSAGraghBuilderWithPrinter(std::ostream& sout,
std::unique_ptr<SSAGraphPrinter>&& printer,
std::unique_ptr<SSAGraphBuilder>&& builder)
: printer_(std::move(printer)),
builder_(std::move(builder)),
stream_ref_(sout) {}
SSAGraghBuilderWithPrinter(std::unique_ptr<std::ostream>&& sout,
std::unique_ptr<SSAGraphPrinter>&& printer,
std::unique_ptr<SSAGraphBuilder>&& builder)
: printer_(std::move(printer)),
builder_(std::move(builder)),
stream_ptr_(std::move(sout)),
stream_ref_(*stream_ptr_) {}
std::unique_ptr<SSAGraph> Build(const ProgramDesc& program) const override {
auto graph = builder_->Build(program);
printer_->Print(*graph, stream_ref_);
return graph;
}
private:
std::unique_ptr<SSAGraphPrinter> printer_;
std::unique_ptr<SSAGraphBuilder> builder_;
std::unique_ptr<std::ostream> stream_ptr_;
std::ostream& stream_ref_;
};
} // namespace details
} // namespace framework
} // namespace paddle
...@@ -22,7 +22,7 @@ limitations under the License. */ ...@@ -22,7 +22,7 @@ limitations under the License. */
#include "paddle/fluid/platform/nccl_helper.h" #include "paddle/fluid/platform/nccl_helper.h"
#endif #endif
#include "paddle/fluid/framework/details/multi_devices_graph_builder.h" #include "paddle/fluid/framework/details/graph_builder_factory.h"
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h" #include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
...@@ -102,22 +102,19 @@ ParallelExecutor::ParallelExecutor( ...@@ -102,22 +102,19 @@ ParallelExecutor::ParallelExecutor(
var_infos.back().persistable_ = var->Persistable(); var_infos.back().persistable_ = var->Persistable();
} }
// Step 3. Convert main_program to SSA form and dependency graph. Also, insert // Step 3. Convert main_program to SSA form and dependency graph. Also, insert
// ncclOp // ncclOp
#ifdef PADDLE_WITH_CUDA
details::MultiDevSSAGraphBuilder builder( details::SSAGraphBuilderFactory builder_factory(
member_->places_, loss_var_name, params, member_->local_scopes_, member_->places_, loss_var_name, params, member_->local_scopes_,
member_->nccl_ctxs_.get(), build_strategy); build_strategy);
#else #ifdef PADDLE_WITH_CUDA
details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name, builder_factory.SetNCCLContextMap(member_->nccl_ctxs_.get());
params, member_->local_scopes_,
build_strategy);
#endif #endif
auto graph = builder.Build(main_program);
member_->executor_.reset(new details::ThreadedSSAGraphExecutor( member_->executor_.reset(new details::ThreadedSSAGraphExecutor(
exec_strategy, member_->local_scopes_, places, std::move(graph))); exec_strategy, member_->local_scopes_, places,
builder_factory.Create()->Build(main_program)));
member_->executor_.reset(new details::ScopeBufferedSSAGraphExecutor( member_->executor_.reset(new details::ScopeBufferedSSAGraphExecutor(
exec_strategy, member_->local_scopes_, std::move(var_infos), exec_strategy, member_->local_scopes_, std::move(var_infos),
......
...@@ -15,5 +15,102 @@ limitations under the License. */ ...@@ -15,5 +15,102 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor.h"
namespace paddle { namespace paddle {
namespace framework {} namespace framework {
extern size_t SizeOfType(std::type_index type);
// Checks, through the PADDLE_ENFORCE_* macros, that this tensor has a memory
// holder and that the bytes needed by its current dims fit in memory_size().
void Tensor::check_memory_size() const {
// A null holder_ means no memory has been attached to this tensor.
PADDLE_ENFORCE_NOT_NULL(
holder_, "Tensor holds no memory. Call Tensor::mutable_data first.");
// Required bytes (element count times element size) must not exceed the
// bytes available past offset_.
PADDLE_ENFORCE_LE(
numel() * SizeOfType(type()), memory_size(),
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.\n"
"or maybe the required data-type mismatches the data already stored.");
}
// Returns the number of bytes available from offset_ to the end of the held
// memory, or 0 when the tensor holds no memory.
size_t Tensor::memory_size() const {
  if (holder_ == nullptr) {
    return 0UL;
  }
  return holder_->size() - offset_;
}
// Returns a pointer to this tensor's memory on `place` for elements of `type`.
// A new holder is allocated when there is none, when the current holder is on
// a different place, or when it is smaller than the required size plus
// offset_; in that case offset_ is reset to 0.
void* Tensor::mutable_data(platform::Place place, std::type_index type) {
// An existing holder is retagged with the requested type up front.
if (holder_ != nullptr) {
holder_->set_type(type);
}
PADDLE_ENFORCE_GE(numel(), 0,
"When calling this method, the Tensor's numel must be "
"equal or larger than zero. "
"Please check Tensor::Resize has been called first.");
// Bytes required for the current dims with the requested element type.
int64_t size = numel() * SizeOfType(type);
/* some versions of boost::variant don't have operator!= */
if (holder_ == nullptr || !(holder_->place() == place) ||
holder_->size() < size + offset_) {
if (platform::is_cpu_place(place)) {
holder_.reset(new PlaceholderImpl<platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), size, type));
} else if (platform::is_gpu_place(place) ||
platform::is_cuda_pinned_place(place)) {
// The closing brace of this branch is inside both preprocessor arms below.
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW(
"CUDAPlace or CUDAPinnedPlace is not supported in CPU-only mode.");
}
#else
if (platform::is_gpu_place(place)) {
holder_.reset(new PlaceholderImpl<platform::CUDAPlace>(
boost::get<platform::CUDAPlace>(place), size, type));
} else if (platform::is_cuda_pinned_place(place)) {
holder_.reset(new PlaceholderImpl<platform::CUDAPinnedPlace>(
boost::get<platform::CUDAPinnedPlace>(place), size, type));
}
}
#endif
// A freshly allocated holder is addressed from its start.
offset_ = 0;
}
// Address of the holder's memory advanced by offset_ bytes.
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_);
}
// Overload that reuses the element type recorded in the current holder.
// Requires that the tensor already holds memory.
void* Tensor::mutable_data(platform::Place place) {
PADDLE_ENFORCE(this->holder_ != nullptr,
"Cannot invoke mutable data if current hold nothing.");
// Delegate to the typed overload with the holder's own type.
return mutable_data(place, holder_->type());
}
// Makes this tensor a copy-assigned view of `src` after checking that `src`
// passes check_memory_size(). Returns *this.
Tensor& Tensor::ShareDataWith(const Tensor& src) {
  src.check_memory_size();
  return *this = src;
}
// Returns a tensor covering rows [begin_idx, end_idx) of the first dimension.
//
// The result shares this tensor's holder and layout; only dims and offset_
// differ. When the first dimension is 1, a copy of *this is returned.
//
// begin_idx: first row of the slice; must be >= 0.
// end_idx:   one past the last row; must be <= dims_[0] and > begin_idx.
Tensor Tensor::Slice(int begin_idx, int end_idx) const {
  check_memory_size();
  // The check accepts 0, so the message says "greater than or equal to".
  PADDLE_ENFORCE_GE(begin_idx, 0,
                    "The start row index must be greater than or equal to 0.");
  PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is out of bound.");
  PADDLE_ENFORCE_LT(
      begin_idx, end_idx,
      "The start row index must be lesser than the end row index.");

  if (dims_[0] == 1) {
    return *this;
  } else {
    // Number of elements in one row of the first dimension.
    size_t base = numel() / dims_[0];
    Tensor dst;
    dst.holder_ = holder_;
    dst.set_layout(layout_);
    DDim dst_dims = dims_;
    dst_dims[0] = end_idx - begin_idx;
    dst.Resize(dst_dims);
    // Byte offset of the first selected row, relative to the shared holder.
    dst.offset_ = offset_ + begin_idx * base * SizeOfType(type());
    return dst;
  }
}
// Replaces the stored dims with `dims` and returns *this. Only dims_ is
// assigned here.
Tensor& Tensor::Resize(const DDim& dims) {
dims_ = dims;
return *this;
}
// Returns the stored dims.
const DDim& Tensor::dims() const { return dims_; }
// Returns product(dims_), i.e. the element count implied by the dims.
int64_t Tensor::numel() const { return product(dims_); }
} // namespace framework
} // namespace paddle } // namespace paddle
...@@ -54,26 +54,24 @@ class Tensor { ...@@ -54,26 +54,24 @@ class Tensor {
/*! Return a pointer to mutable memory block. */ /*! Return a pointer to mutable memory block. */
template <typename T> template <typename T>
inline T* data(); T* data();
/*! Return a pointer to constant memory block. */ /*! Return a pointer to constant memory block. */
template <typename T> template <typename T>
inline const T* data() const; const T* data() const;
inline bool IsInitialized() const; bool IsInitialized() const;
inline void switch_place(platform::Place new_place);
/** /**
* @brief Return a pointer to mutable memory block. * @brief Return a pointer to mutable memory block.
* @note If not exist, then allocation. * @note If not exist, then allocation.
*/ */
template <typename T> template <typename T>
inline T* mutable_data(platform::Place place); T* mutable_data(platform::Place place);
inline void* mutable_data(platform::Place place, std::type_index type); void* mutable_data(platform::Place place, std::type_index type);
inline void* mutable_data(platform::Place place); void* mutable_data(platform::Place place);
/** /**
* @brief Return a pointer to mutable memory block. * @brief Return a pointer to mutable memory block.
...@@ -84,19 +82,19 @@ class Tensor { ...@@ -84,19 +82,19 @@ class Tensor {
* @note If not exist, then allocation. * @note If not exist, then allocation.
*/ */
template <typename T> template <typename T>
inline T* mutable_data(DDim dims, platform::Place place); T* mutable_data(DDim dims, platform::Place place);
/*! Return the dimensions of the memory block. */ /*! Return the dimensions of the memory block. */
inline const DDim& dims() const; const DDim& dims() const;
/*! Return the numel of the memory block. */ /*! Return the numel of the memory block. */
inline int64_t numel() const; int64_t numel() const;
/*! Resize the dimensions of the memory block. */ /*! Resize the dimensions of the memory block. */
inline Tensor& Resize(const DDim& dims); Tensor& Resize(const DDim& dims);
/*! The internal of two tensors share the same memory block. */ /*! The internal of two tensors share the same memory block. */
inline Tensor& ShareDataWith(const Tensor& src); Tensor& ShareDataWith(const Tensor& src);
/** /**
* @brief Return a sub-tensor of the given tensor. * @brief Return a sub-tensor of the given tensor.
...@@ -106,7 +104,7 @@ class Tensor { ...@@ -106,7 +104,7 @@ class Tensor {
* @param[in] end_idx The index of the end row(exclusive) to slice. * @param[in] end_idx The index of the end row(exclusive) to slice.
* The index number begins from 0. * The index number begins from 0.
*/ */
inline Tensor Slice(int begin_idx, int end_idx) const; Tensor Slice(int begin_idx, int end_idx) const;
platform::Place place() const { platform::Place place() const {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
...@@ -123,11 +121,11 @@ class Tensor { ...@@ -123,11 +121,11 @@ class Tensor {
// memory size returns the holding memory size in byte. // memory size returns the holding memory size in byte.
size_t memory_size() const; size_t memory_size() const;
inline void check_memory_size() const; void check_memory_size() const;
inline DataLayout layout() const { return layout_; } DataLayout layout() const { return layout_; }
inline void set_layout(const DataLayout layout) { layout_ = layout; } void set_layout(const DataLayout layout) { layout_ = layout; }
private: private:
/** /**
...@@ -210,15 +208,6 @@ class Tensor { ...@@ -210,15 +208,6 @@ class Tensor {
size_t offset_; size_t offset_;
}; };
// Does nothing when the holder already lives on `new_place`; any actual move
// is not implemented and throws.
inline void Tensor::switch_place(platform::Place new_place) {
  const bool already_there = (holder_->place() == new_place);
  if (!already_there) {
    // TODO(tonyyang-svail): do memcpy here.
    PADDLE_THROW("Not Implemented");
  }
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
......
...@@ -20,21 +20,6 @@ limitations under the License. */ ...@@ -20,21 +20,6 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
extern size_t SizeOfType(std::type_index type);
// Checks, through the PADDLE_ENFORCE_* macros, that this tensor has a memory
// holder and that the bytes needed by its current dims fit in memory_size().
inline void Tensor::check_memory_size() const {
// A null holder_ means no memory has been attached to this tensor.
PADDLE_ENFORCE_NOT_NULL(
holder_, "Tensor holds no memory. Call Tensor::mutable_data first.");
// Required bytes (element count times element size) must not exceed the
// bytes available past offset_.
PADDLE_ENFORCE_LE(
numel() * SizeOfType(type()), memory_size(),
"Tensor's dims_ is out of bound. Call Tensor::mutable_data "
"first to re-allocate memory.\n"
"or maybe the required data-type mismatches the data already stored.");
}
// Returns the number of bytes available from offset_ to the end of the held
// memory, or 0 when the tensor holds no memory.
inline size_t Tensor::memory_size() const {
  if (holder_ == nullptr) {
    return 0UL;
  }
  return holder_->size() - offset_;
}
template <typename T> template <typename T>
inline const T* Tensor::data() const { inline const T* Tensor::data() const {
check_memory_size(); check_memory_size();
...@@ -73,88 +58,6 @@ inline T* Tensor::mutable_data(platform::Place place) { ...@@ -73,88 +58,6 @@ inline T* Tensor::mutable_data(platform::Place place) {
return reinterpret_cast<T*>(mutable_data(place, typeid(T))); return reinterpret_cast<T*>(mutable_data(place, typeid(T)));
} }
// Returns a pointer to this tensor's memory on `place` for elements of `type`.
// A new holder is allocated when there is none, when the current holder is on
// a different place, or when it is smaller than the required size plus
// offset_; in that case offset_ is reset to 0.
inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
// An existing holder is retagged with the requested type up front.
if (holder_ != nullptr) {
holder_->set_type(type);
}
PADDLE_ENFORCE_GE(numel(), 0,
"When calling this method, the Tensor's numel must be "
"equal or larger than zero. "
"Please check Tensor::Resize has been called first.");
// Bytes required for the current dims with the requested element type.
int64_t size = numel() * SizeOfType(type);
/* some versions of boost::variant don't have operator!= */
if (holder_ == nullptr || !(holder_->place() == place) ||
holder_->size() < size + offset_) {
if (platform::is_cpu_place(place)) {
holder_.reset(new PlaceholderImpl<platform::CPUPlace>(
boost::get<platform::CPUPlace>(place), size, type));
} else if (platform::is_gpu_place(place) ||
platform::is_cuda_pinned_place(place)) {
// The closing brace of this branch is inside both preprocessor arms below.
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW(
"CUDAPlace or CUDAPinnedPlace is not supported in CPU-only mode.");
}
#else
if (platform::is_gpu_place(place)) {
holder_.reset(new PlaceholderImpl<platform::CUDAPlace>(
boost::get<platform::CUDAPlace>(place), size, type));
} else if (platform::is_cuda_pinned_place(place)) {
holder_.reset(new PlaceholderImpl<platform::CUDAPinnedPlace>(
boost::get<platform::CUDAPinnedPlace>(place), size, type));
}
}
#endif
// A freshly allocated holder is addressed from its start.
offset_ = 0;
}
// Address of the holder's memory advanced by offset_ bytes.
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_);
}
// Overload that reuses the element type recorded in the current holder.
// Requires that the tensor already holds memory.
inline void* Tensor::mutable_data(platform::Place place) {
PADDLE_ENFORCE(this->holder_ != nullptr,
"Cannot invoke mutable data if current hold nothing.");
// Delegate to the typed overload with the holder's own type.
return mutable_data(place, holder_->type());
}
// Makes this tensor a copy-assigned view of `src` after checking that `src`
// passes check_memory_size(). Returns *this.
inline Tensor& Tensor::ShareDataWith(const Tensor& src) {
  src.check_memory_size();
  return *this = src;
}
// Returns a tensor covering rows [begin_idx, end_idx) of the first dimension.
//
// The result shares this tensor's holder and layout; only dims and offset_
// differ. When the first dimension is 1, a copy of *this is returned.
//
// begin_idx: first row of the slice; must be >= 0.
// end_idx:   one past the last row; must be <= dims_[0] and > begin_idx.
inline Tensor Tensor::Slice(int begin_idx, int end_idx) const {
  check_memory_size();
  // The check accepts 0, so the message says "greater than or equal to".
  PADDLE_ENFORCE_GE(begin_idx, 0,
                    "The start row index must be greater than or equal to 0.");
  PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is out of bound.");
  PADDLE_ENFORCE_LT(
      begin_idx, end_idx,
      "The start row index must be lesser than the end row index.");

  if (dims_[0] == 1) {
    return *this;
  } else {
    // Number of elements in one row of the first dimension.
    size_t base = numel() / dims_[0];
    Tensor dst;
    dst.holder_ = holder_;
    dst.set_layout(layout_);
    DDim dst_dims = dims_;
    dst_dims[0] = end_idx - begin_idx;
    dst.Resize(dst_dims);
    // Byte offset of the first selected row, relative to the shared holder.
    dst.offset_ = offset_ + begin_idx * base * SizeOfType(type());
    return dst;
  }
}
// Replaces the stored dims with `dims` and returns *this. Only dims_ is
// assigned here.
inline Tensor& Tensor::Resize(const DDim& dims) {
dims_ = dims;
return *this;
}
// Returns the stored dims.
inline const DDim& Tensor::dims() const { return dims_; }
// Returns product(dims_), i.e. the element count implied by the dims.
inline int64_t Tensor::numel() const { return product(dims_); }
inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) { inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
Tensor res; Tensor res;
res.ShareDataWith(src); res.ShareDataWith(src);
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reverse_op.h"
#include <vector>
namespace paddle {
namespace operators {
// Operator definition for "reverse": validates its input/output/attribute and
// gives the output the same dims as the input.
class ReverseOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
// Requires input "X", output "Out" and a non-empty "axis" attribute whose
// entries are all less than the rank of X; sets Out's dims to X's dims.
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null");
const auto& x_dims = ctx->GetInputDim("X");
const auto& axis = ctx->Attrs().Get<std::vector<int>>("axis");
PADDLE_ENFORCE(!axis.empty(), "'axis' can not be empty.");
// NOTE(review): only the upper bound is checked, so negative axis values
// pass here - confirm that the kernel handles or rejects them.
for (int a : axis) {
PADDLE_ENFORCE_LT(a, x_dims.size(),
"The axis must be less than input tensor's rank.");
}
// Reversal does not change the shape.
ctx->SetOutputDim("Out", x_dims);
}
};
// Declares the interface of the "reverse" op: input "X", output "Out", the
// integer-vector attribute "axis", and the user-facing documentation text.
class ReverseOpMaker : public framework::OpProtoAndCheckerMaker {
public:
// Registers the input, output, attribute and doc comment of the op.
void Make() override {
AddInput("X", "The LoDTensor to be flipped.");
AddOutput("Out", "The LoDTensor after flipping.");
AddAttr<std::vector<int>>(
"axis", "The axises that along which order of elements is reversed.");
// The raw string below is the op documentation; it gives two worked examples.
AddComment(R"DOC(
Reverse Operator.
Reverse the order of elements in the input LoDTensor along given axises.
Case 1:
Given
X = [[1, 2, 3, 4, 5]
[6, 7, 8, 9, 10]
[11, 12, 13, 14, 15]],
and
axis = [0],
we get:
Out = [[11, 12, 13, 14, 15]
[6, 7, 8, 9, 10]
[1, 2, 3, 4, 5]].
Case 2:
Given
X = [[[1, 2, 3, 4]
[5, 6, 7, 8]]
[[9, 10, 11, 12]
[13, 14, 15, 16]]],
and
axis = [0, 2],
we get:
Out = [[[12, 11, 10, 9]
[16, 15, 14, 13]]
[[4, 3, 2, 1]
[8, 7, 6, 5]]],
)DOC");
}
};
class ReverseGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
std::unique_ptr<framework::OpDesc> Apply() const override {
auto* grad_op = new framework::OpDesc();
grad_op->SetType("reverse");
grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttr("axis", GetAttr("axis"));
return std::unique_ptr<framework::OpDesc>(grad_op);
}
};
} // namespace operators
} // namespace paddle
// Short alias used by the registration macros below.
namespace ops = paddle::operators;
// Register the forward operator together with its proto maker and the maker
// that describes its gradient op.
REGISTER_OPERATOR(reverse, ops::ReverseOp, ops::ReverseOpMaker,
ops::ReverseGradMaker);
// Register `reverse_grad` with the same operator class.
// NOTE(review): ReverseGradMaker emits an op of type "reverse", so this
// registration may be unused - confirm before removing.
REGISTER_OPERATOR(reverse_grad, ops::ReverseOp);
// CPU kernels for `reverse`, one instantiation per supported element type.
REGISTER_OP_CPU_KERNEL(
reverse, ops::ReverseKernel<paddle::platform::CPUDeviceContext, int>,
ops::ReverseKernel<paddle::platform::CPUDeviceContext, uint8_t>,
ops::ReverseKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ReverseKernel<paddle::platform::CPUDeviceContext, bool>,
ops::ReverseKernel<paddle::platform::CPUDeviceContext, float>,
ops::ReverseKernel<paddle::platform::CPUDeviceContext, double>)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reverse_op.h"
// Short alias used by the registration macro below.
namespace ops = paddle::operators;
// CUDA kernels for `reverse`, one instantiation per supported element type
// (the same type list as the CPU registration).
REGISTER_OP_CUDA_KERNEL(
reverse, ops::ReverseKernel<paddle::platform::CUDADeviceContext, int>,
ops::ReverseKernel<paddle::platform::CUDADeviceContext, uint8_t>,
ops::ReverseKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ReverseKernel<paddle::platform::CUDADeviceContext, bool>,
ops::ReverseKernel<paddle::platform::CUDADeviceContext, float>,
ops::ReverseKernel<paddle::platform::CUDADeviceContext, double>)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
// Reverses a tensor of compile-time rank `Rank` along the requested axes using
// Eigen's tensor `reverse` expression, evaluated on the Eigen device of
// `context`.
//
// DeviceContext: device context type providing `eigen_device()`.
// T:             element type of the tensors.
// Rank:          number of dimensions of `in` and `out`.
template <typename DeviceContext, typename T, int Rank>
struct ReverseFunctor {
  // in:   tensor to read from.
  // out:  tensor to write the reversed data to.
  // axis: dimensions to reverse; each entry must lie in [0, Rank).
  void operator()(const DeviceContext& context, const framework::LoDTensor& in,
                  framework::LoDTensor* out, const std::vector<int>& axis) {
    // One flag per dimension; true means "reverse along this dimension".
    Eigen::array<bool, Rank> reverse_axis;
    for (int i = 0; i < Rank; ++i) {
      reverse_axis[i] = false;
    }
    for (int a : axis) {
      // Guard the write below: an out-of-range axis would index past the
      // fixed-size flag array.
      PADDLE_ENFORCE(a >= 0 && a < Rank,
                     "The axis must be in range [0, rank of input tensor).");
      reverse_axis[a] = true;
    }

    auto in_eigen = framework::EigenTensor<T, Rank>::From(in);
    auto out_eigen = framework::EigenTensor<T, Rank>::From(*out);
    auto* dev = context.eigen_device();

    out_eigen.device(*dev) = in_eigen.reverse(reverse_axis);
  }
};
// Kernel of the `reverse` operator. It allocates the output and dispatches on
// the runtime rank of the input to the ReverseFunctor instantiation with the
// matching compile-time rank.
template <typename DeviceContext, typename T>
class ReverseKernel : public framework::OpKernel<T> {
 public:
  // Reads Input "X" and attribute "axis", writes Output "Out".
  // Throws (via PADDLE_THROW) when the rank of X is not one of 1..6.
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<framework::LoDTensor>("X");
    auto* out = context.Output<framework::LoDTensor>("Out");
    out->mutable_data<T>(context.GetPlace());
    const auto& axis = context.Attr<std::vector<int>>("axis");
    int rank = x->dims().size();
    auto& dev_ctx = context.template device_context<DeviceContext>();

    // Each case is wrapped in its own scope so that the functor object is
    // local to that case.
    switch (rank) {
      case 1: {
        ReverseFunctor<DeviceContext, T, 1> functor;
        functor(dev_ctx, *x, out, axis);
        break;
      }
      case 2: {
        ReverseFunctor<DeviceContext, T, 2> functor;
        functor(dev_ctx, *x, out, axis);
        break;
      }
      case 3: {
        ReverseFunctor<DeviceContext, T, 3> functor;
        functor(dev_ctx, *x, out, axis);
        break;
      }
      case 4: {
        ReverseFunctor<DeviceContext, T, 4> functor;
        functor(dev_ctx, *x, out, axis);
        break;
      }
      case 5: {
        ReverseFunctor<DeviceContext, T, 5> functor;
        functor(dev_ctx, *x, out, axis);
        break;
      }
      case 6: {
        ReverseFunctor<DeviceContext, T, 6> functor;
        functor(dev_ctx, *x, out, axis);
        break;
      }
      default:
        // Reached for any rank outside 1..6, including rank 0.
        PADDLE_THROW(
            "Reverse operator only supports tensors whose ranks are between "
            "1 and 6.");
    }
  }
};
} // namespace operators
} // namespace paddle
...@@ -553,6 +553,12 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -553,6 +553,12 @@ All parameter, weight, gradient are variables in Paddle.
[](BuildStrategy &self, [](BuildStrategy &self,
BuildStrategy::GradientScaleStrategy strategy) { BuildStrategy::GradientScaleStrategy strategy) {
self.gradient_scale_ = strategy; self.gradient_scale_ = strategy;
})
.def_property(
"debug_graphviz_path",
[](const BuildStrategy &self) { return self.debug_graphviz_path_; },
[](BuildStrategy &self, const std::string &path) {
self.debug_graphviz_path_ = path;
}); });
pe.def(py::init<const std::vector<platform::Place> &, pe.def(py::init<const std::vector<platform::Place> &,
......
...@@ -447,7 +447,7 @@ EOF ...@@ -447,7 +447,7 @@ EOF
# run paddle version to install python packages first # run paddle version to install python packages first
RUN apt-get update &&\ RUN apt-get update &&\
${NCCL_DEPS}\ ${NCCL_DEPS}\
apt-get install -y wget python-pip python-opencv dmidecode python-tk && easy_install -U pip && \ apt-get install -y wget python-pip python-opencv libgtk2.0-dev dmidecode python-tk && easy_install -U pip && \
pip install /*.whl; apt-get install -f -y && \ pip install /*.whl; apt-get install -f -y && \
apt-get clean -y && \ apt-get clean -y && \
rm -f /*.whl && \ rm -f /*.whl && \
......
...@@ -1182,19 +1182,19 @@ def conv2d(input, ...@@ -1182,19 +1182,19 @@ def conv2d(input,
- Input: - Input:
Input shape: $(N, C_{in}, H_{in}, W_{in})$ Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: $(C_{out}, C_{in}, H_f, W_f)$ Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
- Output: - Output:
Output shape: $(N, C_{out}, H_{out}, W_{out})$ Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where Where
.. math:: .. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\ H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1 W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
Args: Args:
input(Variable): The input image with [N, C, H, W] format. input(Variable): The input image with [N, C, H, W] format.
......
...@@ -363,6 +363,40 @@ def zeros(shape, dtype, force_cpu=False): ...@@ -363,6 +363,40 @@ def zeros(shape, dtype, force_cpu=False):
return fill_constant(value=0.0, **locals()) return fill_constant(value=0.0, **locals())
def reverse(x, axis):
    """
    **reverse**

    This function reverses the input 'x' along the given axes.

    Args:
        x(Variable): the input to be reversed.
        axis(int|tuple|list): Axis along which the order of elements
                    is reversed. If it is a tuple or a list, reversing
                    will be applied on each axis in the tuple or list.

    Returns:
        Variable: The reversed tensor.

    Examples:
        .. code-block:: python

          out = fluid.layers.reverse(x=data, axis=0)
          # or:
          out = fluid.layers.reverse(x=data, axis=[0,1])
    """
    # A single int is accepted for convenience; the op attribute is a list.
    if isinstance(axis, int):
        axis = [axis]
    helper = LayerHelper("reverse", **locals())
    out = helper.create_tmp_variable(dtype=x.dtype)
    helper.append_op(
        type='reverse',
        # The reverse operator declares its input slot as "X".
        inputs={'X': x},
        outputs={'Out': [out]},
        attrs={'axis': axis})
    return out
def save(x, file_path, overwrite=True): def save(x, file_path, overwrite=True):
""" """
Saves a variable as a file. Saves a variable as a file.
......
...@@ -252,5 +252,25 @@ class TestFP16ElementwiseAddOp_rowwise_add_1(TestFP16ElementwiseAddOp): ...@@ -252,5 +252,25 @@ class TestFP16ElementwiseAddOp_rowwise_add_1(TestFP16ElementwiseAddOp):
self.axis = 1 self.axis = 1
class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp):
    """Element-wise add of a (3, 20, 20) tensor and a (3, 1, 1) tensor."""

    def init_axis(self):
        # -1 is the axis value handed to the operator for this case.
        self.axis = -1

    def init_input_output(self):
        x_shape = (3, 20, 20)
        y_shape = (3, 1, 1)
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference result computed by numpy broadcasting.
        self.out = np.add(self.x, self.y)
class TestFP16ElementwiseAddOp_channelwise_add(TestFP16ElementwiseAddOp):
    """Element-wise add of a (3, 10, 20) tensor and a (3, 1, 1) tensor."""

    def init_axis(self):
        # -1 is the axis value handed to the operator for this case.
        self.axis = -1

    def init_input_output(self):
        x_shape = (3, 10, 20)
        y_shape = (3, 1, 1)
        self.x = np.random.rand(*x_shape).astype(self.dtype)
        self.y = np.random.rand(*y_shape).astype(self.dtype)
        # Reference result computed by numpy broadcasting.
        self.out = np.add(self.x, self.y)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from op_test import OpTest
class TestReverseOp(OpTest):
    """Checks the `reverse` op against numpy's flip on a (3, 4) input."""

    def initTestCase(self):
        # Subclasses override this to vary the input shape and the axes.
        self.x = np.random.random((3, 4)).astype('float32')
        self.axis = [0]

    def setUp(self):
        self.initTestCase()
        self.op_type = "reverse"
        self.inputs = {"X": self.x}
        self.attrs = {'axis': self.axis}

        # Build the expected output by flipping along each axis in turn.
        expected = self.x
        for flip_axis in self.axis:
            expected = np.flip(expected, axis=flip_axis)
        self.outputs = {'Out': expected}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['X'], 'Out')
class TestCase0(TestReverseOp):
    """Reverse a (3, 4) input along axis 1."""

    def initTestCase(self):
        input_shape = (3, 4)
        self.x = np.random.random(input_shape).astype('float32')
        self.axis = [1]
class TestCase1(TestReverseOp):
    """Reverse a (3, 4) input along axes 0 and 1."""

    def initTestCase(self):
        input_shape = (3, 4)
        self.x = np.random.random(input_shape).astype('float32')
        self.axis = [0, 1]
class TestCase2(TestReverseOp):
    """Reverse a (3, 4, 5) input along axes 0 and 2."""

    def initTestCase(self):
        input_shape = (3, 4, 5)
        self.x = np.random.random(input_shape).astype('float32')
        self.axis = [0, 2]
class TestCase3(TestReverseOp):
    """Reverse a (3, 4, 5) input along axes 1 and 2."""

    def initTestCase(self):
        input_shape = (3, 4, 5)
        self.x = np.random.random(input_shape).astype('float32')
        self.axis = [1, 2]
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册