diff --git a/paddle/contrib/float16/float16_transpiler.py b/paddle/contrib/float16/float16_transpiler.py
index 8d95dc0591e1d6bd815cc697528191c2ee8c1cfe..500f64bed9898fa874cbad2ea69aae05df58023e 100644
--- a/paddle/contrib/float16/float16_transpiler.py
+++ b/paddle/contrib/float16/float16_transpiler.py
@@ -60,7 +60,7 @@ class Float16Transpiler:
             raise TypeError("place should be as CPUPlace/CUDAPlace type")
         if scope is None:
             scope = global_scope()
-        if not isinstance(scope, core.Scope):
+        if not isinstance(scope, core._Scope):
             raise TypeError("scope should be as Scope type or None")
         self.scope = scope
diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index e3b44499258d288fe5692ca23efe1c4ec234f75c..9872631553056fbabd8be8162ecee16250b33862 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -351,6 +351,23 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_b
 paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
 paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.contrib.build_compressor ArgSpec(args=['place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'config'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None))
+paddle.fluid.contrib.CompressPass.__init__ ArgSpec(args=['self', 'place', 'data_reader', 'data_feeder', 'scope', 'metrics', 'epoch', 'program_exe'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None))
+paddle.fluid.contrib.CompressPass.add_strategy ArgSpec(args=['self', 'strategy'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.CompressPass.apply ArgSpec(args=['self', 'graph'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.ImitationGraph.__init__ ArgSpec(args=['self', 'program'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.contrib.ImitationGraph.all_parameters ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.SensitivePruneStrategy.__init__ ArgSpec(args=['self', 'pruner', 'start_epoch', 'end_epoch', 'delta_rate', 'acc_loss_threshold', 'sensitivities'], varargs=None, keywords=None, defaults=(None, 0, 10, 0.2, 0.2, None))
+paddle.fluid.contrib.SensitivePruneStrategy.on_batch_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.SensitivePruneStrategy.on_batch_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.SensitivePruneStrategy.on_compress_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.SensitivePruneStrategy.on_compress_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_begin ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.SensitivePruneStrategy.on_epoch_end ArgSpec(args=['self', 'context'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.MagnitudePruner.__init__ ArgSpec(args=['self', 'threshold'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.contrib.MagnitudePruner.prune ArgSpec(args=['self', 'param', 'threshold'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.contrib.RatioPruner.__init__ ArgSpec(args=['self', 'ratios'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.contrib.RatioPruner.prune ArgSpec(args=['self', 'param', 'ratio'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.contrib.load_persistables_for_increment ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.contrib.load_persistables_for_inference ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.contrib.convert_dist_to_sparse_program ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
@@ -447,11 +464,7 @@ paddle.fluid.unique_name.switch ArgSpec(args=['new_generator'], varargs=None, ke
 paddle.fluid.unique_name.guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
 paddle.fluid.recordio_writer.convert_reader_to_recordio_file ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
 paddle.fluid.recordio_writer.convert_reader_to_recordio_files ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None))
-paddle.fluid.Scope.__init__ __init__(self: paddle.fluid.core.Scope) -> None
-paddle.fluid.Scope.drop_kids drop_kids(self: paddle.fluid.core.Scope) -> None
-paddle.fluid.Scope.find_var find_var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
-paddle.fluid.Scope.new_scope new_scope(self: paddle.fluid.core.Scope) -> paddle.fluid.core.Scope
-paddle.fluid.Scope.var var(self: paddle.fluid.core.Scope, arg0: unicode) -> paddle.fluid.core.Variable
+paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope
 paddle.reader.map_readers ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None)
 paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=None, defaults=None)
 paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None)
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 10a637af445c45baaa013a2a1217c6a23f794638..47936a2ac20c6a51063ee7f30c59decdc41342cb 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -48,10 +48,10 @@ if(WITH_GPU)
     nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context)
     add_dependencies(tensor tensor_util)
   else()
-    nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context)
+    nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context )
   endif(WIN32)
 else()
-  cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context)
+  cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context )
 endif()

 cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
@@ -84,6 +84,7 @@ cc_library(threadpool SRCS threadpool.cc DEPS enforce)
 cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)
 cc_library(scope SRCS scope.cc DEPS glog threadpool xxhash)
+cc_library(scope_pool SRCS scope_pool.cc DEPS scope)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)

 cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor)
diff --git a/paddle/fluid/framework/attribute.h b/paddle/fluid/framework/attribute.h
index d9c76881b7e98d0b7cd29024b98c8f7720398c66..67054eccb3397ea40f0fb3e2ff2530ee1ea64736 100644
--- a/paddle/fluid/framework/attribute.h
+++ b/paddle/fluid/framework/attribute.h
@@ -165,7 +165,7 @@ template <typename T>
 class GreaterThanChecker {
  public:
   explicit GreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
-  void operator()(T& value) const {
+  void operator()(const T& value) const {
     PADDLE_ENFORCE(value > lower_bound_, "larger_than check fails.");
   }
@@ -177,7 +177,7 @@ template <typename T>
 class EqualGreaterThanChecker {
  public:
   explicit EqualGreaterThanChecker(T lower_bound) : lower_bound_(lower_bound) {}
-  void operator()(T& value) const {
+  void operator()(const T& value) const {
     PADDLE_ENFORCE_GE(value, lower_bound_, "equal_larger_than check fails.");
   }
@@ -193,7 +193,7 @@ class DefaultValueSetter {
  public:
   explicit DefaultValueSetter(T default_value)
       : default_value_(default_value) {}
-  void operator()(T& value) const { value = default_value_; }  // NOLINT
+  void operator()(T* value) const { *value = default_value_; }

  private:
   T default_value_;
@@ -203,7 +203,7 @@ template <typename T>
 class EnumInContainer {
  public:
   explicit EnumInContainer(const std::unordered_set<T>& c) : container_(c) {}
-  void operator()(T& val) const {
+  void operator()(const T& val) const {
     PADDLE_ENFORCE(container_.find(val) != container_.end(),
                    "Value %s is not in enum container %s", val,
                    ContainerDebugString());
@@ -232,7 +232,8 @@ class EnumInContainer {
 // an attribute can have more than one limits
 template <typename T>
 class TypedAttrChecker {
-  typedef std::function<void(T&)> ValueChecker;
+  typedef std::function<void(T*)> DefaultValueChecker;
+  typedef std::function<void(const T&)> ValueChecker;

  public:
   explicit TypedAttrChecker(const std::string& attr_name)
@@ -268,17 +269,17 @@ class TypedAttrChecker {
     return *this;
   }

-  void operator()(AttributeMap& attr_map) const {  // NOLINT
-    if (!attr_map.count(attr_name_)) {
+  void operator()(AttributeMap* attr_map) const {
+    if (!attr_map->count(attr_name_)) {
       // user do not set this attr
       PADDLE_ENFORCE(!default_value_setter_.empty(),
                      "Attribute '%s' is required!", attr_name_);
       // default_value_setter_ has no more than one element
       T val;
-      (default_value_setter_[0])(val);
-      attr_map[attr_name_] = val;
+      (default_value_setter_[0])(&val);
+      (*attr_map)[attr_name_] = val;
     }
-    Attribute& attr = attr_map.at(attr_name_);
+    Attribute& attr = attr_map->at(attr_name_);
     ExtractAttribute<T> extract_attr(attr_name_);
     T* attr_value = extract_attr(attr);
     for (const auto& checker : value_checkers_) {
@@ -289,12 +290,12 @@ class TypedAttrChecker {
  private:
   std::string attr_name_;
   std::vector<ValueChecker> value_checkers_;
-  std::vector<ValueChecker> default_value_setter_;
+  std::vector<DefaultValueChecker> default_value_setter_;
 };

 // check whether op's all attributes fit their own limits
 class OpAttrChecker {
-  typedef std::function<void(AttributeMap&)> AttrChecker;
+  typedef std::function<void(AttributeMap*)> AttrChecker;

  public:
  template <typename T>
@@ -304,7 +305,7 @@
     return *(checker.target<TypedAttrChecker<T>>());
   }

-  void Check(AttributeMap& attr_map) const {  // NOLINT
+  void Check(AttributeMap* attr_map) const {
     for (const auto& checker : attr_checkers_) {
       checker(attr_map);
     }
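The attribute.h refactor above replaces mutable-reference parameters with const references for validators and an explicit pointer for the default-value setter, so mutation is visible at the call site. A minimal sketch of that calling convention (illustrative names, not Paddle's API):

#include <functional>
#include <vector>

template <typename T>
struct MiniAttrChecker {
  std::vector<std::function<void(const T&)>> validators;  // read-only checks
  std::function<void(T*)> default_setter;                 // explicit mutation

  void Check(T* value, bool user_set) const {
    if (!user_set && default_setter) default_setter(value);  // fill default
    for (const auto& v : validators) v(*value);              // then validate
  }
};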
diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.cc b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
index 036cef1daaae4bcd52ffcd40bc0f74ee3840f3b2..7e320a08942e4a9a27e6b5c892a993b3a90c43a4 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
@@ -355,7 +355,9 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
           BuildStrategy::GradientScaleStrategy::kCustomized) {
         // TODO(paddle-dev): Why is there no input for this op_handle?
         auto loss_grad_name = node->Op()->OutputArgumentNames()[0];
-        CreateScaleLossGradOp(&result, loss_grad_name, node->outputs[0]);
+        auto out_dtype = all_vars_.at(loss_grad_name)->GetDataType();
+        CreateScaleLossGradOp(&result, loss_grad_name, node->outputs[0],
+                              out_dtype);
       }
       // This assumes the backward generating code will ensure IsScaleLossOp
       // is true only for the op that scale the final scalar loss.
@@ -658,13 +660,13 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(

 void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(
     ir::Graph *result, const std::string &loss_grad_name,
-    ir::Node *out_var_node) const {
+    ir::Node *out_var_node, proto::VarType::Type dtype) const {
   for (size_t i = 0; i < places_.size(); ++i) {
     // Insert ScaleCost OpHandle
     auto *dev_ctx = platform::DeviceContextPool::Instance().Get(places_[i]);
     auto *op_handle = new ScaleLossGradOpHandle(
         result->CreateEmptyNode("scale_loss_grad", ir::Node::Type::kOperation),
-        local_scopes_.size(), local_scopes_[i], places_[i], dev_ctx);
+        local_scopes_.size(), local_scopes_[i], places_[i], dev_ctx, dtype);
     result->Get<GraphOps>(kGraphOps).emplace_back(op_handle);

     // FIXME: Currently ScaleLossGradOp only use device_count as scale
diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.h b/paddle/fluid/framework/details/multi_devices_graph_pass.h
index 0556232aa4754cd123a85a4aa3dce8b3f4c57b08..5736102ddc13418446013307cf8204b677f960dc 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.h
@@ -68,7 +68,8 @@ class MultiDevSSAGraphBuilder : public ir::Pass {

   void CreateScaleLossGradOp(ir::Graph *result,
                              const std::string &loss_grad_name,
-                             ir::Node *out_var_node) const;
+                             ir::Node *out_var_node,
+                             proto::VarType::Type dtype) const;

   VarHandle *CreateReduceOp(ir::Graph *result, const std::string &og,
                             int dst_dev_id) const;
diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
index ef1626599795a553e654fe5d3ed74ef3a3a67d78..e1b8e8fe05f0615d689e78d9c405cc5d76d2abb1 100644
--- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
+++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
@@ -22,39 +22,66 @@ namespace details {

 ScaleLossGradOpHandle::ScaleLossGradOpHandle(ir::Node *node, size_t num_dev,
                                              Scope *scope,
                                              platform::Place place,
-                                             platform::DeviceContext *dev_ctx)
+                                             platform::DeviceContext *dev_ctx,
+                                             proto::VarType::Type dtype)
     : OpHandleBase(node),
       coeff_(static_cast<float>(1.0 / num_dev)),
       scope_(scope),
-      place_(place) {
+      place_(place),
+      out_dtype_(dtype) {
   this->SetDeviceContext(place_, dev_ctx);
 }

 ScaleLossGradOpHandle::~ScaleLossGradOpHandle() {}

+struct ScaleLossGradFunctor {
+  float coeff_;
+  Tensor *out_;
+  platform::Place place_;
+  OpHandleBase *op_handle_;
+  proto::VarType::Type out_dtype_;
+  platform::DeviceContext *ctx_;
+
+  ScaleLossGradFunctor(float coeff, Tensor *out, platform::Place place,
+                       OpHandleBase *op_handle, proto::VarType::Type dtype,
+                       platform::DeviceContext *ctx)
+      : coeff_(coeff),
+        out_(out),
+        place_(place),
+        op_handle_(op_handle),
+        out_dtype_(dtype),
+        ctx_(ctx) {}
+
+  template <typename OutT>
+  void apply() const {
+    auto *out_data = out_->mutable_data<OutT>(place_);
+    if (platform::is_cpu_place(place_)) {
+      *out_data = static_cast<OutT>(coeff_);
+    } else {
+#ifdef PADDLE_WITH_CUDA
+      OutT cast_coeff = static_cast<OutT>(coeff_);
+      auto stream = static_cast<platform::CUDADeviceContext *>(ctx_)->stream();
+      memory::Copy(boost::get<platform::CUDAPlace>(place_), out_data,
+                   platform::CPUPlace(), &cast_coeff, SizeOfType(out_dtype_),
+                   stream);
+      VLOG(10) << place_ << "RUN Scale loss grad op";
+
+#endif
+    }
+  }
+};
+
 void ScaleLossGradOpHandle::RunImpl() {
   // Doesn't wait any event
   std::string var_name = static_cast<VarHandle *>(this->outputs_[0])->name_;
   auto &local_scope = *scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();

-  float *tmp = local_scope.FindVar(var_name)
-                   ->GetMutable<LoDTensor>()
-                   ->mutable_data<float>(make_ddim({1}), place_);
+  auto *tensor = local_scope.FindVar(var_name)->GetMutable<LoDTensor>();
+  tensor->Resize(make_ddim({1}));

-  if (platform::is_cpu_place(place_)) {
-    *tmp = coeff_;
-  } else {
 #ifdef PADDLE_WITH_CUDA
-    this->RunAndRecordEvent([&] {
-      auto stream = static_cast<platform::CUDADeviceContext *>(
-                        this->dev_ctxes_.at(place_))
-                        ->stream();
-      memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp,
-                   platform::CPUPlace(), &coeff_, sizeof(float), stream);
-      VLOG(10) << place_ << "RUN Scale loss grad op";
-    });
+  ScaleLossGradFunctor func(coeff_, tensor, place_, this, out_dtype_,
+                            this->dev_ctxes_.at(place_));
+  this->RunAndRecordEvent([&] { framework::VisitDataType(out_dtype_, func); });
+#else
+  ScaleLossGradFunctor func(coeff_, tensor, place_, this, out_dtype_, nullptr);
+  framework::VisitDataType(out_dtype_, func);
 #endif
-  }
 }

 std::string ScaleLossGradOpHandle::Name() const { return "Scale LossGrad"; }
diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.h b/paddle/fluid/framework/details/scale_loss_grad_op_handle.h
index 523b55724c82d4e2bef0520c10e5708c952a3ecc..8bedd1643eb9c5e591fa3c40995fcba08980b9fa 100644
--- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.h
+++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.h
@@ -26,8 +26,8 @@ namespace details {

 struct ScaleLossGradOpHandle : public OpHandleBase {
   ScaleLossGradOpHandle(ir::Node *node, size_t num_dev, Scope *scope,
-                        platform::Place place,
-                        platform::DeviceContext *context);
+                        platform::Place place, platform::DeviceContext *context,
+                        proto::VarType::Type dtype);

   ~ScaleLossGradOpHandle() final;

@@ -40,6 +40,7 @@ struct ScaleLossGradOpHandle : public OpHandleBase {
   float coeff_;
   Scope *scope_;
   platform::Place place_;
+  proto::VarType::Type out_dtype_;
 };

 }  // namespace details
diff --git a/paddle/fluid/framework/ngraph_bridge.cc b/paddle/fluid/framework/ngraph_bridge.cc
index 5fcb17b9f3ac390548aba33db7d0b8350cde7e00..42190b52289bfc6fc510f13cb5190a0d3e03b836 100644
--- a/paddle/fluid/framework/ngraph_bridge.cc
+++ b/paddle/fluid/framework/ngraph_bridge.cc
@@ -31,10 +31,12 @@
 std::map<std::string,
          std::function<void(const std::shared_ptr<OperatorBase>&,
                             std::shared_ptr<std::unordered_map<
                                 std::string, std::shared_ptr<ngraph::Node>>>)>>
     NgraphBridge::NG_NODE_MAP = {
+        {"fill_constant", paddle::operators::ngraphs::BuildFillConstantNode},
         {"mul", paddle::operators::ngraphs::BuildMulNode},
         {"mul_grad", paddle::operators::ngraphs::BuildMulGradNode},
         {"relu", paddle::operators::ngraphs::BuildUnaryNode<ngraph::op::Relu>},
-        {"tanh", paddle::operators::ngraphs::BuildUnaryNode<ngraph::op::Tanh>}};
+        {"tanh", paddle::operators::ngraphs::BuildUnaryNode<ngraph::op::Tanh>},
+        {"top_k", paddle::operators::ngraphs::BuildTopKNode}};

 void NgraphBridge::BuildNgNode(const std::shared_ptr<OperatorBase>& op) {
   auto& op_type = op->Type();
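ScaleLossGradOpHandle now picks its output element type at run time: framework::VisitDataType selects the functor's apply<OutT>() instantiation from the proto::VarType::Type tag. A simplified stand-in for that visitor mechanism (hypothetical names, not Paddle's):

#include <cstdint>
#include <stdexcept>

enum class DType { FP32, FP64, INT64 };

template <typename Visitor>
void VisitDType(DType t, Visitor v) {
  switch (t) {
    case DType::FP32: v.template apply<float>(); break;
    case DType::FP64: v.template apply<double>(); break;
    case DType::INT64: v.template apply<int64_t>(); break;
    default: throw std::runtime_error("unsupported dtype");
  }
}

struct FillCoeff {
  double coeff;
  void* dst;
  template <typename T>
  void apply() const { *static_cast<T*>(dst) = static_cast<T>(coeff); }
};
// Usage: float out; VisitDType(DType::FP32, FillCoeff{1.0 / num_dev, &out});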
diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index 2fe1c94ec02e8ff0a4acb81868ba2124ea89e506..0e7b0cbeb98f3b6bbf0b37f507fc6022be692bb1 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -643,7 +643,7 @@ void OpDesc::CheckAttrs() {
     // not by users.
     return;
   }
-  checker->Check(attrs_);
+  checker->Check(&attrs_);
 }

 void OpDesc::InferShape(const BlockDesc &block) const {
diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc
index ca31303f77c4a30eb64c43404e214779ea78aeaf..2311614c335a56501ac777d787f6653659294765 100644
--- a/paddle/fluid/framework/op_proto_maker.cc
+++ b/paddle/fluid/framework/op_proto_maker.cc
@@ -82,6 +82,10 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
   AddAttr<std::string>(OpNamescopeAttrName(), "Operator name with namescope.")
       .SetDefault("");

+  AddAttr<std::vector<std::string>>(OpCreationCallstackAttrName(),
+                                    "Callstack for Op Creation.")
+      .SetDefault({});
+
   Validate();
 }
diff --git a/paddle/fluid/framework/op_proto_maker.h b/paddle/fluid/framework/op_proto_maker.h
index 4c59c73d8779eceb267ad532aabccabbd54b0df2..0a0f8f4655bc34cdb25205ff6eaec9f96c801ebd 100644
--- a/paddle/fluid/framework/op_proto_maker.h
+++ b/paddle/fluid/framework/op_proto_maker.h
@@ -47,6 +47,7 @@ class OpProtoAndCheckerMaker {
   static const char *OpRoleAttrName() { return "op_role"; }
   static const char *OpRoleVarAttrName() { return "op_role_var"; }
   static const char *OpNamescopeAttrName() { return "op_namescope"; }
+  static const char *OpCreationCallstackAttrName() { return "op_callstack"; }

   void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker);
diff --git a/paddle/fluid/framework/op_registry.cc b/paddle/fluid/framework/op_registry.cc
index bfc411ca2c4a483e344b368da089392d8e4a87c1..346d14d408ea1ed2cfbdbed5f48e56902e6e95b2 100644
--- a/paddle/fluid/framework/op_registry.cc
+++ b/paddle/fluid/framework/op_registry.cc
@@ -24,7 +24,7 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
     const VariableNameMap& outputs, AttributeMap attrs) {
   auto& info = OpInfoMap::Instance().Get(type);
   if (info.Checker() != nullptr) {
-    info.Checker()->Check(attrs);
+    info.Checker()->Check(&attrs);
   }
   auto op = info.Creator()(type, inputs, outputs, attrs);
   return std::unique_ptr<OperatorBase>(op);
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 0f5088103aa2fb19be275a62ea7ab13a7f6f4e59..2b2764db01257208002e0efd4a71cc10850c4260 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -16,10 +16,15 @@ limitations under the License. */
 #include <gflags/gflags.h>
 #include <glog/logging.h>
-
+#include <sstream>
+#include <string>
+#include <vector>
+#include "gflags/gflags.h"
+#include "glog/logging.h"
 #include "paddle/fluid/framework/data_transform.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/shape_inference.h"
 #include "paddle/fluid/framework/transfer_scope_cache.h"
@@ -157,27 +162,59 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames,
 }

 void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
-  VLOG(4) << place << " " << DebugStringEx(&scope);
-  if (platform::is_gpu_place(place)) {
+  try {
+    if (VLOG_IS_ON(4)) {
+      VLOG(4) << place << " " << DebugStringEx(&scope);
+    }
+    if (platform::is_gpu_place(place)) {
 #ifndef PADDLE_WITH_CUDA
-    PADDLE_THROW("Cannot run operator on place %s", place);
+      PADDLE_THROW("Cannot run operator on place %s", place);
 #else
-    auto dev_id = boost::get<platform::CUDAPlace>(place).device;
-    platform::SetDeviceId(dev_id);
+      auto dev_id = boost::get<platform::CUDAPlace>(place).device;
+      platform::SetDeviceId(dev_id);
 #endif
-  }
+    }

-  // The profile has a process-wide mutex, results in serious performance issue
-  // in concurrency scenerio. Here use an `if` to fix this issue.
-  // Please not remove the `if`, ask @Superjomn if there are any concern.
-  if (platform::IsProfileEnabled()) {
-    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-    platform::RecordEvent record_event(Type(), pool.Get(place));
-    RunImpl(scope, place);
-  } else {
-    RunImpl(scope, place);
+    // The profiler has a process-wide mutex, which results in a serious
+    // performance issue in concurrency scenarios. Here we use an `if` to
+    // fix this issue. Please do not remove the `if`; ask @Superjomn if
+    // there is any concern.
+    if (platform::IsProfileEnabled()) {
+      platform::DeviceContextPool& pool =
+          platform::DeviceContextPool::Instance();
+      platform::RecordEvent record_event(Type(), pool.Get(place));
+      RunImpl(scope, place);
+    } else {
+      RunImpl(scope, place);
+    }
+
+    if (VLOG_IS_ON(3)) {
+      VLOG(3) << place << " " << DebugStringEx(&scope);
+    }
+  } catch (platform::EnforceNotMet exception) {
+    if (Attrs().count("sub_block") != 0) {
+      throw exception;
+    }
+
+    auto& callstack = Attr<std::vector<std::string>>(
+        OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
+
+    if (callstack.empty()) {
+      throw exception;
+    }
+    std::ostringstream sout;
+    sout << "Invoke operator " << Type() << " error.\n";
+    sout << "Python Callstacks: \n";
+    for (auto& line : callstack) {
+      sout << line;
+    }
+    sout << "C++ Callstacks: \n";
+    sout << exception.err_str_;
+    exception.err_str_ = sout.str();
+    throw exception;
+  } catch (...) {
+    std::rethrow_exception(std::current_exception());
   }
-  VLOG(3) << place << " " << DebugStringEx(&scope);
 }

 bool OperatorBase::HasInputs(const std::string& name) const {
@@ -1057,8 +1094,8 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
       t = &(var->Get<SelectedRows>().value());
     }
     if (t != nullptr) {
-      PADDLE_ENFORCE(t->IsInitialized(), "Input %s is not initialized: %s",
-                     ipt_name, DebugString());
+      PADDLE_ENFORCE(t->IsInitialized(), "Input %s is not initialized",
+                     ipt_name);
       int tmp = static_cast<int>(t->type());
       PADDLE_ENFORCE(
           tmp == data_type || data_type == -1,
diff --git a/paddle/fluid/framework/scope_pool.cc b/paddle/fluid/framework/scope_pool.cc
new file mode 100644
index 0000000000000000000000000000000000000000..5cb241a7a341d793d8450f0c9cde3929acef8965
--- /dev/null
+++ b/paddle/fluid/framework/scope_pool.cc
@@ -0,0 +1,54 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/scope_pool.h"
+#include "paddle/fluid/framework/threadpool.h"
+
+namespace paddle {
+namespace framework {
+
+ScopePool &ScopePool::Instance() {  // NOLINT
+  static ScopePool pool;
+  return pool;
+}
+
+void ScopePool::DeleteScope(Scope *scope) { delete scope; }
+
+void ScopePool::Insert(std::unique_ptr<Scope> &&s) {
+  std::lock_guard<std::mutex> guard(mtx_);
+  scopes_.insert(s.release());
+}
+
+void ScopePool::Remove(Scope *s) {
+  size_t has_scope;
+  {
+    std::lock_guard<std::mutex> guard(mtx_);
+    has_scope = scopes_.erase(s);
+  }
+  PADDLE_ENFORCE(has_scope > 0, "Delete non-existing global scope");
+  DeleteScope(s);
+}
+
+ScopePool::~ScopePool() { Clear(); }
+
+void ScopePool::Clear() {
+  std::lock_guard<std::mutex> guard(mtx_);
+  for (auto *s : scopes_) {
+    DeleteScope(s);
+  }
+  scopes_.clear();
+}
+
+}  // namespace framework
+}  // namespace paddle
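Judging from the implementation above, ScopePool takes over ownership of a Scope: Remove() both unregisters and deletes it, and Clear() reclaims leftovers at process exit. A short usage sketch (the surrounding function is hypothetical):

#include <memory>
#include "paddle/fluid/framework/scope_pool.h"

void Demo() {
  auto scope = std::unique_ptr<paddle::framework::Scope>(
      new paddle::framework::Scope());
  auto* raw = scope.get();
  paddle::framework::ScopePool::Instance().Insert(std::move(scope));
  // ... use *raw while the pool keeps it alive ...
  paddle::framework::ScopePool::Instance().Remove(raw);  // unregisters and deletes
}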
diff --git a/paddle/fluid/framework/scope_pool.h b/paddle/fluid/framework/scope_pool.h
new file mode 100644
index 0000000000000000000000000000000000000000..a8b468699abe148d44a395cf888158aefab4380b
--- /dev/null
+++ b/paddle/fluid/framework/scope_pool.h
@@ -0,0 +1,46 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <mutex>  // NOLINT
+#include <unordered_set>
+#include "paddle/fluid/framework/scope.h"
+
+namespace paddle {
+namespace framework {
+
+class ScopePool {
+ public:
+  static ScopePool &Instance();  // NOLINT
+
+  void Insert(std::unique_ptr<Scope> &&s);
+
+  void Remove(Scope *s);
+
+  void Clear();
+
+  ~ScopePool();
+
+ private:
+  ScopePool() = default;
+
+  static void DeleteScope(Scope *scope);
+
+  std::unordered_set<Scope *> scopes_;
+  std::mutex mtx_;
+};
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/tensor.cc b/paddle/fluid/framework/tensor.cc
index 5b09cad06c3f87ce29a8c986d30217099bd10d74..ef096c2b810187c50fbcde7d93d9e5a2ecd8b0f3 100644
--- a/paddle/fluid/framework/tensor.cc
+++ b/paddle/fluid/framework/tensor.cc
@@ -28,8 +28,7 @@ void Tensor::check_memory_size() const {
       "or maybe the required data-type mismatches the data already stored.");
 }

-Tensor::Tensor(std::type_index type)
-    : type_(framework::ToDataType(type)), offset_(0) {}
+Tensor::Tensor(const proto::VarType::Type& dtype) : type_(dtype), offset_(0) {}

 size_t Tensor::memory_size() const {
   return holder_ == nullptr ? 0UL : holder_->size() - offset_;
diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index 2e110133a33ede5c58779f9f7c52abd8e74c2fa0..40606d9b06baf4dbebf87f3c02580e49ae6e2a70 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -69,7 +69,7 @@ class Tensor {
  public:
   Tensor() : type_(proto::VarType::FP32), offset_(0) {}

-  explicit Tensor(std::type_index type);
+  explicit Tensor(const proto::VarType::Type&);

   /*! Return a pointer to mutable memory block. */
   template <typename T>
diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h
index cab6d9b67e4e64335be0a386bfffb7ebe4373b3e..871c7bd2a77d1cc5057177619b5cd7b2083ff308 100644
--- a/paddle/fluid/framework/tensor_util.h
+++ b/paddle/fluid/framework/tensor_util.h
@@ -19,6 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/temporary_allocator.h"

 namespace paddle {
 namespace framework {
@@ -151,5 +152,26 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
            src_ptr, size);
 }

+template <typename T>
+paddle::framework::Tensor GetTensor(
+    memory::allocation::AllocationPtr temp_allocation_ptr,
+    const framework::DDim& dim) {
+  auto& deleter = temp_allocation_ptr.get_deleter();
+  auto* allocation_ptr = temp_allocation_ptr.release();
+  auto shared_allocation =
+      std::shared_ptr<memory::allocation::Allocation>(allocation_ptr, deleter);
+
+  PADDLE_ENFORCE(
+      dynamic_cast<platform::TemporaryAllocation*>(allocation_ptr) != nullptr,
+      "The AllocationPtr must be TemporaryAllocation.");
+  PADDLE_ENFORCE_EQ(allocation_ptr->size(),
+                    framework::product(dim) * sizeof(T));
+
+  paddle::framework::Tensor temp_tensor(
+      framework::ToDataType(std::type_index(typeid(T))));
+  temp_tensor.Resize(dim);
+  temp_tensor.ResetHolder(std::move(shared_allocation));
+  return temp_tensor;
+}
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index c751e8515829d06970c55f097f50de8bf33ee2a4..3937884ce4a5a16a1093ac8977033eaa98b2678e 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -231,11 +231,14 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                 inputs[i].data.length());
     } else {
 #ifdef PADDLE_WITH_CUDA
+      platform::DeviceContextPool &pool =
+          platform::DeviceContextPool::Instance();
+      auto *dev_ctx =
+          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
       auto dst_gpu_place = boost::get<platform::CUDAPlace>(place_);
       memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
                    platform::CPUPlace(), inputs[i].data.data(),
-                   inputs[i].data.length(),
-                   0);  // stream 0 for sync copy
+                   inputs[i].data.length(), dev_ctx->stream());
 #else
       PADDLE_THROW("Not compile with CUDA, should not reach here.");
 #endif
diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc
index 3d121e046004dfe6fc6953e0b23852b9ecda5c1b..102147a493ed1454db1a78124200f163f68e555b 100644
--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -208,11 +208,14 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                 inputs[i].data.length());
     } else {
 #ifdef PADDLE_WITH_CUDA
+      platform::DeviceContextPool &pool =
+          platform::DeviceContextPool::Instance();
+      auto *dev_ctx =
+          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
       auto dst_gpu_place = boost::get<platform::CUDAPlace>(place_);
       memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
                    platform::CPUPlace(), inputs[i].data.data(),
-                   inputs[i].data.length(),
-                   0);  // stream 0 for sync copy
+                   inputs[i].data.length(), dev_ctx->stream());
 #else
       PADDLE_THROW("Not compile with CUDA, should not reach here.");
 #endif
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 46ce61b73611d05369f90e7d8f97e9b6724b860f..9aa9db031cd46c9d537bd686f0b23f4c9ae71de6 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -75,6 +75,11 @@ set(LAC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/lac")
 download_model_and_data(${LAC_INSTALL_DIR} "lac_model.tar.gz" "lac_data.txt.tar.gz")
 inference_analysis_api_test(test_analyzer_lac ${LAC_INSTALL_DIR} analyzer_lac_tester.cc)

+# MM DNN
+set(MM_DNN_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/mm_dnn")
+download_model_and_data(${MM_DNN_INSTALL_DIR} "MM_DNN_model.tar.gz" "MM_DNN_data.txt.tar.gz")
+inference_analysis_api_test(test_analyzer_mm_dnn ${MM_DNN_INSTALL_DIR} analyzer_mm_dnn_tester.cc)
+
 # text_classification
 set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classification")
 download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" "text_classification_data.txt.tar.gz")
@@ -103,6 +108,10 @@ inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose
 inference_analysis_api_test_with_fake_data(test_analyzer_resnet50
     "${INFERENCE_DEMO_INSTALL_DIR}/resnet50" analyzer_resnet50_tester.cc "resnet50_model.tar.gz")

+# seq_pool1
+inference_analysis_api_test_with_fake_data(test_analyzer_seq_pool1
+    "${INFERENCE_DEMO_INSTALL_DIR}/seq_pool1" analyzer_seq_pool1_tester.cc "seq_pool1.tar.gz")
+
 # mobilenet with depthwise_conv op
 inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv
     "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz")
diff --git a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..8aaab6d6649e1d4b6db7695df0e9dd219c89422c
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
@@ -0,0 +1,178 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
+
+namespace paddle {
+namespace inference {
+using contrib::AnalysisConfig;
+
+struct DataRecord {
+  std::vector<std::vector<int64_t>> query_data_all, title_data_all;
+  std::vector<size_t> lod1, lod2;
+  size_t batch_iter{0};
+  size_t batch_size{1};
+  size_t num_samples;  // total number of samples
+  DataRecord() = default;
+  explicit DataRecord(const std::string &path, int batch_size = 1)
+      : batch_size(batch_size) {
+    Load(path);
+  }
+  DataRecord NextBatch() {
+    DataRecord data;
+    size_t batch_end = batch_iter + batch_size;
+    // NOTE skip the final batch, if not enough data is provided.
+    if (batch_end <= query_data_all.size()) {
+      data.query_data_all.assign(query_data_all.begin() + batch_iter,
+                                 query_data_all.begin() + batch_end);
+      data.title_data_all.assign(title_data_all.begin() + batch_iter,
+                                 title_data_all.begin() + batch_end);
+      // Prepare LoDs
+      data.lod1.push_back(0);
+      data.lod2.push_back(0);
+      CHECK(!data.query_data_all.empty());
+      CHECK(!data.title_data_all.empty());
+      CHECK_EQ(data.query_data_all.size(), data.title_data_all.size());
+      for (size_t j = 0; j < data.query_data_all.size(); j++) {
+        // calculate lod
+        data.lod1.push_back(data.lod1.back() + data.query_data_all[j].size());
+        data.lod2.push_back(data.lod2.back() + data.title_data_all[j].size());
+      }
+    }
+    batch_iter += batch_size;
+    return data;
+  }
+  void Load(const std::string &path) {
+    std::ifstream file(path);
+    std::string line;
+    int num_lines = 0;
+    while (std::getline(file, line)) {
+      num_lines++;
+      std::vector<std::string> data;
+      split(line, '\t', &data);
+      // load query data
+      std::vector<int64_t> query_data;
+      split_to_int64(data[0], ' ', &query_data);
+      // load title data
+      std::vector<int64_t> title_data;
+      split_to_int64(data[1], ' ', &title_data);
+      query_data_all.push_back(std::move(query_data));
+      title_data_all.push_back(std::move(title_data));
+    }
+    num_samples = num_lines;
+  }
+};
+
+void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
+                   int batch_size) {
+  PaddleTensor lod_query_tensor, lod_title_tensor;
+  lod_query_tensor.name = "left";
+  lod_title_tensor.name = "right";
+  auto one_batch = data->NextBatch();
+  int size1 = one_batch.lod1[one_batch.lod1.size() - 1];  // token batch size
+  int size2 = one_batch.lod2[one_batch.lod2.size() - 1];  // token batch size
+  lod_query_tensor.shape.assign({size1, 1});
+  lod_query_tensor.lod.assign({one_batch.lod1});
+  lod_title_tensor.shape.assign({size2, 1});
+  lod_title_tensor.lod.assign({one_batch.lod2});
+  // assign data
+  TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query_data_all);
+  TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title_data_all);
+  // Set inputs.
+  input_slots->assign({lod_query_tensor, lod_title_tensor});
+  for (auto &tensor : *input_slots) {
+    tensor.dtype = PaddleDType::INT64;
+  }
+}
+
+void SetConfig(contrib::AnalysisConfig *cfg) {
+  cfg->model_dir = FLAGS_infer_model;
+  cfg->use_gpu = false;
+  cfg->device = 0;
+  cfg->specify_input_name = true;
+  cfg->enable_ir_optim = true;
+}
+
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
+  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
+  std::vector<PaddleTensor> input_slots;
+  int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
+  LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
+  for (int bid = 0; bid < epoch; ++bid) {
+    PrepareInputs(&input_slots, &data, FLAGS_batch_size);
+    (*inputs).emplace_back(input_slots);
+  }
+}
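The lod1/lod2 vectors built in NextBatch() above are cumulative sequence offsets: entry j+1 equals entry j plus the length of sequence j, so lengths {3, 1, 2} yield {0, 3, 4, 6} over a flattened [6, 1] tensor. The same construction as a standalone sketch (hypothetical helper, not part of the test):

#include <cstdint>
#include <vector>

std::vector<size_t> BuildLoD(const std::vector<std::vector<int64_t>>& seqs) {
  std::vector<size_t> lod = {0};
  for (const auto& s : seqs) lod.push_back(lod.back() + s.size());
  return lod;  // e.g. {0, 3, 4, 6} for sequence lengths 3, 1, 2
}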
+
+// Easy for profiling independently.
+TEST(Analyzer_MM_DNN, profile) {
+  contrib::AnalysisConfig cfg;
+  SetConfig(&cfg);
+  std::vector<PaddleTensor> outputs;
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
+
+  if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
+    PADDLE_ENFORCE_EQ(outputs.size(), 2UL);
+    for (auto &output : outputs) {
+      size_t size = GetSize(output);
+      PADDLE_ENFORCE_GT(size, 0);
+      float *result = static_cast<float *>(output.data.data());
+      // output is a probability-like score, which lies in (-1, 1).
+      for (size_t i = 0; i < size; i++) {
+        EXPECT_GT(result[i], -1);
+        EXPECT_LT(result[i], 1);
+      }
+    }
+  }
+}
+
+// Check the fuse status
+TEST(Analyzer_MM_DNN, fuse_statis) {
+  contrib::AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  int num_ops;
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+}
+
+// Compare result of NativeConfig and AnalysisConfig
+TEST(Analyzer_MM_DNN, compare) {
+  contrib::AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
+}
+
+// Compare Deterministic result
+TEST(Analyzer_MM_DNN, compare_determine) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                       input_slots_all);
+}
+
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2ae840fd11f627d845a20a59ab14118516311d22
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
@@ -0,0 +1,117 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <string>
+#include <vector>
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+void SetConfig(AnalysisConfig *cfg) {
+  cfg->param_file = FLAGS_infer_model + "/params";
+  cfg->prog_file = FLAGS_infer_model + "/model";
+  cfg->use_gpu = false;
+  cfg->device = 0;
+  cfg->enable_ir_optim = true;
+  cfg->specify_input_name = true;
+  cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
+}
+
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
+  std::vector<std::string> feed_names = {
+      "slot10000_embed", "slot10001_embed", "slot10004_embed",
+      "slot10005_embed", "slot10008_embed", "slot10009_embed",
+      "slot10012_embed", "slot10013_embed", "slot10108_embed",
+      "slot13324_embed", "slot13325_embed", "slot13326_embed",
+      "slot13327_embed", "slot13328_embed", "slot13329_embed",
+      "slot13330_embed", "slot13331_embed", "slot15501_embed",
+      "slot15502_embed", "slot15503_embed", "slot15504_embed",
+      "slot15505_embed", "slot15506_embed", "slot15507_embed",
+      "slot15508_embed", "slot15516_embed", "slot15519_embed",
+      "slot15523_embed", "slot15531_embed", "slot15533_embed",
+      "slot15548_embed", "slot15564_embed", "slot15565_embed",
+      "slot15566_embed", "slot15570_embed", "slot15571_embed",
+      "slot15572_embed", "slot15573_embed", "slot15574_embed",
+      "slot15575_embed", "slot15576_embed", "slot15577_embed",
+      "slot15579_embed", "slot15581_embed", "slot15582_embed",
+      "slot15583_embed", "slot15584_embed", "slot5016_embed",
+      "slot5021_embed", "slot6002_embed", "slot6003_embed",
+      "slot6004_embed", "slot6005_embed", "slot6006_embed",
+      "slot6007_embed", "slot6008_embed", "slot6009_embed",
+      "slot6011_embed", "slot6014_embed", "slot6015_embed",
+      "slot6023_embed", "slot6024_embed", "slot6025_embed",
+      "slot6027_embed", "slot6029_embed", "slot6031_embed",
+      "slot6034_embed", "slot6035_embed", "slot6036_embed",
+      "slot6037_embed", "slot6039_embed", "slot6048_embed",
+      "slot6050_embed", "slot6058_embed", "slot6059_embed",
+      "slot6060_embed", "slot6066_embed", "slot6067_embed",
+      "slot6068_embed", "slot6069_embed", "slot6070_embed",
+      "slot6071_embed", "slot6072_embed", "slot6073_embed",
+      "slot6182_embed", "slot6183_embed", "slot6184_embed",
+      "slot6185_embed", "slot6186_embed", "slot6188_embed",
+      "slot6189_embed", "slot6190_embed", "slot6201_embed",
+      "slot6202_embed", "slot6203_embed", "slot6247_embed",
+      "slot6248_embed", "slot6250_embed", "slot6251_embed",
+      "slot6807_embed", "slot6808_embed", "slot6809_embed",
+      "slot6810_embed", "slot6811_embed", "slot6812_embed",
+      "slot6813_embed", "slot6814_embed", "slot6815_embed",
+      "slot6816_embed", "slot6817_embed", "slot6818_embed",
+      "slot6819_embed", "slot6820_embed", "slot6822_embed",
+      "slot6823_embed", "slot6826_embed", "slot7002_embed",
+      "slot7003_embed", "slot7004_embed", "slot7005_embed",
+      "slot7006_embed", "slot7008_embed", "slot7009_embed",
+      "slot7010_embed", "slot7011_embed", "slot7013_embed",
+      "slot7014_embed", "slot7015_embed", "slot7016_embed",
+      "slot7017_embed", "slot7019_embed", "slot7100_embed",
+      "slot7506_embed", "slot7507_embed", "slot7514_embed",
+      "slot7515_embed", "slot7516_embed"};
+  SetFakeImageInput(inputs, FLAGS_infer_model, true, "model", "params",
+                    &feed_names);
+}
+
+// Easy for profiling independently.
+void profile(bool use_mkldnn = false) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  if (use_mkldnn) {
+    cfg.EnableMKLDNN();
+  }
+  std::vector<PaddleTensor> outputs;
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
+}
+
+TEST(Analyzer_seq_pool1, profile) { profile(); }
+
+// Check the fuse status
+TEST(Analyzer_seq_pool1, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  int num_ops;
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
+  auto fuse_statis = GetFuseStatis(
+      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
+  LOG(INFO) << "num_ops: " << num_ops;
+  EXPECT_EQ(num_ops, 314);
+}
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index b0c8f395ce05fbfceaec3d8b69367292eca714e4..ef7e2198c5db4e723406fe1a31d2c3dde5121931 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -132,7 +132,8 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
 void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
                        const std::string &dirname, bool is_combined = true,
                        std::string model_filename = "model",
-                       std::string params_filename = "params") {
+                       std::string params_filename = "params",
+                       const std::vector<std::string> *feed_names = nullptr) {
   // Set fake_image_data
   PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
   std::vector<std::vector<int64_t>> feed_target_shapes = GetFeedTargetShapes(
@@ -146,26 +147,32 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
     os << "}\n";
   }
   LOG(INFO) << os.str();
-
-  int dim1 = feed_target_shapes[0][1];
-  int dim2 = feed_target_shapes[0][2];
-  int dim3 = feed_target_shapes[0][3];
-
-  PaddleTensor input;
-  std::vector<int> shape({FLAGS_batch_size, dim1, dim2, dim3});
-  input.shape = shape;
-  input.dtype = PaddleDType::FLOAT32;
-
-  // fill input data, for profile easily, do not use random data here.
-  size_t size = FLAGS_batch_size * dim1 * dim2 * dim3;
-  input.data.Resize(size * sizeof(float));
-  float *input_data = static_cast<float *>(input.data.data());
-  for (size_t i = 0; i < size; i++) {
-    *(input_data + i) = static_cast<float>(i) / size;
+  if (feed_names) {
+    PADDLE_ENFORCE_EQ(feed_names->size(), feed_target_shapes.size());
+  }
+  std::vector<PaddleTensor> input_slots(feed_target_shapes.size());
+  for (size_t i = 0; i < feed_target_shapes.size(); ++i) {
+    const auto &feed_shape = feed_target_shapes[i];
+    auto &input = input_slots[i];
+    std::vector<int> shape({FLAGS_batch_size});
+    for (size_t s = 1; s < feed_shape.size(); ++s) {
+      shape.push_back(static_cast<int>(feed_shape[s]));
+    }
+    if (feed_names) {
+      input.name = (*feed_names)[i];
+    }
+    input.shape = shape;
+    input.dtype = PaddleDType::FLOAT32;
+    size_t len = std::accumulate(shape.begin(), shape.end(), 1,
+                                 [](int a, int b) { return a * b; });
+    input.data.Resize(len * sizeof(float));
+    input.lod.assign({{0, static_cast<size_t>(FLAGS_batch_size)}});
+    float *input_data = static_cast<float *>(input.data.data());
+    // fill input data, for profile easily, do not use random data here.
+    for (size_t j = 0; j < len; ++j) {
+      *(input_data + j) = static_cast<float>(j) / len;
+    }
   }
-
-  std::vector<PaddleTensor> input_slots;
-  input_slots.assign({input});
   (*inputs).emplace_back(input_slots);
 }
diff --git a/paddle/fluid/operators/conv_op.h b/paddle/fluid/operators/conv_op.h
index 4a7b31c7d491f0e4b73e2b574456d1567b7cc5dc..2519f5e7acdb7828743c6e114adfe5e530058406 100644
--- a/paddle/fluid/operators/conv_op.h
+++ b/paddle/fluid/operators/conv_op.h
@@ -18,11 +18,11 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/depthwise_conv.h"
 #include "paddle/fluid/operators/math/im2col.h"
 #include "paddle/fluid/operators/math/vol2col.h"
-#include "paddle/fluid/platform/create_tensor_with_allocationptr.h"

 namespace paddle {
 namespace operators {
@@ -161,10 +161,7 @@ class GemmConvKernel : public framework::OpKernel<T> {
       auto tmp_allocation_ptr =
           platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx).Allocate(
               framework::product(col_shape) * sizeof(T));
-      Tensor tep_tensor =
-          platform::GetTensor<T>(std::move(tmp_allocation_ptr), col_shape);
-
-      col.ShareDataWith(tep_tensor);
+      col = framework::GetTensor<T>(std::move(tmp_allocation_ptr), col_shape);
       col_matrix.ShareDataWith(col);
       col_matrix.Resize(col_matrix_shape);
     }
@@ -299,10 +296,7 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
       auto tmp_allocation_ptr =
          platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx).Allocate(
              framework::product(col_shape) * sizeof(T));
-      Tensor tep_tensor =
-          platform::GetTensor<T>(std::move(tmp_allocation_ptr), col_shape);
-
-      col.ShareDataWith(tep_tensor);
+      col = framework::GetTensor<T>(std::move(tmp_allocation_ptr), col_shape);
       col_matrix.ShareDataWith(col);
       col_matrix.Resize(col_matrix_shape);
     }
diff --git a/paddle/fluid/operators/dequantize_mkldnn_op.cc b/paddle/fluid/operators/dequantize_mkldnn_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..262b7408a7f5f65c4d97120914c16f38ce5fdbe7
--- /dev/null
+++ b/paddle/fluid/operators/dequantize_mkldnn_op.cc
@@ -0,0 +1,88 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "mkldnn.hpp"
+#include "paddle/fluid/framework/data_layout_transform.h"
+#include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/operators/dequantize_op.h"
+#include "paddle/fluid/platform/mkldnn_helper.h"
+
+namespace paddle {
+namespace operators {
+
+using mkldnn::memory;
+using mkldnn::primitive;
+using mkldnn::reorder;
+using platform::to_void_cast;
+using Tensor = framework::Tensor;
+using framework::DataLayout;
+using mkldnn::stream;
+using platform::GetMKLDNNFormat;
+
+template <typename T>
+class DeQuantOpKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* input = ctx.Input<Tensor>("Input");
+    auto scale_data = ctx.Attr<float>("Scale");
+    auto* output = ctx.Output<Tensor>("Output");
+    auto& dev_ctx =
+        ctx.template device_context<platform::MKLDNNDeviceContext>();
+    const auto& engine = dev_ctx.GetEngine();
+
+    const T* input_data = input->data<T>();
+    float* output_data = output->mutable_data<float>(ctx.GetPlace());
+    std::vector<float> reorder_scale = {1.0f / scale_data};
+
+    std::vector<primitive> pipeline;
+    std::vector<int> src_tz = paddle::framework::vectorize2int(input->dims());
+    std::vector<int> dst_tz = paddle::framework::vectorize2int(output->dims());
+    mkldnn::memory::data_type src_dt =
+        paddle::framework::ToMKLDNNDataType(input->type());
+    mkldnn::memory::format src_fmt = input->format();
+
+    mkldnn::primitive_attr attri;
+    int mask = 0;
+    attri.set_output_scales(mask, reorder_scale);
+
+    auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
+    auto src_pd = mkldnn::memory::primitive_desc(src_md, engine);
+    auto src_memory =
+        std::make_shared<mkldnn::memory>(src_pd, to_void_cast<T>(input_data));
+    std::shared_ptr<primitive::at> src_memory_p =
+        std::shared_ptr<primitive::at>(new primitive::at(*src_memory));
+
+    auto dst_md = platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32,
+                                          memory::format::nchw);
+    auto dst_pd = mkldnn::memory::primitive_desc(dst_md, engine);
+    auto dst_memory = mkldnn::memory(dst_pd, to_void_cast<float>(output_data));
+
+    auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
+        new reorder::primitive_desc(src_pd, dst_pd, attri));
+    auto reorder_p = std::shared_ptr<reorder>(
+        new reorder(*reorder_pd, *src_memory_p, dst_memory));
+    pipeline.push_back(*reorder_p);
+    stream(stream::kind::eager).submit(pipeline).wait();
+
+    output->set_format(GetMKLDNNFormat(dst_memory));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_KERNEL(dequantize, MKLDNN, ::paddle::platform::CPUPlace,
+                   ops::DeQuantOpKernel<uint8_t>, ops::DeQuantOpKernel<int8_t>);
*/ + +#include "paddle/fluid/operators/dequantize_op.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + +namespace paddle { +namespace operators { + +framework::OpKernelType DeQuantOp::GetExpectedKernelType( + const framework::ExecutionContext& ctx) const { + framework::LibraryType library_ = framework::LibraryType::kMKLDNN; + framework::DataLayout layout_ = framework::DataLayout::kMKLDNN; + + return framework::OpKernelType(ctx.Input("Input")->type(), + ctx.GetPlace(), layout_, library_); +} + +void DeQuantOpMaker::Make() { + AddInput("Input", "input data"); + AddOutput("Output", "output data"); + AddAttr("Scale", "scale data").SetDefault({1.0f}); + AddComment(R"DOC(This op will dequantize data from INT8 to FP32)DOC"); +} + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(dequantize, ops::DeQuantOp, ops::DeQuantOpMaker, + paddle::framework::DefaultGradOpDescMaker); diff --git a/paddle/fluid/operators/dequantize_op.h b/paddle/fluid/operators/dequantize_op.h new file mode 100644 index 0000000000000000000000000000000000000000..75c27a06c210f2d0e4d7cf52aa16f4c123f8ad8e --- /dev/null +++ b/paddle/fluid/operators/dequantize_op.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using framework::OpKernelType; +using framework::Tensor; + +class DeQuantOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + ctx->SetOutputDim("Output", ctx->GetInputDim("Input")); + ctx->ShareLoD("Input", /*->*/ "Output"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + +class DeQuantOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override; +}; + +class DeQuantGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override {} +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/detection/density_prior_box_op.cu b/paddle/fluid/operators/detection/density_prior_box_op.cu index 6a92762896b89a06a91cd11fb38587f7df69e6c3..acd5993154ed03f206f20082231feb5059ef32e1 100644 --- a/paddle/fluid/operators/detection/density_prior_box_op.cu +++ b/paddle/fluid/operators/detection/density_prior_box_op.cu @@ -142,12 +142,13 @@ class DensityPriorBoxOpCUDAKernel : public framework::OpKernel { vars->mutable_data(ctx.GetPlace()); framework::Tensor d_temp; - framework::TensorCopySync(h_temp, ctx.GetPlace(), &d_temp); + framework::TensorCopy(h_temp, ctx.GetPlace(), &d_temp); // At least use 32 threads, at most 512 threads. // blockx is multiple of 32. 
int blockx = std::min( - static_cast(((feature_width * num_priors + 31) >> 5) << 5), 512L); + static_cast(((feature_width * num_priors + 31) >> 5) << 5), + 512L); int gridx = (feature_width * num_priors + blockx - 1) / blockx; dim3 threads(blockx, 1); dim3 grids(gridx, feature_height); diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.cu b/paddle/fluid/operators/elementwise/elementwise_div_op.cu index 1a149298fd33f132a90ff5de3b35dd5894a4ae68..ae669f5525443abe424109b6a6869e2ddaf52ba0 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_div_op.cu @@ -12,18 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_div_op.h" +#include "paddle/fluid/platform/float16.h" namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( elementwise_div, ops::ElementwiseDivKernel, + ops::ElementwiseDivKernel, ops::ElementwiseDivKernel, ops::ElementwiseDivKernel, ops::ElementwiseDivKernel); REGISTER_OP_CUDA_KERNEL( elementwise_div_grad, ops::ElementwiseDivGradKernel, + ops::ElementwiseDivGradKernel, ops::ElementwiseDivGradKernel, ops::ElementwiseDivGradKernel, ops::ElementwiseDivGradKernel, - ops::ElementwiseMulKernel, - ops::ElementwiseMulKernel, - ops::ElementwiseMulKernel); + elementwise_mul, ops::ElementwiseMulKernel, + ops::ElementwiseMulKernel, + ops::ElementwiseMulKernel, + ops::ElementwiseMulKernel, + ops::ElementwiseMulKernel); REGISTER_OP_CUDA_KERNEL( elementwise_mul_grad, - ops::ElementwiseMulGradKernel, - ops::ElementwiseMulGradKernel, - ops::ElementwiseMulGradKernel, - ops::ElementwiseMulGradKernel); + ops::ElementwiseMulGradKernel, + ops::ElementwiseMulGradKernel, + ops::ElementwiseMulGradKernel, + ops::ElementwiseMulGradKernel, + ops::ElementwiseMulGradKernel); diff --git a/paddle/fluid/operators/fill_zeros_like_op.cu.cc b/paddle/fluid/operators/fill_zeros_like_op.cu.cc index 95381774606b2d8e74519befc9a6f7a3ac20aa45..e80a703c30c0335124c089ea82ba4f6fe055acde 100644 --- a/paddle/fluid/operators/fill_zeros_like_op.cu.cc +++ b/paddle/fluid/operators/fill_zeros_like_op.cu.cc @@ -14,6 +14,7 @@ limitations under the License. 
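The float16 entries above are needed because CUDA op kernels are dispatched by a (place, data type) key, so every element type requires its own registered instantiation; without a float16 entry, dispatch simply fails at run time. A toy version of such a registry (illustrative only, not Paddle's macro machinery):

#include <functional>
#include <map>

enum class DType { FP16, FP32, FP64 };
using Kernel = std::function<void(void* out, int n)>;

std::map<DType, Kernel>& Registry() {
  static std::map<DType, Kernel> r;
  return r;
}

template <typename T>
void RegisterZeroKernel(DType key) {
  Registry()[key] = [](void* out, int n) {
    T* p = static_cast<T*>(out);
    for (int i = 0; i < n; ++i) p[i] = T(0);  // zero-fill, like FillZerosLike
  };
}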
*/ #include "paddle/fluid/operators/fill_zeros_like_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/platform/float16.h" namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( @@ -22,4 +23,6 @@ REGISTER_OP_CUDA_KERNEL( ops::FillZerosLikeKernel, ops::FillZerosLikeKernel, ops::FillZerosLikeKernel, + ops::FillZerosLikeKernel, ops::FillZerosLikeKernel); diff --git a/paddle/fluid/operators/math/concat_and_split.cu b/paddle/fluid/operators/math/concat_and_split.cu index b10a19b658e383b8c7b4fbbe8f90da1fe0d4fd14..e925e7bb5917c9433c3c79b9a21a41b4d48a5ba0 100644 --- a/paddle/fluid/operators/math/concat_and_split.cu +++ b/paddle/fluid/operators/math/concat_and_split.cu @@ -131,8 +131,9 @@ class ConcatFunctor { int in_col = input[0].numel() / in_row; int out_row = in_row, out_col = 0; - std::vector inputs_data(in_num); + std::vector inputs_data; std::vector inputs_col(in_num + 1); + inputs_data.reserve(in_num); inputs_col[0] = 0; bool sameShape = true; @@ -143,7 +144,7 @@ class ConcatFunctor { } out_col += t_cols; inputs_col[i + 1] = out_col; - inputs_data[i] = const_cast(input[i].data()); + inputs_data.emplace_back(input[i].data()); } // computation diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/fluid/operators/math/selected_rows_functor.cc index 3eba268cfa9712e4bc5475dd44076bc768552bce..1a11b584e2bab7eeb395bf391da080ec0ba62ae4 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cc +++ b/paddle/fluid/operators/math/selected_rows_functor.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include @@ -252,23 +253,26 @@ elementwise_add_to(const DeviceContext& ctx, BlasT* blas, template struct MergeAdd { framework::SelectedRows operator()(const platform::CPUDeviceContext& context, - const framework::SelectedRows& input) { + const framework::SelectedRows& input, + const bool sorted_result = false) { framework::SelectedRows out; - (*this)(context, input, &out); + (*this)(context, input, &out, sorted_result); return out; } void operator()(const platform::CPUDeviceContext& context, const framework::SelectedRows& input, - framework::SelectedRows* output) { + framework::SelectedRows* output, + const bool sorted_result = false) { std::vector inputs; inputs.push_back(&input); - (*this)(context, inputs, output); + (*this)(context, inputs, output, sorted_result); } void operator()(const platform::CPUDeviceContext& context, const std::vector& inputs, - framework::SelectedRows* output) { + framework::SelectedRows* output, + const bool sorted_result = false) { if (inputs.size() == 0) { VLOG(3) << "no input! 
return"; return; @@ -301,6 +305,9 @@ struct MergeAdd { } std::vector merge_rows(merged_row_set.begin(), merged_row_set.end()); + if (sorted_result) { + std::sort(merge_rows.begin(), merge_rows.end()); + } std::unordered_map rows_to_id; for (size_t i = 0; i < merge_rows.size(); ++i) { rows_to_id[merge_rows[i]] = i; diff --git a/paddle/fluid/operators/math/selected_rows_functor.cu b/paddle/fluid/operators/math/selected_rows_functor.cu index c4fccdbf862fda8a599869c30ae598573ca367aa..0d63f641c8670f8629c52b9e5fc380a250d80dd7 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cu +++ b/paddle/fluid/operators/math/selected_rows_functor.cu @@ -266,7 +266,8 @@ __global__ void MergeAddKernel(const T* input, const int64_t* input_rows, template struct MergeAdd { framework::SelectedRows operator()(const platform::CUDADeviceContext& context, - const framework::SelectedRows& input) { + const framework::SelectedRows& input, + const bool sorted_result = false) { framework::SelectedRows out; (*this)(context, input, &out); return out; @@ -274,7 +275,8 @@ struct MergeAdd { void operator()(const platform::CUDADeviceContext& context, const framework::SelectedRows& input, - framework::SelectedRows* output) { + framework::SelectedRows* output, + const bool sorted_result = false) { framework::Vector input_rows(input.rows()); if (input_rows.size() == 0) { return; @@ -312,7 +314,8 @@ struct MergeAdd { void operator()(const platform::CUDADeviceContext& context, const std::vector& inputs, - framework::SelectedRows* output) { + framework::SelectedRows* output, + const bool sorted_result = false) { if (inputs.size() == 0) { VLOG(3) << "no input! return"; return; diff --git a/paddle/fluid/operators/math/selected_rows_functor.h b/paddle/fluid/operators/math/selected_rows_functor.h index 6d146d39d6d07678e859b82b25ba60ed7661546d..222d761ef91d8aee4843d717dabba7edf131f8dc 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.h +++ b/paddle/fluid/operators/math/selected_rows_functor.h @@ -81,13 +81,16 @@ struct MergeAdd { // unary functor, merge by adding duplicated rows in // the input SelectedRows object. framework::SelectedRows operator()(const DeviceContext& context, - const framework::SelectedRows& input); + const framework::SelectedRows& input, + const bool sorted_result = false); void operator()(const DeviceContext& context, const framework::SelectedRows& input, - framework::SelectedRows* output); + framework::SelectedRows* output, + const bool sorted_result = false); void operator()(const DeviceContext& context, const std::vector& inputs, - framework::SelectedRows* output); + framework::SelectedRows* output, + const bool sorted_result = false); }; enum class ScatterOps { ASSIGN, ADD, SUB, SUBBY, MUL, DIV, DIVBY }; diff --git a/paddle/fluid/operators/metrics/accuracy_op.cu b/paddle/fluid/operators/metrics/accuracy_op.cu index b255d2a7c413b4f965f6b874d342dcb93c7b5e66..4682940f7e15bc8af5dcda24ea058ac7351887c6 100644 --- a/paddle/fluid/operators/metrics/accuracy_op.cu +++ b/paddle/fluid/operators/metrics/accuracy_op.cu @@ -16,6 +16,7 @@ limitations under the License. */ #include #include "paddle/fluid/operators/metrics/accuracy_op.h" #include "paddle/fluid/platform/cuda_primitives.h" +#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/gpu_info.h" namespace paddle { @@ -94,6 +95,7 @@ class AccuracyOpCUDAKernel : public framework::OpKernel { // FIXME(typhoonzero): types of T is for inference data. 
// label data is always int64 -REGISTER_OP_CUDA_KERNEL(accuracy, - paddle::operators::AccuracyOpCUDAKernel, - paddle::operators::AccuracyOpCUDAKernel); +REGISTER_OP_CUDA_KERNEL( + accuracy, paddle::operators::AccuracyOpCUDAKernel, + paddle::operators::AccuracyOpCUDAKernel, + paddle::operators::AccuracyOpCUDAKernel); diff --git a/paddle/fluid/operators/ngraph/ngraph_ops.h b/paddle/fluid/operators/ngraph/ngraph_ops.h index 0ed77ff5577cf4f45a8865db9b42e8bda9839478..8e7457dd56c2413f84008ce467537e07b3e80cc7 100644 --- a/paddle/fluid/operators/ngraph/ngraph_ops.h +++ b/paddle/fluid/operators/ngraph/ngraph_ops.h @@ -22,4 +22,6 @@ limitations under the License. */ #pragma once #include "ops/binary_unnary_op.h" +#include "ops/fill_constant_op.h" #include "ops/mul_op.h" +#include "ops/top_k_op.h" diff --git a/paddle/fluid/operators/ngraph/ops/binary_unnary_op.h b/paddle/fluid/operators/ngraph/ops/binary_unnary_op.h index 4e2f5e231c16cd0fad6db287aa19430c56b534fd..6610380fcf432d0019f7e844fa9304e151b20efd 100644 --- a/paddle/fluid/operators/ngraph/ops/binary_unnary_op.h +++ b/paddle/fluid/operators/ngraph/ops/binary_unnary_op.h @@ -45,7 +45,6 @@ static void BuildUnaryNode( auto out = std::make_shared(input); paddle::platform::SetOutputNode(op, "Out", out, ngb_node_map); } - } // namespace ngraphs } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/ngraph/ops/fill_constant_op.h b/paddle/fluid/operators/ngraph/ops/fill_constant_op.h new file mode 100644 index 0000000000000000000000000000000000000000..5eff69e7b165fa19c775926914b7b3e8fcb043e5 --- /dev/null +++ b/paddle/fluid/operators/ngraph/ops/fill_constant_op.h @@ -0,0 +1,61 @@ +/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
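With the float16 kernel registered above, the accuracy op can consume half-precision inference outputs directly. The quantity it reports is ordinary top-1 accuracy; a simplified numpy sketch with made-up predictions (the real op takes the indices produced by top_k, together with int64 labels):

```python
# Simplified view of what the accuracy op computes, for float16 inputs.
import numpy as np

logits = np.array([[0.1, 0.7, 0.2],
                   [0.8, 0.1, 0.1],
                   [0.3, 0.3, 0.4]], dtype=np.float16)
labels = np.array([1, 0, 2], dtype=np.int64)  # label data is always int64

correct = (logits.argmax(axis=1) == labels).sum()
print(float(correct) / len(labels))  # 1.0
```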
*/ + +#ifdef PADDLE_WITH_NGRAPH +#pragma once + +#include +#include +#include "ngraph/ngraph.hpp" +#include "paddle/fluid/platform/ngraph_helper.h" + +namespace paddle { +namespace operators { +namespace ngraphs { + +void BuildFillConstantNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto op_attrs = paddle::framework::AttrReader(op->Attrs()); + auto vsp = op_attrs.Get>("shape"); + ngraph::Shape shape; + for (auto& sp : vsp) { + shape.push_back(sp); + } + float value = op_attrs.Get("value"); + ngraph::element::Type ng_dtype; + auto data_type = static_cast( + op_attrs.Get("dtype")); + if (data_type == paddle::framework::proto::VarType::FP32) { + ng_dtype = ngraph::element::f32; + } else if (data_type == paddle::framework::proto::VarType::FP64) { + ng_dtype = ngraph::element::f64; + } else if (data_type == paddle::framework::proto::VarType::INT64) { + ng_dtype = ngraph::element::i64; + } else if (data_type == paddle::framework::proto::VarType::INT32) { + ng_dtype = ngraph::element::i32; + } else if (data_type == paddle::framework::proto::VarType::BOOL) { + ng_dtype = ngraph::element::boolean; + } else { + PADDLE_THROW("unsupported data type: %s", data_type); + } + auto out = ngraph::op::Constant::create(ng_dtype, shape, {value}); + paddle::platform::SetOutputNode(op, "Out", out, ngb_node_map); +} +} // namespace ngraphs +} // namespace operators +} // namespace paddle +#endif diff --git a/paddle/fluid/operators/ngraph/ops/top_k_op.h b/paddle/fluid/operators/ngraph/ops/top_k_op.h new file mode 100644 index 0000000000000000000000000000000000000000..2b7254497c0e1aab2e653e69e6461f262b929703 --- /dev/null +++ b/paddle/fluid/operators/ngraph/ops/top_k_op.h @@ -0,0 +1,51 @@ +/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
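`BuildFillConstantNode` above reads the `shape`, `value` and `dtype` attributes of a Paddle `fill_constant` op and emits an equivalent ngraph Constant. The fluid program it would translate can be built with the standard API:

```python
# A fluid program whose fill_constant op is what BuildFillConstantNode
# translates into an ngraph Constant (shape/value/dtype become attributes).
import paddle.fluid as fluid

main = fluid.Program()
with fluid.program_guard(main):
    out = fluid.layers.fill_constant(shape=[2, 3], dtype='float32', value=1.0)

exe = fluid.Executor(fluid.CPUPlace())
result, = exe.run(main, fetch_list=[out])
print(result)  # 2x3 array of 1.0
```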
*/ + +#ifdef PADDLE_WITH_NGRAPH +#pragma once + +#include +#include "ngraph/ngraph.hpp" +#include "paddle/fluid/platform/ngraph_helper.h" + +namespace paddle { +namespace operators { +namespace ngraphs { + +void BuildTopKNode( + const std::shared_ptr& op, + std::shared_ptr< + std::unordered_map>> + ngb_node_map) { + auto op_attrs = paddle::framework::AttrReader(op->Attrs()); + int k = op_attrs.Get("k"); + auto input = paddle::platform::GetInputNode(op, "X", ngb_node_map); + auto top_k = std::make_shared( + input, input->get_shape().size() - 1, ngraph::element::i64, k); + std::shared_ptr indices = + std::make_shared(top_k, 0); + std::shared_ptr out = + std::make_shared(top_k, 1); + auto dummy_out = paddle::platform::GetOutputNode(op, "Out", ngb_node_map); + if (dummy_out && dummy_out->get_element_type() != out->get_element_type()) { + out = std::make_shared(out, + dummy_out->get_element_type()); + } + paddle::platform::SetOutputNode(op, "Indices", indices, ngb_node_map); + paddle::platform::SetOutputNode(op, "Out", out, ngb_node_map); +} +} // namespace ngraphs +} // namespace operators +} // namespace paddle +#endif diff --git a/paddle/fluid/operators/optimizers/adam_op.h b/paddle/fluid/operators/optimizers/adam_op.h index f214d8272f5cc5f1cb2e32c9bb59ca60a1066500..1138bb7400e0e7a00983e7bfaad2b2d9704b77ab 100644 --- a/paddle/fluid/operators/optimizers/adam_op.h +++ b/paddle/fluid/operators/optimizers/adam_op.h @@ -157,8 +157,11 @@ struct AdamFunctor { } }; +template +struct SparseAdamFunctor; + template -struct SparseAdamFunctor { +struct SparseAdamFunctor { T beta1_; T beta2_; T epsilon_; @@ -236,6 +239,106 @@ struct SparseAdamFunctor { } }; +template +struct SparseAdamFunctor { + T beta1_; + T beta2_; + T epsilon_; + + const T* beta1_pow_; + const T* beta2_pow_; + const T* moment1_; + T* moment1_out_; + const T* moment2_; + T* moment2_out_; + const T* lr_; + const T* grad_; + const T* param_; + T* param_out_; + + const int64_t* rows_; + int64_t row_numel_; + int64_t row_count_; + + SparseAdamFunctor(T beta1, T beta2, T epsilon, const T* beta1_pow, + const T* beta2_pow, const T* mom1, T* mom1_out, + const T* mom2, T* mom2_out, const T* lr, const T* grad, + const T* param, T* param_out, const int64_t* rows, + int64_t row_numel, int64_t row_count, bool lazy_mode) + : beta1_(beta1), + beta2_(beta2), + epsilon_(epsilon), + beta1_pow_(beta1_pow), + beta2_pow_(beta2_pow), + moment1_(mom1), + moment1_out_(mom1_out), + moment2_(mom2), + moment2_out_(mom2_out), + lr_(lr), + grad_(grad), + param_(param), + param_out_(param_out), + rows_(rows), + row_numel_(row_numel), + row_count_(row_count) {} + + inline HOSTDEVICE void adam_update(size_t i, T g) const { + // The following code is the same as dense + T mom1 = moment1_[i]; + T mom2 = moment2_[i]; + T lr = *lr_; + T beta1_pow = *beta1_pow_; + T beta2_pow = *beta2_pow_; + T p = param_[i]; + + // Calculation + lr *= sqrt(1 - beta2_pow) / (1 - beta1_pow); + + mom1 = beta1_ * mom1 + (1 - beta1_) * g; + mom2 = beta2_ * mom2 + (1 - beta2_) * g * g; + p -= lr * (mom1 / (sqrt(mom2) + epsilon_)); + + // Write back to global memory + moment1_out_[i] = mom1; + moment2_out_[i] = mom2; + param_out_[i] = p; + } + + inline void operator()(size_t numel) const { + // lr could be reuse + T lr = *lr_; + T beta1_pow = *beta1_pow_; + T beta2_pow = *beta2_pow_; + lr *= sqrt(1 - beta2_pow) / (1 - beta1_pow); + size_t row_count = numel / row_numel_; + + for (size_t i = 0U, j = 0U; i != row_count; ++i) { + if (i == *(rows_ + j)) { + for (size_t k = 0U; k != row_numel_; ++k) 
{ + T g = grad_[j * row_numel_ + k]; + adam_update(i * row_numel_ + k, g); + } + ++j; + } else { + for (size_t k = 0U; k != row_numel_; ++k) { + T mom1 = moment1_[i * row_numel_ + k]; + T mom2 = moment2_[i * row_numel_ + k]; + T p = param_[i * row_numel_ + k]; + + mom1 = beta1_ * mom1; + mom2 = beta2_ * mom2; + + p -= lr * (mom1 / (sqrt(mom2) + epsilon_)); + // Write back to global memory + moment1_out_[i * row_numel_ + k] = mom1; + moment2_out_[i * row_numel_ + k] = mom2; + param_out_[i * row_numel_ + k] = p; + } + } + } + } +}; + template class AdamOpKernel : public framework::OpKernel { public: @@ -331,7 +434,7 @@ class AdamOpKernel : public framework::OpKernel { .Var() ->GetMutable(); merge_func(ctx.template device_context(), grad, - grad_merge_var); + grad_merge_var, true); grad_merge_ptr = grad_merge_var; } @@ -347,32 +450,46 @@ class AdamOpKernel : public framework::OpKernel { } else { #endif rows = grad_merge.rows().data(); - #if defined(PADDLE_WITH_CUDA) } #endif auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); - SparseAdamFunctor functor( - beta1, beta2, epsilon, beta1_pow.template data(), - beta2_pow.template data(), mom1.template data(), - mom1_out.template mutable_data(ctx.GetPlace()), - mom2.template data(), - mom2_out.template mutable_data(ctx.GetPlace()), - lr.template data(), grad_data, param.template data(), - param_out.template mutable_data(ctx.GetPlace()), rows, row_numel, - grad_merge.rows().size(), lazy_mode); - VLOG(3) << "lazy_mode :" << lazy_mode; - if (lazy_mode && platform::is_cpu_place(ctx.GetPlace())) { - size_t row_count = grad_merge.rows().size(); - std::vector cpu_rows(grad_merge.rows()); - for (size_t row_index = 0; row_index < row_count; ++row_index) { - for (size_t offset = 0; offset < row_numel; ++offset) { - size_t i = cpu_rows[row_index] * row_numel + offset; - functor.adam_update(i, grad_data[row_index * row_numel + offset]); + if (platform::is_cpu_place(ctx.GetPlace())) { + SparseAdamFunctor functor( + beta1, beta2, epsilon, beta1_pow.template data(), + beta2_pow.template data(), mom1.template data(), + mom1_out.template mutable_data(ctx.GetPlace()), + mom2.template data(), + mom2_out.template mutable_data(ctx.GetPlace()), + lr.template data(), grad_data, param.template data(), + param_out.template mutable_data(ctx.GetPlace()), rows, row_numel, + grad_merge.rows().size(), lazy_mode); + + if (lazy_mode) { + size_t row_count = grad_merge.rows().size(); + std::vector cpu_rows(grad_merge.rows()); + for (size_t row_index = 0; row_index < row_count; ++row_index) { + for (size_t offset = 0; offset < row_numel; ++offset) { + size_t i = cpu_rows[row_index] * row_numel + offset; + functor.adam_update(i, grad_data[row_index * row_numel + offset]); + } } + } else { + functor(param.numel()); } - } else { + } else if (platform::is_gpu_place(ctx.GetPlace())) { + SparseAdamFunctor functor( + beta1, beta2, epsilon, beta1_pow.template data(), + beta2_pow.template data(), mom1.template data(), + mom1_out.template mutable_data(ctx.GetPlace()), + mom2.template data(), + mom2_out.template mutable_data(ctx.GetPlace()), + lr.template data(), grad_data, param.template data(), + param_out.template mutable_data(ctx.GetPlace()), rows, row_numel, + grad_merge.rows().size(), lazy_mode); + + // FIXME(minqiyang): remove BinarySearch in GPU later platform::ForRange for_range( static_cast(ctx.device_context()), param.numel()); diff --git a/paddle/fluid/operators/optimizers/momentum_op.cu b/paddle/fluid/operators/optimizers/momentum_op.cu index 
8ce739de8dfd74cb43f9521bf39e3127a8a21925..7f9e7246401bc3c765e539ac4395c4feef3c9508 100644 --- a/paddle/fluid/operators/optimizers/momentum_op.cu +++ b/paddle/fluid/operators/optimizers/momentum_op.cu @@ -14,8 +14,11 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/optimizers/momentum_op.h" +#include "paddle/fluid/platform/float16.h" namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( momentum, ops::MomentumOpKernel, - ops::MomentumOpKernel); + ops::MomentumOpKernel, + ops::MomentumOpKernel); diff --git a/paddle/fluid/operators/optimizers/momentum_op.h b/paddle/fluid/operators/optimizers/momentum_op.h index 71f079e4d97f5259359ee6572f584894551452ca..f6ef83c3bad23d709b386f8e75bbc97fa9ba0aab 100644 --- a/paddle/fluid/operators/optimizers/momentum_op.h +++ b/paddle/fluid/operators/optimizers/momentum_op.h @@ -237,7 +237,8 @@ class SparseMomentumFunctor { inline HOSTDEVICE void operator()(size_t i) { auto row_idx = math::BinarySearch(rows_, row_height_, i / row_numel_); - T g = row_idx >= 0 ? g_[row_idx * row_numel_ + i % row_numel_] : 0; + T g = row_idx >= 0 ? g_[row_idx * row_numel_ + i % row_numel_] + : static_cast(0); // put memory access in register const T p = p_[i]; const T lr = lr_[0]; @@ -282,7 +283,8 @@ class SparseMomentumFunctor { inline HOSTDEVICE void operator()(size_t i) { auto row_idx = math::BinarySearch(rows_, row_height_, i / row_numel_); - T g = row_idx >= 0 ? g_[row_idx * row_numel_ + i % row_numel_] : 0; + T g = row_idx >= 0 ? g_[row_idx * row_numel_ + i % row_numel_] + : static_cast(0); // put memory access in register const T p = p_[i]; const T lr = lr_[0]; diff --git a/paddle/fluid/operators/quantize_mkldnn_op.cc b/paddle/fluid/operators/quantize_mkldnn_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..0638e42873376bcec6e4de61494da46d1f0073d1 --- /dev/null +++ b/paddle/fluid/operators/quantize_mkldnn_op.cc @@ -0,0 +1,89 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
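The CPU specialization of `SparseAdamFunctor` above no longer skips rows that are absent from the merged gradient: their moments are still decayed by beta1/beta2, which is exactly the g = 0 case of the dense update. A numpy sketch of one step over hypothetical data:

```python
# Numpy sketch of the CPU SparseAdamFunctor logic: rows present in the
# (merged, sorted) gradient get the full Adam update; absent rows take the
# same update with g = 0. Shapes and hyper-parameters are hypothetical.
import numpy as np

beta1, beta2, eps, lr = 0.9, 0.999, 1e-8, 0.001
beta1_pow, beta2_pow = beta1, beta2   # beta^t after the first step
param = np.ones((4, 2))
mom1 = np.zeros_like(param)
mom2 = np.zeros_like(param)
grad_rows = [1, 3]                    # rows present in the merged gradient
grad = np.full((2, 2), 0.5)

lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow)
for i in range(param.shape[0]):
    # absent rows get g = 0: their moments only decay
    g = grad[grad_rows.index(i)] if i in grad_rows else 0.0
    mom1[i] = beta1 * mom1[i] + (1 - beta1) * g
    mom2[i] = beta2 * mom2[i] + (1 - beta2) * g * g
    param[i] -= lr_t * mom1[i] / (np.sqrt(mom2[i]) + eps)
print(param)
```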
*/ + +#include "mkldnn.hpp" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/operators/quantize_op.h" +#include "paddle/fluid/platform/mkldnn_helper.h" +#include "paddle/fluid/platform/mkldnn_reuse.h" + +namespace paddle { +namespace operators { + +using mkldnn::memory; +using mkldnn::primitive; +using mkldnn::reorder; +using platform::to_void_cast; +using Tensor = framework::Tensor; +using framework::DataLayout; +using mkldnn::stream; +using platform::GetMKLDNNFormat; + +template +class QuantOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input = ctx.Input("Input"); + auto scale_data = ctx.Attr("Scale"); + auto* output = ctx.Output("Output"); + auto& dev_ctx = + ctx.template device_context(); + const auto& engine = dev_ctx.GetEngine(); + + std::vector pipeline; + std::vector src_tz = paddle::framework::vectorize2int(input->dims()); + std::vector dst_tz = paddle::framework::vectorize2int(output->dims()); + + const T* input_data = input->data(); + + mkldnn::primitive_attr attri; + int mask = 0; + attri.set_output_scales(mask, {scale_data}); + + auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32, + input->format()); + auto src_pd = mkldnn::memory::primitive_desc(src_md, engine); + auto src_memory = + std::make_shared(src_pd, to_void_cast(input_data)); + std::shared_ptr src_memory_p = + std::shared_ptr(new primitive::at(*src_memory)); + + bool is_negative = ctx.Attr("is_negative_input"); + std::shared_ptr dst_pd; + std::shared_ptr dst_memory; + if (is_negative) { + platform::ConvMKLDNNHandler::SetDstMemory( + ctx, output, dst_tz, engine, dst_pd, dst_memory); + } else { + platform::ConvMKLDNNHandler::SetDstMemory( + ctx, output, dst_tz, engine, dst_pd, dst_memory); + } + auto reorder_pd = std::shared_ptr( + new reorder::primitive_desc(src_pd, *dst_pd, attri)); + auto reorder_p = std::shared_ptr( + new reorder(*reorder_pd, *src_memory_p, *dst_memory)); + pipeline.push_back(*reorder_p); + stream(stream::kind::eager).submit(pipeline).wait(); + output->set_layout(DataLayout::kMKLDNN); + output->set_format(GetMKLDNNFormat(*dst_memory)); + } +}; +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +// TODO(Xiaoli) Support FP32->S8 quantization. + +REGISTER_OP_KERNEL(quantize, MKLDNN, ::paddle::platform::CPUPlace, + ops::QuantOpKernel); diff --git a/paddle/fluid/operators/quantize_op.cc b/paddle/fluid/operators/quantize_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..bf70c08bdb82218a2d0f63f3e70a2a1093e6a542 --- /dev/null +++ b/paddle/fluid/operators/quantize_op.cc @@ -0,0 +1,47 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ + +#include "paddle/fluid/operators/quantize_op.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + +namespace paddle { +namespace operators { + +framework::OpKernelType QuantOp::GetExpectedKernelType( + const framework::ExecutionContext& ctx) const { + framework::LibraryType library_ = framework::LibraryType::kMKLDNN; + framework::DataLayout layout_ = framework::DataLayout::kMKLDNN; + + return framework::OpKernelType(ctx.Input("Input")->type(), + ctx.GetPlace(), layout_, library_); +} + +void QuantOpMaker::Make() { + AddInput("Input", "input data"); + AddOutput("Output", "output data"); + AddAttr("is_negative_input", + "(bool, default false) Only used in mkldnn INT8 kernel") + .SetDefault(false); + AddAttr("Scale", "scale data").SetDefault({1.0f}); + AddComment(R"DOC(This op will quantize data from FP32 to INT8)DOC"); +} + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OPERATOR(quantize, ops::QuantOp, ops::QuantOpMaker, + paddle::framework::DefaultGradOpDescMaker); diff --git a/paddle/fluid/operators/quantize_op.h b/paddle/fluid/operators/quantize_op.h new file mode 100644 index 0000000000000000000000000000000000000000..091306e4637c7e2393b6736f0e1edf9dd7fd2c8a --- /dev/null +++ b/paddle/fluid/operators/quantize_op.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using framework::OpKernelType; +using framework::Tensor; + +class QuantOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + ctx->SetOutputDim("Output", ctx->GetInputDim("Input")); + ctx->ShareLoD("Input", /*->*/ "Output"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + +class QuantOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override; +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.h b/paddle/fluid/operators/sequence_ops/sequence_mask_op.h index 8fceed3558b4357b7863368c18add329ea9922b3..57d6f4b3ea98d7437f7fa72ed724384a19bcea4a 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.h @@ -52,7 +52,7 @@ class SequenceMaskOpMaker : public framework::OpProtoAndCheckerMaker { "The maximum length of the sequence. 
If maxlen < 0, maxlen " "= max(Input(X)).") .SetDefault(-1) - .AddCustomChecker([](int &v) { + .AddCustomChecker([](const int &v) { PADDLE_ENFORCE(v < 0 || v >= 1, "Attr(maxlen) must be less than 0 or larger than 1"); }); diff --git a/paddle/fluid/operators/split_lod_tensor_op.cc b/paddle/fluid/operators/split_lod_tensor_op.cc index 767449cde981e5925b7144ff1038560c67651f3e..5ede972c71ff3ef8ff00756b97662aabb54d6349 100644 --- a/paddle/fluid/operators/split_lod_tensor_op.cc +++ b/paddle/fluid/operators/split_lod_tensor_op.cc @@ -63,7 +63,7 @@ class SplitLoDTensorOp : public framework::OperatorBase { } auto *mask_data = cpu_mask->data(); - std::vector> copy_ranges(mask_dim[0]); + std::vector> copy_ranges(2); // set out_true/out_false lod for (size_t t = 0; t < 2; t++) { diff --git a/paddle/fluid/operators/top_k_op.cc b/paddle/fluid/operators/top_k_op.cc index c17d1afc309c65035063348d4934ea1783b018ed..9e77f7252de1545e04bd2feaff27374c189dfc48 100644 --- a/paddle/fluid/operators/top_k_op.cc +++ b/paddle/fluid/operators/top_k_op.cc @@ -21,7 +21,7 @@ class TopkOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext *ctx) const override { + void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of TopkOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), @@ -44,12 +44,25 @@ class TopkOp : public framework::OperatorWithKernel { ctx->ShareLoD("X", "Out"); ctx->ShareLoD("X", "Indices"); } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + framework::LibraryType library_{framework::LibraryType::kPlain}; + framework::DataLayout layout_ = framework::DataLayout::kAnyLayout; + return framework::OpKernelType(ctx.Input("X")->type(), + ctx.device_context(), layout_, library_); + } }; class TopkOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", "(Tensor) The input of Topk op"); + AddInput("K", + "(Tensor) Number of top elements to look for along " + "the last dimension (along each row for matrices).") + .AsDispensable(); AddOutput("Out", "(Tensor) The output tensor of Topk op"); AddOutput("Indices", "(Tensor) The indices of Topk elements of input"); AddComment(R"DOC( diff --git a/paddle/fluid/operators/top_k_op.cu b/paddle/fluid/operators/top_k_op.cu index 0cad224ca8860b0e4bc2e3f2bc1659235aadfe2d..c27039dd0a55549fd7ecdc3260154ae90b1a29be 100644 --- a/paddle/fluid/operators/top_k_op.cu +++ b/paddle/fluid/operators/top_k_op.cu @@ -16,6 +16,7 @@ limitations under the License. 
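TopkOp gains a dispensable `K` input above, so k can be decided at runtime instead of being fixed by the `k` attribute; both kernels below resize `Out` and `Indices` accordingly. What the op computes per row, sketched in numpy:

```python
# Per-row top-k, as the op computes it. The new optional "K" input lets k
# arrive as a 1-D tensor at runtime; the plain int here stands in for it.
import numpy as np

x = np.array([[3., 1., 4., 1., 5.],
              [2., 7., 1., 8., 2.]])
k = 2  # would come from the "K" tensor when it is fed

idx = np.argsort(-x, axis=1)[:, :k]      # indices of the k largest values
val = np.take_along_axis(x, idx, axis=1)
print(val)  # [[5. 4.] [8. 7.]]
print(idx)  # [[4 2] [3 1]]
```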
*/ #include "paddle/fluid/operators/top_k_op.h" #include "paddle/fluid/platform/assert.h" #include "paddle/fluid/platform/cuda_device_function.h" +#include "paddle/fluid/platform/float16.h" namespace paddle { namespace operators { @@ -150,7 +151,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair topk[], int* beam, if (k < MaxLength - (*beam)) { topk[k] = topk[k + *beam]; } else { - topk[k].set(-INFINITY, -1); + topk[k].set(-static_cast(INFINITY), -1); } } if (!(*is_empty)) { @@ -160,7 +161,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair topk[], int* beam, } *max = topk[MaxLength - 1]; - if ((*max).v == -1) *is_empty = true; + if ((*max).v == -static_cast(1)) *is_empty = true; *beam = 0; } } @@ -181,7 +182,7 @@ __device__ __forceinline__ void ThreadGetTopK(Pair topk[], int* beam, if (k < MaxLength - *beam) { topk[k] = topk[k + *beam]; } else { - topk[k].set(-INFINITY, -1); + topk[k].set(-static_cast(INFINITY), -1); } } if (!(*is_empty)) { @@ -278,7 +279,7 @@ __global__ void KeMatrixTopK(T* output, int output_stride, int64_t* indices, bool firststep = true; for (int j = 0; j < MaxLength; j++) { - topk[j].set(-INFINITY, -1); + topk[j].set(-static_cast(INFINITY), -1); } while (top_num) { ThreadGetTopK( @@ -326,6 +327,17 @@ class TopkOpCUDAKernel : public framework::OpKernel { auto* indices = ctx.Output("Indices"); size_t k = static_cast(ctx.Attr("k")); + auto* k_t = ctx.Input("K"); + if (k_t) { + Tensor k_host; + framework::TensorCopySync(*k_t, platform::CPUPlace(), &k_host); + k = k_host.data()[0]; + framework::DDim output_dims = output->dims(); + output_dims[output_dims.size() - 1] = k; + output->Resize(output_dims); + indices->Resize(output_dims); + } + const T* input_data = input->data(); T* output_data = output->mutable_data(ctx.GetPlace()); // FIXME(typhoonzero): data is always converted to type T? @@ -362,5 +374,7 @@ class TopkOpCUDAKernel : public framework::OpKernel { } // namespace operators } // namespace paddle -REGISTER_OP_CUDA_KERNEL(top_k, paddle::operators::TopkOpCUDAKernel, - paddle::operators::TopkOpCUDAKernel); +REGISTER_OP_CUDA_KERNEL( + top_k, paddle::operators::TopkOpCUDAKernel, + paddle::operators::TopkOpCUDAKernel, + paddle::operators::TopkOpCUDAKernel); diff --git a/paddle/fluid/operators/top_k_op.h b/paddle/fluid/operators/top_k_op.h index 76ece57b39919148da04caecaa43ea9d2b9d95df..f7bac67300bd56b89d5b08238e78d625f4a773a6 100644 --- a/paddle/fluid/operators/top_k_op.h +++ b/paddle/fluid/operators/top_k_op.h @@ -37,8 +37,16 @@ class TopkKernel : public framework::OpKernel { auto* input = ctx.Input("X"); auto* output = ctx.Output("Out"); auto* indices = ctx.Output("Indices"); - // k is determined by Attr - const size_t k = static_cast(ctx.Attr("k")); + + size_t k = static_cast(ctx.Attr("k")); + auto* k_t = ctx.Input("K"); + if (k_t) { + k = k_t->data()[0]; + framework::DDim output_dims = output->dims(); + output_dims[output_dims.size() - 1] = k; + output->Resize(output_dims); + indices->Resize(output_dims); + } T* output_data = output->mutable_data(ctx.GetPlace()); int64_t* indices_data = indices->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/platform/create_tensor_with_allocationptr.h b/paddle/fluid/platform/create_tensor_with_allocationptr.h deleted file mode 100644 index 00fcc5f86209b2a827ac070773f4b0049b0457d8..0000000000000000000000000000000000000000 --- a/paddle/fluid/platform/create_tensor_with_allocationptr.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/platform/temporary_allocator.h" -namespace paddle { -namespace platform { - -template -paddle::framework::Tensor GetTensor( - memory::allocation::AllocationPtr temp_allocation_ptr, - const framework::DDim &dim) { - auto &deleter = temp_allocation_ptr.get_deleter(); - auto *allocation_ptr = temp_allocation_ptr.release(); - auto shared_allocation = - std::shared_ptr(allocation_ptr, deleter); - - PADDLE_ENFORCE(dynamic_cast(allocation_ptr) != nullptr, - "The AllocationPtr must be TemporaryAllocation."); - PADDLE_ENFORCE_EQ(allocation_ptr->size(), - framework::product(dim) * sizeof(T)); - - paddle::framework::Tensor temp_tensor(std::type_index(typeid(T))); - temp_tensor.Resize(dim); - temp_tensor.ResetHolder(std::move(shared_allocation)); - return temp_tensor; -} - -} // namespace platform -} // namespace paddle diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 81c443d758fcf22545af4bf8e452be8f0ecc0a89..022afb686b29c2c493cfd05600ee372470cbc710 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -256,10 +256,11 @@ CUDADeviceContext::CUDADeviceContext(CUDAPlace place) LOG_FIRST_N(WARNING, 1) << "Please NOTE: device: " << place_.device << ", CUDA Capability: " << compute_capability_ - << ", Driver Version: " << driver_version_ / 1000 + << ", Driver API Version: " << driver_version_ / 1000 << "." << (driver_version_ % 100) / 10 - << ", Runtime Version: " << runtime_version_ / 1000 - << "." << (runtime_version_ % 100) / 10; + << ", Runtime API Version: " + << runtime_version_ / 1000 << "." + << (runtime_version_ % 100) / 10; size_t cudnn_dso_ver = dynload::cudnnGetVersion(); LOG_FIRST_N(WARNING, 1) << "device: " << place_.device << ", cuDNN Version: " << cudnn_dso_ver / 1000 << "." diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index af9744dcb847f8af97e87cc18d2aee376f3f3d6c..7e875801893f3b73f8efaf33af690f8c855beee4 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -41,7 +41,28 @@ limitations under the License. */ namespace paddle { namespace platform { -/*! \brief device temporary allocator singleton */ +/*! \brief device temporary allocator singleton. + * + * Some operator needs temporary memory during computation, for example, + * conv_gemm, which needs use col to store the result of im2col. If we + * create a stack memory which is used by CUDA Kernel, before the + * Computation(...) returns, we should add ctx->Wait(), because the + * execution of CUDA is async, if there doesn't have ctx->Wait(), + * the temporary memory will be released before the CUDA Kernel uses + * it. + * + * DeviceTemporaryAllocator is a singleton, which contains a + * `TemporaryAllocator` for each . 
And the TemporaryAllocator + * contains a temp_allocation_queue which is used to store the temporary + * allocations. The allocation, which is allocated by TemporaryAllocator, + * is a unique_ptr, and when it is not held by any variable, it will be + * pushed into the temp_allocation_queue. There are two opportunities to free + * the allocations of temp_allocation_queue: + * - when the Stream calls cudaStreamSynchronize; + * - when the allocation size of opportunities exceeds a certain threshold + * (defined by FLAGS_limit_of_temporary_allocation). + * + * */ class DeviceTemporaryAllocator { public: static DeviceTemporaryAllocator& Instance() { diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 23f00406de6190dfef91e259d6af358b5dac1713..584df85e80203c383a89954aac73dd1dcd723f7c 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -15,6 +15,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/place.h" @@ -181,6 +182,21 @@ class MKLDNNHandler { return dims2str(operand_dims) + suffix; } + template + static void SetDstMemory( + const framework::ExecutionContext& ctx, framework::Tensor* output, + std::vector dst_tz, const mkldnn::engine& engine, + std::shared_ptr& dst_pd, // NOLINT + std::shared_ptr& dst_memory) { // NOLINT + M* output_data = output->mutable_data(ctx.GetPlace()); + auto dst_md = platform::MKLDNNMemDesc( + {dst_tz}, paddle::framework::ToMKLDNNDataType( + framework::DataTypeTrait::DataType), + mkldnn::memory::format::nhwc); + dst_pd.reset(new mkldnn::memory::primitive_desc(dst_md, engine)); + dst_memory.reset(new mkldnn::memory(*dst_pd, to_void_cast(output_data))); + } + protected: static std::string dims2str(const mkldnn::memory::dims& operand_dims) { std::string dstr = ""; diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h index cbb090adefda03717a634dab24132d36d1cfc648..6ce4bf8f13922e2756c3ee8f189bd36123d6964c 100644 --- a/paddle/fluid/platform/nccl_helper.h +++ b/paddle/fluid/platform/nccl_helper.h @@ -23,6 +23,7 @@ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/platform/dynload/nccl.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/float16.h" #define NCCL_ID_VARNAME "NCCLID" @@ -38,6 +39,8 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) { return ncclInt; } else if (type == framework::proto::VarType::INT64) { return ncclInt64; + } else if (type == framework::proto::VarType::FP16) { + return ncclFloat16; } else { PADDLE_THROW("Not supported"); } diff --git a/paddle/fluid/platform/temporary_allocator.h b/paddle/fluid/platform/temporary_allocator.h index 4e32d2d6959e69c94e869491ef8d11708870f7df..812c4a333189d8c432be398ca0ebbce11f957561 100644 --- a/paddle/fluid/platform/temporary_allocator.h +++ b/paddle/fluid/platform/temporary_allocator.h @@ -29,6 +29,19 @@ class TemporaryAllocation : public memory::allocation::Allocation { memory::allocation::AllocationPtr underlying_allocation_; }; +/*! \brief the TemporaryAllocator is used to alloc the temporary allocation + * which used by CUDA's async operation. + * + * The TemporaryAllocator contains a temp_allocation_queue which + * is used to store the temporary allocations. 
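A toy Python model of the lifecycle described in the comment above: freed temporaries are parked in a queue and only actually released on stream synchronization or once the queued size crosses the FLAGS_limit_of_temporary_allocation threshold. Plain integers stand in for real allocations; this is a conceptual sketch, not Paddle API:

```python
class TempAllocator(object):
    def __init__(self, limit):
        self.limit = limit
        self.queue = []               # temp_allocation_queue

    def release(self, size):          # allocation no longer held anywhere
        self.queue.append(size)
        if sum(self.queue) > self.limit:
            self.free_queue()         # threshold-triggered release

    def wait(self):                   # e.g. the stream synchronizes
        self.free_queue()

    def free_queue(self):
        if self.queue:
            print("freeing %d bytes" % sum(self.queue))
        self.queue = []

alloc = TempAllocator(limit=100)
alloc.release(64)                     # parked, still under the limit
alloc.release(64)                     # crosses the limit -> freed
alloc.wait()                          # nothing left to free
```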
The allocation, which is + * allocated by TemporaryAllocator, is a unique_ptr, and when it is not held + * by any variable, it will be pushed into the temp_allocation_queue. + * + * There is one opportunity to free the allocations of temp_allocation_queue: + * - when the allocation size of opportunities exceeds a certain threshold + * (defined by FLAGS_limit_of_temporary_allocation). + * + * */ class TemporaryAllocator : public memory::allocation::Allocator { public: explicit TemporaryAllocator(platform::Place place); diff --git a/paddle/fluid/platform/temporary_allocator_test.cc b/paddle/fluid/platform/temporary_allocator_test.cc index 3b940b0e8243c0ae1e0eeb3a2c13f3d16c228925..e4e5be5b89f4cbecd6b5e9deec9cc5bffa6a4917 100644 --- a/paddle/fluid/platform/temporary_allocator_test.cc +++ b/paddle/fluid/platform/temporary_allocator_test.cc @@ -14,8 +14,7 @@ #include "paddle/fluid/platform/temporary_allocator.h" #include -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/platform/create_tensor_with_allocationptr.h" +#include "paddle/fluid/framework/tensor_util.h" DECLARE_double(limit_of_temporary_allocation); namespace paddle { @@ -47,6 +46,7 @@ TEST(temporary_allocator, temporary_allocator) { TEST(temporary_allocator, add_callback) { #ifdef PADDLE_WITH_CUDA + const double limit = FLAGS_limit_of_temporary_allocation; FLAGS_limit_of_temporary_allocation = 10; platform::CUDAPlace gpu_place(0); TemporaryAllocator gpu_alloc(gpu_place); @@ -63,7 +63,7 @@ TEST(temporary_allocator, add_callback) { }); { gpu_alloc.Allocate(100); } PADDLE_ENFORCE(deleted); - FLAGS_limit_of_temporary_allocation = -1; + FLAGS_limit_of_temporary_allocation = limit; #endif } @@ -75,8 +75,8 @@ TEST(temporary_allocator, create_tensor_with_allocationptr) { auto allocation = cpu_alloc.Allocate(memory_size); void* address = allocation->ptr(); int numel = memory_size / sizeof(float); - framework::Tensor tensor = - GetTensor(std::move(allocation), framework::make_ddim({numel})); + framework::Tensor tensor = framework::GetTensor( + std::move(allocation), framework::make_ddim({numel})); PADDLE_ENFORCE_EQ(address, tensor.data()); PADDLE_ENFORCE_EQ(tensor.numel(), numel); } @@ -90,8 +90,8 @@ TEST(temporary_allocator, create_tensor_with_allocationptr) { auto allocation = gpu_alloc.Allocate(memory_size); void* address = allocation->ptr(); int numel = memory_size / sizeof(float); - framework::Tensor tensor = - GetTensor(std::move(allocation), framework::make_ddim({numel})); + framework::Tensor tensor = framework::GetTensor( + std::move(allocation), framework::make_ddim({numel})); PADDLE_ENFORCE_EQ(address, tensor.data()); PADDLE_ENFORCE_EQ(tensor.numel(), numel); } @@ -116,7 +116,7 @@ TEST(temporary_allocator, create_tensor_with_allocationptr2) { { auto allocation = cpu_alloc.Allocate(memory_size); address = allocation->ptr(); - framework::Tensor tensor = GetTensor( + framework::Tensor tensor = framework::GetTensor( std::move(allocation), framework::make_ddim({numel})); PADDLE_ENFORCE_EQ(address, tensor.data()); PADDLE_ENFORCE_EQ(tensor.numel(), numel); @@ -138,7 +138,7 @@ TEST(temporary_allocator, create_tensor_with_allocationptr2) { { auto allocation = gpu_alloc.Allocate(memory_size); address = allocation->ptr(); - framework::Tensor tensor = GetTensor( + framework::Tensor tensor = framework::GetTensor( std::move(allocation), framework::make_ddim({numel})); PADDLE_ENFORCE_EQ(address, tensor.data()); PADDLE_ENFORCE_EQ(tensor.numel(), numel); diff --git a/paddle/fluid/pybind/CMakeLists.txt 
b/paddle/fluid/pybind/CMakeLists.txt index fb8bcb190bda59e23d118547f451be46c963cce9..72b0f216d3aafbb95931590935b0bf967a8d5be8 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -1,5 +1,5 @@ -set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer) +set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune feed_fetch_method pass_builder parallel_executor profiler layer scope_pool) if(WITH_PYTHON) list(APPEND PYBIND_DEPS py_func_op) endif() diff --git a/paddle/fluid/pybind/const_value.cc b/paddle/fluid/pybind/const_value.cc index 06d8b65fb1480d9f621ca937c1d66ab7e910f010..f8ded9f94ecaf3df1e14aead60ae12abcf8c34a9 100644 --- a/paddle/fluid/pybind/const_value.cc +++ b/paddle/fluid/pybind/const_value.cc @@ -49,6 +49,9 @@ void BindConstValue(pybind11::module* m) { op_proto_and_checker_maker.def( "kOpNameScopeAttrName", framework::OpProtoAndCheckerMaker::OpNamescopeAttrName); + op_proto_and_checker_maker.def( + "kOpCreationCallstackAttrName", + framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName); } } // namespace pybind diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 88a2a5276ab52e25b4f790e3a2f1386ed0715b4e..81d63aace04a44b8ab589f212d83d3c2f5c2d522 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -32,6 +32,7 @@ limitations under the License. */ #include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/framework/prune.h" #include "paddle/fluid/framework/reader.h" +#include "paddle/fluid/framework/scope_pool.h" #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/version.h" #include "paddle/fluid/imperative/layer.h" @@ -117,6 +118,9 @@ PYBIND11_MODULE(core, m) { return paddle::operators::AppendPythonCallableObjectAndReturnId(py_obj); }); + m.add_object("_cleanup", + py::capsule([]() { ScopePool::Instance().Clear(); })); + py::class_(m, "VarBase", R"DOC()DOC") .def(py::init<>()) .def("_run_backward", @@ -454,7 +458,7 @@ All parameter, weight, gradient are variables in Paddle. }, py::return_value_policy::copy); - py::class_(m, "Scope", R"DOC( + py::class_(m, "_Scope", R"DOC( Scope is an association of a name to Variable. All variables belong to Scope. Variables in a parent scope can be retrieved from local scope. @@ -474,17 +478,26 @@ All parameter, weight, gradient are variables in Paddle. param.set(param_array, place) )DOC") + .def("_remove_from_pool", + [](Scope &self) { ScopePool::Instance().Remove(&self); }) .def("var", [](Scope &self, const std::string &name) -> Variable * { return self.Var(name); }, py::return_value_policy::reference) .def("find_var", &Scope::FindVar, py::return_value_policy::reference) - .def(py::init<>()) .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); }, py::return_value_policy::reference) .def("drop_kids", &Scope::DropKids); + m.def("Scope", + []() -> Scope * { + auto *s = new Scope(); + ScopePool::Instance().Insert(std::unique_ptr(s)); + return s; + }, + py::return_value_policy::reference); + //! @note: Be careful! PyBind will return std::string as an unicode, not //! Python str. If you want a str object, you should cast them in Python. 
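On the Python side, `fluid.Scope()` keeps its old spelling but is now a factory function returning a pool-managed `core._Scope`, which lets the `_cleanup` capsule destroy all live scopes at interpreter exit. Typical user code is unchanged:

```python
# fluid.Scope() is now a factory returning a pool-managed core._Scope;
# user-facing usage stays the same as before this change.
import numpy as np
import paddle.fluid as fluid

scope = fluid.Scope()                 # created through the ScopePool factory
with fluid.scope_guard(scope):
    var = scope.var("my_var")         # Scope API is unchanged
    t = var.get_tensor()
    t.set(np.ones((2, 2), dtype='float32'), fluid.CPUPlace())
assert scope.find_var("my_var") is not None
```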
m.def("get_all_op_protos", []() -> std::vector { diff --git a/paddle/scripts/installation_validate.py b/paddle/scripts/installation_validate.py new file mode 100644 index 0000000000000000000000000000000000000000..f84e2f4b176609dec28a8e29afea74d3654e9e4c --- /dev/null +++ b/paddle/scripts/installation_validate.py @@ -0,0 +1,18 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as fluid +import paddle as pd + +print(pd.__version__) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 2e6b40148d4675cc35c77673b7ed93070f31b854..418dc1346840f707f38836e09a14ca2c19c063ff 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -79,6 +79,7 @@ function cmake_gen() { PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/Library/Frameworks/Python.framework/Versions/2.7/bin/python2.7 -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/2.7/include/python2.7 -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/2.7/lib/libpython2.7.dylib" + pip install --user -r ${PADDLE_ROOT}/python/requirements.txt else exit 1 fi @@ -91,6 +92,7 @@ function cmake_gen() { -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.5/include/python3.5m/ -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.5/lib/libpython3.5m.dylib" WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON} + pip3.5 install --user -r ${PADDLE_ROOT}/python/requirements.txt else exit 1 fi @@ -103,6 +105,7 @@ function cmake_gen() { -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.6/include/python3.6m/ -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.6/lib/libpython3.6m.dylib" WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON} + pip3.6 install --user -r ${PADDLE_ROOT}/python/requirements.txt else exit 1 fi @@ -115,6 +118,7 @@ function cmake_gen() { -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.7/include/python3.7m/ -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib" WITH_FLUID_ONLY=${WITH_FLUID_ONLY:-ON} + pip3.7 install --user -r ${PADDLE_ROOT}/python/requirements.txt else exit 1 fi @@ -441,7 +445,9 @@ EOF # make install should also be test when unittest make install -j 8 if [ "$1" == "cp27-cp27m" ]; then + set -e pip install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl + python ${PADDLE_ROOT}/paddle/scripts/installation_validate.py elif [ "$1" == "cp35-cp35m" ]; then pip3.5 install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl elif [ "$1" == "cp36-cp36m" ]; then diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 8f3660ca387ba10309f829bd04ac2ffdc573f3d6..e0078e53141ac7834fd00e4f2dbd8a6c8a1d6b1b 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -46,7 +46,7 @@ from . import transpiler from . 
import distribute_lookup_table from .param_attr import ParamAttr, WeightNormParamAttr from .data_feeder import DataFeeder -from .core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope +from .core import LoDTensor, LoDTensorArray, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope, _Scope from .transpiler import DistributeTranspiler, \ memory_optimize, release_memory, DistributeTranspilerConfig from .lod_tensor import create_lod_tensor, create_random_int_lodtensor diff --git a/python/paddle/fluid/contrib/__init__.py b/python/paddle/fluid/contrib/__init__.py index ece97b661fd7d60f8822439a84ee4403b9e3d81c..24621110b18f63779da14edc42765aae3bf4abd6 100644 --- a/python/paddle/fluid/contrib/__init__.py +++ b/python/paddle/fluid/contrib/__init__.py @@ -22,6 +22,8 @@ from . import op_frequence from .op_frequence import * from . import quantize from .quantize import * +from . import slim +from .slim import * from . import utils from .utils import * @@ -30,4 +32,5 @@ __all__ += decoder.__all__ __all__ += memory_usage_calc.__all__ __all__ += op_frequence.__all__ __all__ += quantize.__all__ +__all__ += slim.__all__ __all__ += utils.__all__ diff --git a/python/paddle/fluid/contrib/slim/__init__.py b/python/paddle/fluid/contrib/slim/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..22dbf7c8b6bb2da7c310a20bdcbaffca248575b0 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .core import * +from .graph import * +from .prune import * +__all__ = [ + 'build_compressor', + 'CompressPass', + 'ImitationGraph', + 'SensitivePruneStrategy', + 'MagnitudePruner', + 'RatioPruner', +] diff --git a/python/paddle/fluid/contrib/slim/core/__init__.py b/python/paddle/fluid/contrib/slim/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7826d5830a6f7f6d42cb1275c2289695c080e52f --- /dev/null +++ b/python/paddle/fluid/contrib/slim/core/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import config +from .config import * +from . import compress_pass +from .compress_pass import * +from . import strategy +from .strategy import * +from . 
import pass_builder +from .pass_builder import * + +__all__ = config.__all__ + compress_pass.__all__ + strategy.__all__ + pass_builder.__all__ diff --git a/python/paddle/fluid/contrib/slim/core/compress_pass.py b/python/paddle/fluid/contrib/slim/core/compress_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..c4c348b878a1df43d7fb909f506c8cf65366866f --- /dev/null +++ b/python/paddle/fluid/contrib/slim/core/compress_pass.py @@ -0,0 +1,129 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ....core import CPUPlace +from ..graph import get_executor + +__all__ = ['Context', 'CompressPass'] + + +class Context(object): + """ + The context in the process of compression. + Args: + exe: The executor used to execute the graph. + graph: The graph to be compressed. + scope: The scope used to execute the graph. + program_exe: The program_exe is used to execute the program + created for modifying the variables in scope. + """ + + def __init__(self, exe, graph, scope, program_exe=None): + # The total number of epochs to be trained. + self.epoch = 0 + # Current epoch + self.epoch_id = 0 + # Current batch + self.batch_id = 0 + self.exe = exe + self.graph = graph + self.scope = scope + self.program_exe = program_exe + + +class CompressPass(object): + """ + The pass used to compress a model. + Args: + place: The device used in compression. + data_reader: The data_reader used to run the graph. + data_feeder: The data_feeder used to run the graph. + scope: The scope used to run the graph. + metrics: The metrics for evaluating the model. + epoch: The total number of epochs of training in compression. + program_exe: The program_exe is used to execute the program + created for modifying the variables in scope. + """ + + def __init__(self, + place=None, + data_reader=None, + data_feeder=None, + scope=None, + metrics=None, + epoch=None, + program_exe=None): + self.strategies = [] + self.place = CPUPlace() if place is None else place + self.data_reader = data_reader + self.data_feeder = data_feeder + self.scope = scope + self.metrics = metrics + self.epoch = epoch + self.program_exe = program_exe + + def add_strategy(self, strategy): + """ + Add a strategy to the current compress pass. + Args: + strategy: The strategy to be added into the current compress pass. + """ + self.strategies.append(strategy) + self.epoch = max(strategy.end_epoch, self.epoch) + + def apply(self, graph): + """ + Compress a model. + Args: + graph: The target graph to be compressed. 
+ """ + self.executor = get_executor(graph, self.place) + context = Context( + self.executor, graph, self.scope, program_exe=self.program_exe) + + for strategy in self.strategies: + strategy.on_compress_begin(context) + + for epoch in range(self.epoch): + + for strategy in self.strategies: + strategy.on_epoch_begin(context) + + for data in self.data_reader(): + + for strategy in self.strategies: + strategy.on_batch_begin(context) + fetches = None + if self.metrics: + fetches = self.metrics.values() + feed = None + if self.data_feeder: + feed = self.data_feeder.feed(data) + results = self.executor.run(graph, + fetches=fetches, + scope=self.scope, + feed=feed) + if results: + print("results: {}".format( + zip(self.metrics.keys(), results))) + for strategy in self.strategies: + strategy.on_batch_end(context) + context.batch_id += 1 + + for strategy in self.strategies: + strategy.on_epoch_end(context) + context.epoch_id += 1 + + for strategy in self.strategies: + strategy.on_compress_end(context) diff --git a/python/paddle/fluid/contrib/slim/core/config.py b/python/paddle/fluid/contrib/slim/core/config.py new file mode 100644 index 0000000000000000000000000000000000000000..811c45700376aff9883fe197007b582f63817f03 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/core/config.py @@ -0,0 +1,111 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +import funcsigs +import yaml +from collections import OrderedDict +from ..prune import * +from .compress_pass import * +from .strategy import * + +__all__ = ['ConfigFactory'] +"""This factory is used to create instances by loading and parsing configure file with yaml format. +""" + + +class ConfigFactory(object): + def __init__(self, config): + """Init a factory from configure file.""" + self.instances = {} + self.version = None + self._parse_config(config) + + def get_compress_pass(self): + """ + Get compress pass from factory. + """ + return self.instance('compress_pass') + + def instance(self, name): + """ + Get instance from factory. 
+ """ + if name in self.instances: + return self.instances[name] + else: + return None + + def _new_instance(self, name, attrs): + if name not in self.instances: + class_ = globals()[attrs['class']] + sig = funcsigs.signature(class_.__init__) + keys = [ + param.name for param in sig.parameters.values() + if (param.kind == param.POSITIONAL_OR_KEYWORD) + ][1:] + keys = set(attrs.keys()).intersection(set(keys)) + args = {} + for key in keys: + value = attrs[key] + if isinstance(value, str) and value in self.instances: + value = self.instances[value] + args[key] = value + self.instances[name] = class_(**args) + return self.instances.get(name) + + def _parse_config(self, config): + assert config + with open(config, 'r') as config_file: + key_values = self._ordered_load(config_file) + for key in key_values: + # parse version + if key == 'version' and self.version is None: + self.version = int(key_values['version']) + assert self.version == int(key_values['version']) + + # parse pruners + if key == 'pruners' or key == 'strategies': + instances = key_values[key] + for name in instances: + self._new_instance(name, instances[name]) + + if key == 'compress_pass': + compress_pass = self._new_instance(key, key_values[key]) + for name in key_values[key]['strategies']: + strategy = self.instance(name) + compress_pass.add_strategy(strategy) + + if key == 'include': + for config_file in key_values[key]: + self._parse_config(config_file.strip()) + + def _ordered_load(self, + stream, + Loader=yaml.Loader, + object_pairs_hook=OrderedDict): + """ + See: https://stackoverflow.com/questions/5121931/in-python-how-can-you-load-yaml-mappings-as-ordereddicts + """ + + class OrderedLoader(Loader): + pass + + def construct_mapping(loader, node): + loader.flatten_mapping(node) + return object_pairs_hook(loader.construct_pairs(node)) + + OrderedLoader.add_constructor( + yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping) + return yaml.load(stream, OrderedLoader) diff --git a/python/paddle/fluid/contrib/slim/core/pass_builder.py b/python/paddle/fluid/contrib/slim/core/pass_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..fc1ddc94e04f1d606292071ba7e5cc74fedd5d36 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/core/pass_builder.py @@ -0,0 +1,39 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .compress_pass import CompressPass +from .config import ConfigFactory + +__all__ = ['build_compressor'] + + +def build_compressor(place=None, + data_reader=None, + data_feeder=None, + scope=None, + metrics=None, + epoch=None, + config=None): + if config is not None: + factory = ConfigFactory(config) + comp_pass = factory.get_compress_pass() + else: + comp_pass = CompressPass() + comp_pass.place = place + comp_pass.data_reader = data_reader + comp_pass.data_feeder = data_feeder + comp_pass.scope = scope + comp_pass.metrics = metrics + comp_pass.epoch = epoch + return comp_pass diff --git a/python/paddle/fluid/contrib/slim/core/strategy.py b/python/paddle/fluid/contrib/slim/core/strategy.py new file mode 100644 index 0000000000000000000000000000000000000000..74d98e98b0c390599acfaefeb0636a599b46d391 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/core/strategy.py @@ -0,0 +1,48 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ['Strategy'] + + +class Strategy(object): + """ + Base class for all strategies. + """ + + def __init__(self, start_epoch=0, end_epoch=10): + """ + Args: + start_epoch: The first epoch to apply the strategy. + end_epoch: The last epoch to apply the strategy. + """ + self.start_epoch = start_epoch + self.end_epoch = end_epoch + + def on_compress_begin(self, context): + pass + + def on_epoch_begin(self, context): + pass + + def on_epoch_end(self, context): + pass + + def on_batch_begin(self, context): + pass + + def on_batch_end(self, context): + pass + + def on_compress_end(self, context): + pass diff --git a/python/paddle/fluid/contrib/slim/demo/filter_prune/config.yaml b/python/paddle/fluid/contrib/slim/demo/filter_prune/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea888fa2c74a23b4769f75dce6a776afcca41a51 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/demo/filter_prune/config.yaml @@ -0,0 +1,28 @@ +version: 1.0 +pruners: + pruner_1: + class: 'RatioPruner' + ratios: + 'conv1_1.w': 0.3 + 'conv1_2.w': 0.4 + '*': 0.9 + group_dims: + '*': [1, 2, 3] + criterions: + '*': 'l1-norm' +strategies: + strategy_1: + class: 'SensitivePruneStrategy' + pruner: 'pruner_1' + start_epoch: 0 + end_epoch: 10 + delta_rate: 0.20 + acc_loss_threshold: 0.2 + sensitivities: + 'conv1_1.w': 0.4 + +compress_pass: + class: 'CompressPass' + epoch: 100 + strategies: + - strategy_1 diff --git a/python/paddle/fluid/contrib/slim/demo/filter_prune/demo.py b/python/paddle/fluid/contrib/slim/demo/filter_prune/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..21c59c0c9d2d9b76932ab6eeff73754940a3bfa0 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/demo/filter_prune/demo.py @@ -0,0 +1,69 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.fluid as fluid
+import paddle
+import os
+import sys
+from paddle.fluid.contrib.slim import CompressPass
+from paddle.fluid.contrib.slim import build_compressor
+from paddle.fluid.contrib.slim import ImitationGraph
+
+
+class LinearModel(object):
+    def __init__(self):
+        pass
+
+    def train(self):
+        train_program = fluid.Program()
+        startup_program = fluid.Program()
+        startup_program.random_seed = 10
+        with fluid.program_guard(train_program, startup_program):
+            x = fluid.layers.data(name='x', shape=[13], dtype='float32')
+            y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+            predict = fluid.layers.fc(input=x, size=1, act=None)
+            cost = fluid.layers.square_error_cost(input=predict, label=y)
+            avg_cost = fluid.layers.mean(cost)
+            eval_program = train_program.clone()
+            sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+            sgd_optimizer.minimize(avg_cost)
+
+        train_reader = paddle.batch(
+            paddle.dataset.uci_housing.train(), batch_size=1)
+        eval_reader = paddle.batch(
+            paddle.dataset.uci_housing.test(), batch_size=1)
+        place = fluid.CPUPlace()
+        train_feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
+        eval_feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
+        exe = fluid.Executor(place)
+        exe.run(startup_program)
+        train_metrics = {"loss": avg_cost.name}
+        eval_metrics = {"loss": avg_cost.name}
+
+        graph = ImitationGraph(train_program)
+        config = './config.yaml'
+        comp_pass = build_compressor(
+            place,
+            data_reader=train_reader,
+            data_feeder=train_feeder,
+            scope=fluid.global_scope(),
+            metrics=train_metrics,
+            epoch=1,
+            config=config)
+        comp_pass.apply(graph)
+
+
+if __name__ == "__main__":
+    model = LinearModel()
+    model.train()
diff --git a/python/paddle/fluid/contrib/slim/graph/__init__.py b/python/paddle/fluid/contrib/slim/graph/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d65472d193b639f0766e278ec14b5dc36c5d62bc
--- /dev/null
+++ b/python/paddle/fluid/contrib/slim/graph/__init__.py
@@ -0,0 +1,23 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import executor
+from .executor import *
+from . import graph
+from .graph import *
+from . import graph_pass
+from .graph_pass import *
+__all__ = executor.__all__
+__all__ += graph.__all__
+__all__ += graph_pass.__all__
diff --git a/python/paddle/fluid/contrib/slim/graph/executor.py b/python/paddle/fluid/contrib/slim/graph/executor.py
new file mode 100644
index 0000000000000000000000000000000000000000..c02c3af82013287bf19e1869cb60dc65239b720a
--- /dev/null
+++ b/python/paddle/fluid/contrib/slim/graph/executor.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+from abc import abstractmethod
+from .... import executor
+from .graph import IRGraph, ImitationGraph
+
+__all__ = ['get_executor']
+
+
+class GraphExecutor(object):
+    __metaclass__ = abc.ABCMeta
+
+    def __init__(self, place):
+        self.place = place
+
+    @abstractmethod
+    def run(self, graph, fetches=None, feed=None):
+        pass
+
+
+class IRGraphExecutor(GraphExecutor):
+    def run(self, graph, fetches, feed=None):
+        pass
+
+
+class ImitationGraphExecutor(GraphExecutor):
+    def __init__(self, place):
+        super(ImitationGraphExecutor, self).__init__(place)
+        self.exe = executor.Executor(place)
+
+    def run(self, graph, scope=None, fetches=None, feed=None):
+        assert isinstance(graph, ImitationGraph)
+        fetch_list = None
+        if fetches:
+            fetch_list = [
+                graph.program.global_block().var(name) for name in fetches
+            ]
+        results = self.exe.run(graph.program,
+                               scope=scope,
+                               fetch_list=fetch_list,
+                               feed=feed)
+        return results
+
+
+def get_executor(graph, place):
+    if isinstance(graph, ImitationGraph):
+        return ImitationGraphExecutor(place)
+    if isinstance(graph, IRGraph):
+        return IRGraphExecutor(place)
diff --git a/python/paddle/fluid/contrib/slim/graph/graph.py b/python/paddle/fluid/contrib/slim/graph/graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d6b0702035d49189c0919f976ea3c0c52663547
--- /dev/null
+++ b/python/paddle/fluid/contrib/slim/graph/graph.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ....framework import Program
+
+__all__ = ['Graph', 'ImitationGraph', 'IRGraph']
+
+
+class Graph(object):
+    """
+    Base class for all graphs.
+    """
+
+    def __init__(self):
+        pass
+
+    def all_parameters(self):
+        """
+        Return all the parameters in the current graph.
+ """ + pass + + +class ImitationGraph(Graph): + def __init__(self, program=None): + super(ImitationGraph, self).__init__() + self.program = Program() if program is None else program + + def all_parameters(self): + return self.program.global_block().all_parameters() + + +class IRGraph(Graph): + pass diff --git a/python/paddle/fluid/contrib/slim/graph/graph_pass.py b/python/paddle/fluid/contrib/slim/graph/graph_pass.py new file mode 100644 index 0000000000000000000000000000000000000000..1db6c4f110daa44be7fcbcc36f47224797b6dc88 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/graph/graph_pass.py @@ -0,0 +1,42 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = ['GraphPass', 'PruneParameterPass'] + + +class GraphPass(object): + """ + Base class for all graph pass. + """ + + def __init__(self): + pass + + def apply(self, graph): + pass + + +class PruneParameterPass(GraphPass): + """ + Generate a graph for pruning parameters from target graph. + """ + + def __init__(self, pruned_params, thresholds): + super(PruneParameterPass, self).__init__() + self.pruned_params = pruned_params + self.thresholds = thresholds + self.default_threshold = thresholds['*'] + + def apply(self, graph): + pass diff --git a/python/paddle/fluid/contrib/slim/prune/__init__.py b/python/paddle/fluid/contrib/slim/prune/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..764a45bb130a9993015858f1cbdbc9f3b864bd5e --- /dev/null +++ b/python/paddle/fluid/contrib/slim/prune/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import pruner +from .pruner import * +from . import prune_strategy +from .prune_strategy import * + +__all__ = pruner.__all__ +__all__ += prune_strategy.__all__ diff --git a/python/paddle/fluid/contrib/slim/prune/prune_strategy.py b/python/paddle/fluid/contrib/slim/prune/prune_strategy.py new file mode 100644 index 0000000000000000000000000000000000000000..34c5107daa3cde10e7995902be37e34e19664da8 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/prune/prune_strategy.py @@ -0,0 +1,66 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ..core.strategy import Strategy
+from ....framework import Program, program_guard
+from .... import layers
+import numpy as np
+
+__all__ = ['SensitivePruneStrategy', 'PruneStrategy']
+
+
+class SensitivePruneStrategy(Strategy):
+    def __init__(self,
+                 pruner=None,
+                 start_epoch=0,
+                 end_epoch=10,
+                 delta_rate=0.20,
+                 acc_loss_threshold=0.2,
+                 sensitivities=None):
+        super(SensitivePruneStrategy, self).__init__(start_epoch, end_epoch)
+        self.pruner = pruner
+        self.delta_rate = delta_rate
+        self.acc_loss_threshold = acc_loss_threshold
+        self.sensitivities = sensitivities
+
+
+class PruneStrategy(Strategy):
+    """
+    A strategy that prunes weights by threshold or by ratio, iteratively.
+    """
+
+    def __init__(self,
+                 pruner,
+                 mini_batch_pruning_frequency=1,
+                 start_epoch=0,
+                 end_epoch=10):
+        super(PruneStrategy, self).__init__(start_epoch, end_epoch)
+        self.pruner = pruner
+        self.mini_batch_pruning_frequency = mini_batch_pruning_frequency
+
+    def _trigger(self, context):
+        return (context.batch_id % self.mini_batch_pruning_frequency == 0 and
+                self.start_epoch <= context.epoch_id < self.end_epoch)
+
+    def on_batch_end(self, context):
+        if self._trigger(context):
+            prune_program = Program()
+            with program_guard(prune_program):
+                for param in context.graph.all_parameters():
+                    prune_program.global_block().clone_variable(param)
+                    p = prune_program.global_block().var(param.name)
+                    zeros_mask = self.pruner.prune(p)
+                    pruned_param = p * zeros_mask
+                    layers.assign(input=pruned_param, output=param)
+            context.program_exe.run(prune_program, scope=context.scope)
diff --git a/python/paddle/fluid/contrib/slim/prune/pruner.py b/python/paddle/fluid/contrib/slim/prune/pruner.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca72bcb6f6004c18f3ec794850e0aeaecb92d7ac
--- /dev/null
+++ b/python/paddle/fluid/contrib/slim/prune/pruner.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+from .... import layers
+
+__all__ = ['Pruner', 'MagnitudePruner', 'RatioPruner']
+
+
+class Pruner(object):
+    """
+    Base class of all pruners.
+    """
+
+    def __init__(self):
+        pass
+
+    def prune(self, param):
+        pass
+
+
+class MagnitudePruner(Pruner):
+    """
+    Pruner that prunes a parameter by threshold.
+ """ + + def __init__(self, threshold): + self.threshold = threshold + + def prune(self, param, threshold=None): + if threshold is None: + thres = layers.fill_constant( + shape=[1], dtype='float32', value=self.threshold) + else: + thres = threshold + zeros_mask = layers.less_than(x=param, y=thres) + return zeros_mask + + +class RatioPruner(Pruner): + """ + Pruner used to pruning a parameter by ratio. + """ + + def __init__(self, ratios=None): + """ + Args: + ratios: dict with pair (paramer_name, pruned_ratio). + """ + self.ratios = ratios + + def prune(self, param, ratio=None): + """ + Args: + ratio: `ratio=40%` means pruning (1 - 40%) weights to zero. + """ + if ratio is None: + rat = self.ratios[ + param.name] if param.name in self.ratios else self.ratios['*'] + else: + rat = ratio + if rat < 1.0: + k = max(int(rat * np.prod(param.shape)), 1) + param_vec = layers.reshape(x=param, shape=[1, -1]) + param_topk, _ = layers.topk(param_vec, k=k) + threshold = layers.slice( + param_topk, axes=[1], starts=[-1], ends=[k]) + threshold = layers.reshape(x=threshold, shape=[1]) + zeros_mask = layers.less_than(x=param, y=threshold) + else: + zeros_mask = layers.ones(param.shape) + return zeros_mask diff --git a/python/paddle/fluid/contrib/slim/unitest/__init__.py b/python/paddle/fluid/contrib/slim/unitest/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6d41233e227dc7bab94ee4284cc25e12b45bf469 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/unitest/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/python/paddle/fluid/contrib/slim/unitest/configs/config.yaml b/python/paddle/fluid/contrib/slim/unitest/configs/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db488b96330210df15b02b19d90abd5c9101f844 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/unitest/configs/config.yaml @@ -0,0 +1,29 @@ +version: 1.0 +include: ["./unitest/configs/pruners.yaml", "./unitest/configs/pruners_0.yaml"] +pruners: + pruner_1: + class: 'RatioPruner' + ratios: + 'conv1_1.w': 0.3 + 'conv1_2.w': 0.4 + '*': 0.9 + group_dims: + '*': [1, 2, 3] + criterions: + '*': 'l1-norm' +strategies: + strategy_1: + class: 'SensitivePruneStrategy' + pruner: 'pruner_2' + start_epoch: 0 + end_epoch: 10 + delta_rate: 0.20 + acc_loss_threshold: 0.2 + sensitivities: + 'conv1_1.w': 0.4 + +compress_pass: + class: 'CompressPass' + epoch: 100 + strategies: + - strategy_1 diff --git a/python/paddle/fluid/contrib/slim/unitest/configs/pruners.yaml b/python/paddle/fluid/contrib/slim/unitest/configs/pruners.yaml new file mode 100644 index 0000000000000000000000000000000000000000..235092c595bf7c653221c7fe2b381fecf487fa49 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/unitest/configs/pruners.yaml @@ -0,0 +1,12 @@ +version: 1.0 +pruners: + pruner_2: + class: 'RatioPruner' + ratios: + 'conv1_1.w': 0.5 + 'conv1_2.w': 0.2 + '*': 0.7 + group_dims: + '*': [1, 2, 3] + criterions: + '*': 'l1-norm' diff --git a/python/paddle/fluid/contrib/slim/unitest/configs/pruners_0.yaml b/python/paddle/fluid/contrib/slim/unitest/configs/pruners_0.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd2ef9eb56ddbc1367ce2e3b413372fbcd542bde --- /dev/null +++ b/python/paddle/fluid/contrib/slim/unitest/configs/pruners_0.yaml @@ -0,0 +1,12 @@ +version: 1.0 +pruners: + pruner_3: + class: 'RatioPruner' + ratios: + 'conv1_1.w': 0.5 + 'conv1_2.w': 0.2 + '*': 0.7 + group_dims: + '*': [1, 2, 3] + criterions: + '*': 'l1-norm' diff --git a/python/paddle/fluid/contrib/slim/unitest/test_factory.py b/python/paddle/fluid/contrib/slim/unitest/test_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..07f28aac905d1a2813dbde6143235c7916fd9278 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/unitest/test_factory.py @@ -0,0 +1,41 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
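+# The test below exercises ConfigFactory end to end on the configs above:
+# pruner_1 is defined in config.yaml itself, while pruner_2 comes in through
+# the `include` list (./unitest/configs/pruners.yaml) and is wired into
+# strategy_1 by name.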
+ +from paddle.fluid.contrib.slim import ConfigFactory +import unittest + + +class TestFactory(unittest.TestCase): + def test_parse(self): + factory = ConfigFactory('./unitest/configs/config.yaml') + + pruner = factory.instance('pruner_1') + self.assertEquals(pruner.ratios['conv1_1.w'], 0.3) + + pruner = factory.instance('pruner_2') + self.assertEquals(pruner.ratios['*'], 0.7) + + strategy = factory.instance('strategy_1') + pruner = strategy.pruner + self.assertEquals(pruner.ratios['*'], 0.7) + + compress_pass = factory.get_compress_pass() + self.assertEquals(compress_pass.epoch, 100) + + strategy = compress_pass.strategies[0] + self.assertEquals(strategy.delta_rate, 0.2) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py index 13d2893fd146b5a3d9100ee1ba6c2243cb9c411b..af02721eb72c1d0f8aa3d7ab8db504c4c33b64d5 100644 --- a/python/paddle/fluid/data_feeder.py +++ b/python/paddle/fluid/data_feeder.py @@ -44,6 +44,8 @@ class DataToLoDTensorConverter(object): self.dtype = 'int64' elif dtype == core.VarDesc.VarType.FP64: self.dtype = 'float64' + elif dtype == core.VarDesc.VarType.FP16: + self.dtype = 'float16' elif dtype == core.VarDesc.VarType.INT32: self.dtype = 'int32' elif dtype == core.VarDesc.VarType.UINT8: diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index f2886090d75f87654b33cf7aa6f98ebf6f2e27d1..5a9e908b61eeeea3fdfdfcc54d1f150f59a3973b 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -191,7 +191,7 @@ def _fetch_var(name, scope=None, return_numpy=True): assert isinstance(name, str) if scope is None: scope = global_scope() - assert isinstance(scope, core.Scope) + assert isinstance(scope, core._Scope) var = scope.find_var(name) assert var is not None, ( diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index de30ed2fc5858187d2ecede299832701304e4198..3427fb0c4ae4dad0323b39cf85bfb5b04f796996 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -20,6 +20,7 @@ import os import re import six import sys +import traceback import numpy as np @@ -604,6 +605,10 @@ class Operator(object): if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0: del op_attrs[role_var_name] + callstack_var_name = op_maker.kOpCreationCallstackAttrName() + op_attrs[callstack_var_name] = list( + reversed(traceback.format_stack()))[1:] + if len(self.desc.type()) != 0: return if type is None: diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index b37ebbe5179ba6e36be70ff936cb8a3ca0d89d13..26d1f8f4d2bd67a35c4ec96a025ee273cec4dbd1 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -18,6 +18,7 @@ from . import framework import numpy as np import contextlib from .core import VarDesc +from . 
import unique_name
 
 __all__ = [
     'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
@@ -207,16 +208,39 @@ class UniformInitializer(Initializer):
         # Initialization Ops should be prepended and not appended
         if self._seed == 0:
             self._seed = block.program.random_seed
+
+        # to be compatible with fp16 initializers
+        if var.dtype == VarDesc.VarType.FP16:
+            out_dtype = VarDesc.VarType.FP32
+            out_var = block.create_var(
+                name=unique_name.generate(".".join(['uniform_random', 'tmp'])),
+                shape=var.shape,
+                dtype=out_dtype,
+                type=VarDesc.VarType.LOD_TENSOR,
+                persistable=False)
+        else:
+            out_dtype = var.dtype
+            out_var = var
+
         op = block._prepend_op(
             type="uniform_random",
-            outputs={"Out": var},
+            outputs={"Out": out_var},
             attrs={
                 "shape": var.shape,
-                "dtype": int(var.dtype),
+                "dtype": out_dtype,
                 "min": self._low,
                 "max": self._high,
                 "seed": self._seed
             })
+
+        if var.dtype == VarDesc.VarType.FP16:
+            block.append_op(
+                type="cast",
+                inputs={"X": out_var},
+                outputs={"Out": var},
+                attrs={"in_dtype": out_var.dtype,
+                       "out_dtype": var.dtype})
+
         var.op = op
         return op
@@ -261,17 +285,39 @@ class NormalInitializer(Initializer):
         # Initialization Ops should be prepended and not appended
         if self._seed == 0:
             self._seed = block.program.random_seed
+
+        # to be compatible with fp16 initializers
+        if var.dtype == VarDesc.VarType.FP16:
+            out_dtype = VarDesc.VarType.FP32
+            out_var = block.create_var(
+                name=unique_name.generate(".".join(['gaussian_random', 'tmp'])),
+                shape=var.shape,
+                dtype=out_dtype,
+                type=VarDesc.VarType.LOD_TENSOR,
+                persistable=False)
+        else:
+            out_dtype = var.dtype
+            out_var = var
+
         op = block._prepend_op(
             type="gaussian_random",
-            outputs={"Out": var},
+            outputs={"Out": out_var},
             attrs={
                 "shape": var.shape,
-                "dtype": int(var.dtype),
+                "dtype": out_dtype,
                 "mean": self._mean,
                 "std": self._std_dev,
                 "seed": self._seed,
                 "use_mkldnn": False
             })
+
+        if var.dtype == VarDesc.VarType.FP16:
+            block.append_op(
+                type="cast",
+                inputs={"X": out_var},
+                outputs={"Out": var},
+                attrs={"in_dtype": out_var.dtype,
+                       "out_dtype": var.dtype})
         var.op = op
         return op
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index bdfcc8c4e2604fca9e93c5bc35a31a75db2cf78e..cc1fdbd285611379cc4fa44d2373748aa6e24faf 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -2801,6 +2801,10 @@ def batch_norm(input,
     helper = LayerHelper('batch_norm', **locals())
     dtype = helper.input_dtype()
 
+    # use fp32 for bn parameter
+    if dtype == core.VarDesc.VarType.FP16:
+        dtype = core.VarDesc.VarType.FP32
+
     input_shape = input.shape
     if data_layout == 'NCHW':
         channel_num = input_shape[1]
@@ -2835,7 +2839,7 @@ def batch_norm(input,
             trainable=False,
             do_model_average=do_model_average_for_mean_and_var),
         shape=param_shape,
-        dtype=input.dtype)
+        dtype=dtype)
     mean.stop_gradient = True
 
     variance = helper.create_parameter(
@@ -2845,7 +2849,7 @@ def batch_norm(input,
             trainable=False,
             do_model_average=do_model_average_for_mean_and_var),
         shape=param_shape,
-        dtype=input.dtype)
+        dtype=dtype)
     variance.stop_gradient = True
 
     # create output
@@ -4526,7 +4530,7 @@ def topk(input, k, name=None):
     Args:
         input(Variable): The input variable which can be a vector or Tensor with
             higher rank.
-        k(int): The number of top elements to look for along the last dimension
+        k(int | Variable): The number of top elements to look for along the last dimension
            of input.
         name(str|None): A name for this layer(optional). If set None, the layer
            will be named automatically.
@@ -4549,12 +4553,18 @@ def topk(input, k, name=None):
     helper = LayerHelper("top_k", **locals())
     values = helper.create_variable_for_type_inference(dtype=input.dtype)
     indices = helper.create_variable_for_type_inference(dtype="int64")
+    inputs = {"X": [input]}
+    attrs = None
+    if isinstance(k, Variable):
+        inputs['K'] = k
+    else:
+        attrs = {'k': k}
     helper.append_op(
         type="top_k",
-        inputs={"X": [input]},
+        inputs=inputs,
         outputs={"Out": [values],
                  "Indices": [indices]},
-        attrs={"k": k})
+        attrs=attrs)
     values.stop_gradient = True
     indices.stop_gradient = True
     return values, indices
@@ -7943,7 +7953,7 @@ def unstack(x, axis=0, num=None):
         num = x.shape[axis]
 
     outs = []
-    for _ in num:
+    for _ in range(num):
         outs.append(helper.create_variable_for_type_inference(x.dtype))
 
     helper.append_op(
diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..835376ffe78f9119a9be6c379998e3a3b50aab43
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ngraph/test_fill_constant_ngraph_op.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import unittest
+from paddle.fluid.tests.unittests.test_fill_constant_op import TestFillConstantOp1, TestFillConstantOp2, TestFillConstantOpWithSelectedRows
+
+
+class TestNGRAPHFillConstantOp1(TestFillConstantOp1):
+    def setUp(self):
+        super(TestNGRAPHFillConstantOp1, self).setUp()
+
+
+class TestNGRAPHFillConstantOp2(TestFillConstantOp2):
+    def setUp(self):
+        super(TestNGRAPHFillConstantOp2, self).setUp()
+
+
+class TestNGRAPHFillConstantOpWithSelectedRows(
+        TestFillConstantOpWithSelectedRows):
+    def setUp(self):
+        super(TestNGRAPHFillConstantOpWithSelectedRows, self).setUp()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py b/python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a0171087dce5d4c7b72eca7f7e4fb955af94812
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ngraph/test_top_k_ngraph_op.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
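+# Same pattern as test_fill_constant_ngraph_op.py above: each nGraph case
+# subclasses the corresponding reference test from test_top_k_op so the
+# operator is checked against the same inputs and expected outputs.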
+from __future__ import print_function + +import unittest +from paddle.fluid.tests.unittests.test_top_k_op import TestTopkOp, TestTopkOp3d, TestTopkOp2, TestTopkOp3, TestTopkOp4 + + +class TestNGRAPHTopkOp(TestTopkOp): + def setUp(self): + super(TestNGRAPHTopkOp, self).setUp() + + +class TestNGRAPHTopkOp2(TestTopkOp2): + def setUp(self): + super(TestNGRAPHTopkOp2, self).setUp() + + +class TestNGRAPHTopkOp3(TestTopkOp3): + def setUp(self): + super(TestNGRAPHTopkOp3, self).setUp() + + +class TestNGRAPHTopkOp4(TestTopkOp4): + def setUp(self): + super(TestNGRAPHTopkOp4, self).setUp() + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index 76a707efdc0804be0316ab12c347ffed6199529a..0fe836683b029698b670bbb9f9bb258c2f3b68a0 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -368,6 +368,8 @@ class OpTest(unittest.TestCase): place = core.CUDAPlace(0) if core.is_float16_supported(place): return [place] + else: + return [] else: return [] places = [fluid.CPUPlace()] diff --git a/python/paddle/fluid/tests/unittests/test_accuracy_op.py b/python/paddle/fluid/tests/unittests/test_accuracy_op.py index 1b2b53f2d4ce91ae7b5b191ed770b5338f0948c8..5257b0be6f61bc90a6492c44044c122485f4742c 100644 --- a/python/paddle/fluid/tests/unittests/test_accuracy_op.py +++ b/python/paddle/fluid/tests/unittests/test_accuracy_op.py @@ -22,8 +22,10 @@ from op_test import OpTest class TestAccuracyOp(OpTest): def setUp(self): self.op_type = "accuracy" + self.dtype = np.float32 + self.init_dtype() n = 8192 - infer = np.random.random((n, 1)).astype("float32") + infer = np.random.random((n, 1)).astype(self.dtype) indices = np.random.randint(0, 2, (n, 1)) label = np.random.randint(0, 2, (n, 1)) self.inputs = {'Out': infer, 'Indices': indices, "Label": label} @@ -34,14 +36,25 @@ class TestAccuracyOp(OpTest): num_correct += 1 break self.outputs = { - 'Accuracy': np.array([num_correct / float(n)]).astype("float32"), + 'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype), 'Correct': np.array([num_correct]).astype("int32"), 'Total': np.array([n]).astype("int32") } + def init_dtype(self): + pass + def test_check_output(self): self.check_output() +class TestAccuracyOpFp16(TestAccuracyOp): + def init_dtype(self): + self.dtype = np.float16 + + def test_check_output(self): + self.check_output(atol=1e-3) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dequantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/test_dequantize_mkldnn_op.py new file mode 100644 index 0000000000000000000000000000000000000000..0c5e1abd7c8fb010357998c0ceaebaf21619fda9 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dequantize_mkldnn_op.py @@ -0,0 +1,73 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
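+# The dequantize cases below check Output = Input * (1 / Scale), with an
+# int8 variant (inputs shifted to be sign-symmetric) and a uint8 variant
+# selected through set_data_type().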
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+class TestDeQuantizeOp(OpTest):
+    def setUp(self):
+        self.op_type = 'dequantize'
+        self.scale = 2.0
+        self.input_size = [1, 1, 5, 5]  # Naive nChw16c
+        self.data_type = 'int8'
+        self.set_scale()
+        self.set_data_type()
+
+        if self.data_type == 'int8':
+            input = (np.random.randint(0, 100, self.input_size) - 50
+                     ).astype(self.data_type)
+            output = (input * (1 / self.scale)).astype('float')
+        else:
+            input = (np.random.randint(0, 100,
+                                       self.input_size)).astype(self.data_type)
+            output = (input * (1 / self.scale)).astype('float')
+
+        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
+
+        self.outputs = {'Output': output}
+
+        self.attrs = {'Scale': self.scale, }
+
+    def test_check_output(self):
+        self.check_output()
+
+    def set_scale(self):
+        pass
+
+    def set_data_type(self):
+        pass
+
+
+class TestDeQuantizeOp1(TestDeQuantizeOp):
+    def set_scale(self):
+        self.scale = 1.5
+
+    def set_data_type(self):
+        self.data_type = 'int8'
+
+
+class TestDeQuantizeOp2(TestDeQuantizeOp):
+    def set_scale(self):
+        self.scale = 0.8
+
+    def set_data_type(self):
+        self.data_type = 'uint8'
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
index cadaf1df53af0af56afa8c3631b0f5ce390f318c..15d4db590edc9012604361751e9860ba63239bba 100644
--- a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
+++ b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py
@@ -21,14 +21,16 @@ from op_test import OpTest
 class ElementwiseDivOp(OpTest):
     def setUp(self):
         self.op_type = "elementwise_div"
+        self.dtype = np.float32
+        self.init_dtype()
         """ Warning
         CPU gradient check error!
'X': np.random.random((32,84)).astype("float32"), 'Y': np.random.random((32,84)).astype("float32") """ self.inputs = { - 'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"), - 'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32") + 'X': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype), + 'Y': np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) } self.outputs = {'Out': np.divide(self.inputs['X'], self.inputs['Y'])} @@ -46,6 +48,9 @@ class ElementwiseDivOp(OpTest): self.check_grad( ['X'], 'Out', max_relative_error=0.05, no_grad_set=set('Y')) + def init_dtype(self): + pass + class TestElementwiseDivOp_scalar(ElementwiseDivOp): def setUp(self): @@ -126,5 +131,21 @@ class TestElementwiseDivOp_broadcast_3(ElementwiseDivOp): } +class TestElementwiseDivOpFp16(ElementwiseDivOp): + def init_dtype(self): + self.dtype = np.float16 + + def test_check_grad_normal(self): + self.check_grad(['X', 'Y'], 'Out', max_relative_error=1) + + def test_check_grad_ingore_x(self): + self.check_grad( + ['Y'], 'Out', max_relative_error=1, no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + self.check_grad( + ['X'], 'Out', max_relative_error=1, no_grad_set=set('Y')) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py index 57ba34f833f824d13e0b82caea789f7f57622bc9..04840991883229614c1ca4890e5cec2e7ae21084 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py @@ -135,5 +135,10 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp): } +class TestElementwiseMulOpFp16(ElementwiseMulOp): + def init_dtype(self): + self.dtype = np.float16 + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py b/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py index eec73d0beb39c49f535a03532e536092001c8445..20f1a110c35d689064c49efba246f078c3badd33 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op.py @@ -22,12 +22,22 @@ from op_test import OpTest class TestFillZerosLikeOp(OpTest): def setUp(self): self.op_type = "fill_zeros_like" - self.inputs = {'X': np.random.random((219, 232)).astype("float32")} + self.dtype = np.float32 + self.init_dtype() + self.inputs = {'X': np.random.random((219, 232)).astype(self.dtype)} self.outputs = {'Out': np.zeros_like(self.inputs["X"])} + def init_dtype(self): + pass + def test_check_output(self): self.check_output() +class TestFillZerosLikeOpFp16(TestFillZerosLikeOp): + def init_dtype(self): + self.dtype = np.float16 + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_momentum_op.py b/python/paddle/fluid/tests/unittests/test_momentum_op.py index cf4346cf2e7a099334ec273546901a91d0ad925d..77ec6f9b6bcda7568325698634fd4f86557cd1be 100644 --- a/python/paddle/fluid/tests/unittests/test_momentum_op.py +++ b/python/paddle/fluid/tests/unittests/test_momentum_op.py @@ -24,11 +24,13 @@ from op_test import OpTest class TestMomentumOp1(OpTest): def setUp(self): self.op_type = "momentum" + self.dtype = np.float32 + self.init_dtype() - param = np.random.random((123, 321)).astype("float32") - grad = np.random.random((123, 321)).astype("float32") - velocity = np.zeros((123, 321)).astype("float32") - learning_rate = np.array([0.001]).astype("float32") + 
param = np.random.random((123, 321)).astype(self.dtype) + grad = np.random.random((123, 321)).astype(self.dtype) + velocity = np.zeros((123, 321)).astype(self.dtype) + learning_rate = np.array([0.001]).astype(self.dtype) mu = 0.0001 use_nesterov = False @@ -50,10 +52,21 @@ class TestMomentumOp1(OpTest): self.outputs = {'ParamOut': param_out, 'VelocityOut': velocity_out} + def init_dtype(self): + pass + def test_check_output(self): self.check_output() +class TestMomentumOpFp16(TestMomentumOp1): + def init_dtype(self): + self.dtype = np.float16 + + def test_check_output(self): + self.check_output(atol=1e-3) + + class TestMomentumOp2(OpTest): '''Test Momentum with default values for attributes ''' diff --git a/python/paddle/fluid/tests/unittests/test_operator_desc.py b/python/paddle/fluid/tests/unittests/test_operator_desc.py index 4153394c1da776d0a41e1415a09fa7d6f4b14d6d..37b9a9188ab44df81029ae6d9925ae21c1929cff 100644 --- a/python/paddle/fluid/tests/unittests/test_operator_desc.py +++ b/python/paddle/fluid/tests/unittests/test_operator_desc.py @@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase): set(mul_op.attr_names), set([ "x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var", - "op_namescope" + "op_namescope", "op_callstack" ])) self.assertEqual(mul_op.has_attr("x_num_col_dims"), True) self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT) diff --git a/python/paddle/fluid/tests/unittests/test_py_func_op.py b/python/paddle/fluid/tests/unittests/test_py_func_op.py index 943ad3ed22480193dc51375cdcca5ed36ce35158..655378f7f8c18f5936643f6c178d0e6b3dd8cac8 100644 --- a/python/paddle/fluid/tests/unittests/test_py_func_op.py +++ b/python/paddle/fluid/tests/unittests/test_py_func_op.py @@ -26,7 +26,7 @@ os.environ['CPU_NUM'] = str(dev_cnt) def dummy_func_with_no_input(): - return float(1.0) + return np.array([0], dtype='float32') def dummy_func_with_no_output(x): @@ -105,7 +105,7 @@ def simple_fc_net(img, label, use_py_func_op): name='test_tmp_var', dtype='float32', shape=[1]) fluid.layers.py_func( func=dummy_func_with_no_input, x=None, out=dummy_var) - + loss += dummy_var fluid.layers.py_func(func=dummy_func_with_no_output, x=loss, out=None) loss = fluid.layers.mean(loss) @@ -174,7 +174,7 @@ class TestPyFuncOpUseExecutor(unittest.TestCase): self.assertAlmostEqual(max_diff, 0, delta=1e-3) -class TestPyFuncOpUseParallelExecutor(unittest.TestCase): +class TestPyFuncOpUseParallelExecutor(TestPyFuncOpUseExecutor): def setUp(self): self.use_parallel_executor = True diff --git a/python/paddle/fluid/tests/unittests/test_quantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/test_quantize_mkldnn_op.py new file mode 100644 index 0000000000000000000000000000000000000000..99607928648be437b7f944f86a0c28b99d1775c4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_quantize_mkldnn_op.py @@ -0,0 +1,76 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
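+# Mirror of test_dequantize_mkldnn_op.py above: quantize rounds
+# Input * Scale into int8 when is_negative_input is set and into uint8
+# otherwise.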
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+class TestQuantizeOp(OpTest):
+    def setUp(self):
+        self.op_type = 'quantize'
+        self.scale = 2.0
+        self.input_size = [1, 1, 5, 5]  # Naive nChw16c
+        self.is_negative = False
+        self.set_scale()
+        self.set_is_negative()
+
+        if self.is_negative:
+            input = (100 * np.random.random_sample(self.input_size) - 50
+                     ).astype('float32')
+            output = np.round(input * self.scale).astype('int8')
+        else:
+            input = (100 *
+                     np.random.random_sample(self.input_size)).astype('float32')
+            output = np.round(input * self.scale).astype('uint8')
+
+        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
+
+        self.outputs = {'Output': output}
+
+        self.attrs = {
+            'Scale': self.scale,
+            'is_negative_input': self.is_negative
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+    def set_scale(self):
+        pass
+
+    def set_is_negative(self):
+        pass
+
+
+class TestQuantizeOp1(TestQuantizeOp):
+    def set_scale(self):
+        self.scale = 1.5
+
+    def set_is_negative(self):
+        self.is_negative = True
+
+
+class TestQuantizeOp2(TestQuantizeOp):
+    def set_scale(self):
+        self.scale = 0.1
+
+    def set_is_negative(self):
+        self.is_negative = False
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_top_k_op.py b/python/paddle/fluid/tests/unittests/test_top_k_op.py
index 69b29db83a43d18c0825b610642009a0377b9901..9fbf59ed669766077a456b3d83b7162e495ae8ae 100644
--- a/python/paddle/fluid/tests/unittests/test_top_k_op.py
+++ b/python/paddle/fluid/tests/unittests/test_top_k_op.py
@@ -21,15 +21,22 @@ from op_test import OpTest
 class TestTopkOp(OpTest):
     def setUp(self):
+        self.variable_k = False
         self.set_args()
         self.op_type = "top_k"
+        self.dtype = np.float32
+        self.init_dtype()
+
         k = self.top_k
-        input = np.random.random((self.row, k)).astype("float32")
+        input = np.random.random((self.row, k)).astype(self.dtype)
         output = np.ndarray((self.row, k))
         indices = np.ndarray((self.row, k)).astype("int64")
-        self.inputs = {'X': input}
-        self.attrs = {'k': k}
+        self.inputs = {'X': input}
+
+        if self.variable_k:
+            self.inputs['K'] = np.array([k]).astype("int32")
+        else:
+            self.attrs = {'k': k}
 
         for rowid in range(self.row):
             row = input[rowid]
@@ -38,6 +45,9 @@
         self.outputs = {'Out': output, 'Indices': indices}
 
+    def init_dtype(self):
+        pass
+
     def set_args(self):
         self.row = 32
         self.top_k = 1
@@ -46,6 +56,11 @@
         self.check_output()
 
+class TestTopkOpFp16(TestTopkOp):
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
 class TestTopkOp3d(OpTest):
     def setUp(self):
         self.op_type = "top_k"
@@ -107,5 +122,12 @@ class TestTopkOp4(TestTopkOp):
         self.top_k = 1
 
 
+class TestTopkOp5(TestTopkOp):
+    def set_args(self):
+        self.row = 40000
+        self.top_k = 3
+        self.variable_k = True
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/transpiler/inference_transpiler.py b/python/paddle/fluid/transpiler/inference_transpiler.py
index ccf7af334d091dff5cf6a94e5875cdf582fa0c5c..cc7f5ec90c26c87b7c5514c900e853be9e16d6eb 100644
--- a/python/paddle/fluid/transpiler/inference_transpiler.py
+++ b/python/paddle/fluid/transpiler/inference_transpiler.py
@@ -57,7 +57,7 @@ class InferenceTranspiler(object):
             raise TypeError("place should be as CPUPlace/CUDAPlace type")
         if scope is None:
             scope = global_scope()
-        if not isinstance(scope, core.Scope):
+        if not isinstance(scope, core._Scope):
            raise TypeError("scope should be as Scope type or None")
        use_mkldnn =
bool(os.getenv("FLAGS_use_mkldnn", False)) diff --git a/python/requirements.txt b/python/requirements.txt index 2f81d85df0626b294f4d861706b5c1b7ec9841d5..03d5e33e88cd5f1138ca8f6a6e885d6acfbc260e 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -9,3 +9,5 @@ Pillow nltk>=3.2.2 graphviz six +funcsigs +pyyaml diff --git a/python/setup.py.in b/python/setup.py.in index bfcd47f5b0a6c2ede8d3e4d25e97ed809360cbca..c9afe6c885658b88ac520aad2e7b13facda02a92 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -109,6 +109,10 @@ packages=['paddle', 'paddle.fluid.contrib', 'paddle.fluid.contrib.decoder', 'paddle.fluid.contrib.quantize', + 'paddle.fluid.contrib.slim', + 'paddle.fluid.contrib.slim.core', + 'paddle.fluid.contrib.slim.graph', + 'paddle.fluid.contrib.slim.prune', 'paddle.fluid.contrib.utils', 'paddle.fluid.transpiler', 'paddle.fluid.transpiler.details']
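One usage note on the layers.topk change above: `k` may now be a runtime Variable instead of a compile-time int, in which case it is passed to the op through the 'K' input rather than the 'k' attribute. A minimal sketch (variable names are illustrative):

import paddle.fluid as fluid

x = fluid.layers.data(name='scores', shape=[100], dtype='float32')
# k built as a 1-D int32 tensor; topk routes it through the 'K' input
k = fluid.layers.fill_constant(shape=[1], dtype='int32', value=5)
values, indices = fluid.layers.topk(x, k=k)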