diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 256693528960a9747f89df2c553102322ff1cbeb..c870efb16bc903d7415a47496fe758d578435848 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -160,6 +160,12 @@ paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None
 paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.elu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
+paddle.fluid.layers.relu6 ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None))
+paddle.fluid.layers.pow ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
+paddle.fluid.layers.stanh ArgSpec(args=['x', 'scale_a', 'scale_b', 'name'], varargs=None, keywords=None, defaults=(0.6666666666666666, 1.7159, None))
+paddle.fluid.layers.hard_sigmoid ArgSpec(args=['x', 'slope', 'offset', 'name'], varargs=None, keywords=None, defaults=(0.2, 0.5, None))
+paddle.fluid.layers.swish ArgSpec(args=['x', 'beta', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
 paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.layers.brelu ArgSpec(args=['x', 't_min', 't_max', 'name'], varargs=None, keywords=None, defaults=(0.0, 24.0, None))
 paddle.fluid.layers.leaky_relu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(0.02, None))
@@ -260,12 +266,6 @@ paddle.fluid.layers.slice ArgSpec(args=[], varargs='args', keywords='kwargs', de
 paddle.fluid.layers.shape ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.softshrink ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.elu ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.relu6 ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.pow ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.stanh ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.hard_sigmoid ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.swish ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.exp ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
diff --git a/paddle/fluid/framework/details/cow_ptr.h b/paddle/fluid/framework/details/cow_ptr.h
index 4fb015b0ffe27e6fa91d4eaf373fca4feca66361..21f75957be5f33f3dfc09c41fa9a1e1ca590f99e 100644
--- a/paddle/fluid/framework/details/cow_ptr.h
+++ b/paddle/fluid/framework/details/cow_ptr.h
@@ -20,41 +20,79 @@ namespace paddle {
 namespace framework {
 namespace details {
 
-template <typename T>
-class COWPtr {
+// Change it to thread safe flags if needed.
+class ThreadUnsafeOwnershipFlags {
  public:
-  typedef std::shared_ptr<T> RefPtr;
+  explicit ThreadUnsafeOwnershipFlags(bool flag) : flag_(flag) {}
 
- private:
-  RefPtr m_sp;
+  ThreadUnsafeOwnershipFlags(const ThreadUnsafeOwnershipFlags& other) = delete;
+  ThreadUnsafeOwnershipFlags& operator=(
+      const ThreadUnsafeOwnershipFlags& other) = delete;
+  ThreadUnsafeOwnershipFlags(ThreadUnsafeOwnershipFlags&& other) = default;
 
-  void detach() {
-    T* tmp = m_sp.get();
-    if (!(tmp == nullptr || m_sp.unique())) {
-      m_sp = RefPtr(new T(*tmp));
+  void SetOwnership(bool flag) { flag_ = flag; }
+
+  // Invoke the callback if it is not owned.
+  template <typename Callback>
+  void AcquireOwnershipOnce(Callback acquire) {
+    if (!flag_) {
+      acquire();
+      flag_ = true;
     }
   }
 
- public:
-  COWPtr() : m_sp(nullptr) {}
-  explicit COWPtr(T* t) : m_sp(t) {}
-  explicit COWPtr(const RefPtr& refptr) : m_sp(refptr) {}
+ private:
+  bool flag_;
+};
 
-  const T& Data() const { return operator*(); }
+// Copy-On-Write pointer.
+// It will hold a T* pointer, and only copy once when `MutableData` is invoked.
+//
+// The template parameter OwnershipFlags should have:
+//   * a constructor takes a bool. True if own.
+//   * SetOwnership(bool flag).
+//   * AcquireOwnershipOnce(Callback). It will invoke the callback if it is not
+//     owned.
+//
+// https://en.wikipedia.org/wiki/Copy-on-write
+template <typename T, typename OwnershipFlags = ThreadUnsafeOwnershipFlags>
+class COWPtr {
+ public:
+  // Ctor from raw pointer.
+  explicit COWPtr(T* ptr) : payload_(ptr), ownership_{true} {}
 
-  T* MutableData() { return operator->(); }
+  // Move methods. Steal ownership from origin
+  COWPtr(COWPtr&& other)
+      : payload_(other.payload_), ownership_{std::move(other.ownership_)} {}
+  COWPtr& operator=(COWPtr&& origin) = default;
 
-  const T& operator*() const { return *m_sp; }
-  T& operator*() {
-    detach();
-    return *m_sp;
+  // Copy methods. Not own payload
+  COWPtr(const COWPtr& other) : payload_(other.payload_), ownership_{false} {}
+  COWPtr& operator=(const COWPtr& other) {
+    payload_ = other.payload_;
+    ownership_.SetOwnership(false);
+    return *this;
   }
-  const T* operator->() const { return m_sp.operator->(); }
-  T* operator->() {
-    detach();
-    return m_sp.operator->();
+
+  // Access read only data.
+  const T& Data() const { return *payload_; }
+
+  // Access mutable data. If the data is not owned, the data will be copied
+  // before.
+  T* MutableData() {
+    ownership_.AcquireOwnershipOnce(
+        [this] { payload_.reset(new T(*payload_)); });
+    return payload_.get();
   }
+
+ private:
+  // Actual data pointer.
+  std::shared_ptr<T> payload_;
+
+  // Ownership flag.
+  OwnershipFlags ownership_;
 };
+
 }  // namespace details
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/details/cow_ptr_test.cc b/paddle/fluid/framework/details/cow_ptr_test.cc
index 5b055d7cb4d127dc20f2cf70869134f24a93d429..d2142af277c0b356d83941b3baab1947cce31dac 100644
--- a/paddle/fluid/framework/details/cow_ptr_test.cc
+++ b/paddle/fluid/framework/details/cow_ptr_test.cc
@@ -30,14 +30,6 @@ TEST(COWPtr, all) {
   ASSERT_EQ(ptr2.Data(), 10);
 }
 
-TEST(COWPtr, change_old) {
-  COWPtr<int> ptr(new int{0});
-  COWPtr<int> ptr2 = ptr;
-  *ptr.MutableData() = 10;
-  ASSERT_EQ(ptr2.Data(), 0);
-  ASSERT_EQ(ptr.Data(), 10);
-}
-
 }  // namespace details
 }  // namespace framework
 }  // namespace paddle
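A note on the semantics this rewrite introduces: a copy of a `COWPtr` shares the payload and marks itself non-owning, so the first `MutableData()` call on the copy clones the payload before writing, while writes through the still-owning original happen in place and remain visible to every copy. That asymmetry is exactly why the `change_old` test is deleted above: it asserted that mutating the original leaves copies untouched, which no longer holds. A minimal self-contained sketch of the new behavior (plain C++, compilable outside the Paddle tree; it mirrors the header in this diff but is not the Paddle code itself):

```cpp
#include <cassert>
#include <memory>

// Reduced model of the COWPtr in this diff: a copy is non-owning and
// clones the shared payload on its first mutable access.
template <typename T>
class CowPtr {
 public:
  explicit CowPtr(T* ptr) : payload_(ptr), owned_(true) {}
  CowPtr(const CowPtr& other) : payload_(other.payload_), owned_(false) {}

  const T& Data() const { return *payload_; }

  T* MutableData() {
    if (!owned_) {
      // Acquire ownership once: copy before the first write.
      payload_.reset(new T(*payload_));
      owned_ = true;
    }
    return payload_.get();
  }

 private:
  std::shared_ptr<T> payload_;
  bool owned_;
};

int main() {
  CowPtr<int> ptr(new int{0});
  CowPtr<int> ptr2 = ptr;     // shares the payload, does not own it
  *ptr2.MutableData() = 10;   // clone happens here, then the write
  assert(ptr.Data() == 0);    // the original is unaffected
  assert(ptr2.Data() == 10);

  *ptr.MutableData() = 5;     // the owner writes in place: no clone
  assert(ptr2.Data() == 10);  // ptr2 already has its own payload
  return 0;
}
```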
diff --git a/paddle/fluid/framework/details/reference_count_op_handle.h b/paddle/fluid/framework/details/reference_count_op_handle.h
index 71db8d952f4c205b875ad254dc19c0c1f74e61b3..fc479a4c4a1e7d5c824d3c202e0cccf743dd52c9 100644
--- a/paddle/fluid/framework/details/reference_count_op_handle.h
+++ b/paddle/fluid/framework/details/reference_count_op_handle.h
@@ -22,6 +22,7 @@
 #include "paddle/fluid/framework/details/op_handle_base.h"
 #include "paddle/fluid/framework/garbage_collector.h"
 #include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/framework/tensor.h"
 
 namespace paddle {
@@ -46,17 +47,15 @@ class ReferenceCountOpHandle : public OpHandleBase {
                          const std::vector<std::string> &var_names,
                          GarbageCollector<Tensor> *gc,
                          AtomicReferenceCountMap *ref_cnts)
-      : OpHandleBase(node),
-        scope_(scope),
-        var_names_(var_names),
-        gc_(gc),
-        ref_cnts_(ref_cnts) {
+      : OpHandleBase(node), scope_(scope), gc_(gc), ref_cnts_(ref_cnts) {
     dev_ctx_ = static_cast<platform::CUDADeviceContext *>(
        platform::DeviceContextPool::Instance().Get(place));
     if (IsStreamGarabageCollector()) {
       PADDLE_ENFORCE(cudaSetDevice(place.device));
       PADDLE_ENFORCE(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
     }
+
+    for (auto &name : var_names) AddVar(name);
   }
 
   ~ReferenceCountOpHandle() {
@@ -69,19 +68,35 @@ class ReferenceCountOpHandle : public OpHandleBase {
 
   std::string Name() const override { return "reference_count"; }
 
+  void AddVar(const std::string &name) {
+    auto it = var_names_.find(name);
+    if (it != var_names_.end())
+      ++(it->second);
+    else
+      var_names_[name] = 1;
+  }
+
 protected:
  void RunImpl() override {
    auto *exec_scope = scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
-    std::vector<LoDTensor *> tensors;
-    for (auto &name : var_names_) {
+    std::vector<Tensor *> tensors;
+    for (auto &pair : var_names_) {
+      auto &name = pair.first;
       auto it = ref_cnts_->find(name);
       if (it == ref_cnts_->end()) continue;
 
       auto *var = exec_scope->FindVar(name);
-      if (var == nullptr || !var->IsType<LoDTensor>()) continue;
-
-      if (it->second.fetch_sub(1) <= 1) {
-        tensors.emplace_back(var->GetMutable<LoDTensor>());
+      if (var == nullptr) continue;
+
+      if (var->IsType<LoDTensor>()) {
+        if (it->second.fetch_sub(pair.second) <= pair.second) {
+          tensors.emplace_back(var->GetMutable<LoDTensor>());
+        }
+      } else if (var->IsType<SelectedRows>()) {
+        if (it->second.fetch_sub(pair.second) <= pair.second) {
+          tensors.emplace_back(
+              var->GetMutable<SelectedRows>()->mutable_value());
+        }
       }
     }
 
@@ -91,7 +106,7 @@ class ReferenceCountOpHandle : public OpHandleBase {
   }
 
  private:
-  void ClearTensors(const std::vector<LoDTensor *> &tensors) {
+  void ClearTensors(const std::vector<Tensor *> &tensors) {
     auto *gc = dynamic_cast<StreamGarbageCollector<Tensor> *>(gc_);
     if (gc != nullptr) {
       auto compute_stream = dev_ctx_->stream();
@@ -112,7 +127,7 @@ class ReferenceCountOpHandle : public OpHandleBase {
 
   const Scope *scope_;
   platform::CUDADeviceContext *dev_ctx_;
-  std::vector<std::string> var_names_;
+  std::unordered_map<std::string, int> var_names_;
   GarbageCollector<Tensor> *gc_;       // not own
   AtomicReferenceCountMap *ref_cnts_;  // not own
   cudaEvent_t event_;
diff --git a/paddle/fluid/framework/details/reference_count_pass.cc b/paddle/fluid/framework/details/reference_count_pass.cc
index 344754d5a1e119c04cae08ad50126924b5824315..b1ce551ce73de33bcede187c72feebad6e2fa1a5 100644
--- a/paddle/fluid/framework/details/reference_count_pass.cc
+++ b/paddle/fluid/framework/details/reference_count_pass.cc
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <queue>
 #include <string>
 #include <vector>
 
@@ -23,6 +24,25 @@ namespace paddle {
 namespace framework {
 namespace details {
 
+static ComputationOpHandle *FindNextComputationOpHandle(VarHandle *var_in) {
+  std::queue<VarHandleBase *> queue;
+  queue.push(var_in);
+  do {
+    auto *var = queue.front();
+    queue.pop();
+    for (auto *op : var->PendingOps()) {
+      auto *compute_op = dynamic_cast<ComputationOpHandle *>(op);
+      if (compute_op != nullptr && compute_op->GetPlace() == var_in->place_) {
+        return compute_op;
+      }
+      for (auto *out_var : op->Outputs()) {
+        queue.push(out_var);
+      }
+    }
+  } while (!queue.empty());
+  return nullptr;
+}
+
 std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
     std::unique_ptr<ir::Graph> graph) const {
   auto &ref_cnts = Get<DeviceReferenceCountMap>(kGlobalReferenceCount);
@@ -34,6 +54,9 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
   // Step 2: Find all variables in non-computation ops which refers to variables
   // in computation ops
   std::unordered_set<std::string> names;
+  std::unordered_map<ComputationOpHandle *,
+                     std::unique_ptr<ReferenceCountOpHandle>>
+      compute_ref_cnt_map;
 
   auto get_ref_cnts_from_compute_op = [&](
       const std::unique_ptr<OpHandleBase> &op,
      const std::vector<VarHandleBase *> &vars) {
@@ -54,15 +77,18 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
       VarDesc *var_desc = var_handle->Node()->Var();
       auto var_name = var_handle->Node()->Name();
 
-      // This is wierd but there is really some variables without var_desc
+      // This is weird but there is really some variables without var_desc
       // in computation_op
       if (var_desc == nullptr) {
         if (compute_op->Node()->Op()->Block()->FindVar(var_name) == nullptr)
           continue;
       } else {
-        if (var_desc->Persistable() ||
-            var_desc->Proto()->type().type() != proto::VarType::LOD_TENSOR)
+        if (var_desc->Persistable()) continue;
+        auto var_type = var_desc->Proto()->type().type();
+        if (var_type != proto::VarType::LOD_TENSOR &&
+            var_type != proto::VarType::SELECTED_ROWS) {
           continue;
+        }
       }
 
       // compute op only runs in one device
@@ -93,12 +119,33 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
       if (ref_cnts.count(place.device) &&
           ref_cnts[place.device]->count(var_name)) {
         ++(*ref_cnts[place.device])[var_name];
+
+        auto *next_compute_op = FindNextComputationOpHandle(var_handle);
+        if (next_compute_op != nullptr) {
+          if (compute_ref_cnt_map.count(next_compute_op)) {
+            compute_ref_cnt_map[next_compute_op]->AddVar(var_name);
+            VLOG(5) << "Add reference count of " << var_name << " to Operator "
+                    << next_compute_op->Name();
+          } else {
+            // Create new reference_count_op_handle
+            ir::Node *ref_cnt_node = graph->CreateEmptyNode(
+                "reference_count", ir::Node::Type::kOperation);
+            auto *ref_cnt_handle = new ReferenceCountOpHandle(
+                ref_cnt_node, next_compute_op->GetScope(), place, {var_name},
+                gcs[place.device].get(), cur_ref_cnts[place.device].get());
+            if (next_compute_op->Outputs().empty()) {
+              auto *dep_var = new DummyVarHandle(graph->CreateControlDepVar());
+              next_compute_op->AddOutput(dep_var);
+              graph->Get<GraphDepVars>(kGraphDepVars).emplace(dep_var);
+            }
+            ref_cnt_handle->AddInput(next_compute_op->Outputs().front());
+            compute_ref_cnt_map[next_compute_op].reset(ref_cnt_handle);
+          }
+        }
       }
     }
   };
 
-  std::unordered_map<ComputationOpHandle *, ReferenceCountOpHandle *>
-      compute_ref_cnt_map;
-
   auto &all_ops = graph->Get<GraphOps>(kGraphOps);
   for (auto &op : all_ops) {
     auto in_var_names = get_ref_cnts_from_compute_op(op, op->Inputs());
@@ -113,11 +160,13 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
     auto *ref_cnt_handle = new ReferenceCountOpHandle(
         ref_cnt_node, compute_op->GetScope(), place, in_var_names,
         gcs[place.device].get(), cur_ref_cnts[place.device].get());
-    auto *dep_var = new DummyVarHandle(graph->CreateControlDepVar());
-    compute_op->AddOutput(dep_var);
-    ref_cnt_handle->AddInput(dep_var);
-    graph->Get<GraphDepVars>(kGraphDepVars).emplace(dep_var);
-    compute_ref_cnt_map[compute_op] = ref_cnt_handle;
+    if (compute_op->Outputs().empty()) {
+      auto *dep_var = new DummyVarHandle(graph->CreateControlDepVar());
+      compute_op->AddOutput(dep_var);
+      graph->Get<GraphDepVars>(kGraphDepVars).emplace(dep_var);
+    }
+    ref_cnt_handle->AddInput(compute_op->Outputs().front());
+    compute_ref_cnt_map[compute_op].reset(ref_cnt_handle);
   }
 
   for (auto &op : all_ops) {
@@ -131,7 +180,11 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
     new_all_ops.emplace_back(std::move(op));
     auto it = compute_ref_cnt_map.find(new_all_ops.back().get());
     if (it != compute_ref_cnt_map.end()) {
-      new_all_ops.emplace_back(it->second);
+      // Add LeafNode to ReferenceCountOpHandle
+      auto *dummy_leaf = new DummyVarHandle(graph->CreateControlDepVar());
+      graph->Get<GraphDepVars>(kGraphDepVars).emplace(dummy_leaf);
+      it->second->AddOutput(dummy_leaf);
+      new_all_ops.emplace_back(std::move(it->second));
     }
   }
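The two reference-counting files above replace one-decrement-per-use with a per-op use count (accumulated by `AddVar`) that is subtracted in a single atomic step, using `fetch_sub(n) <= n` as the "this op was the last user" test. A standalone illustration of just that idiom, with no Paddle types involved (the function name is illustrative):

```cpp
#include <atomic>
#include <cassert>

// A consumer that uses a variable `uses` times decrements the shared
// counter once, by `uses`. fetch_sub returns the value *before* the
// subtraction, so observing old <= uses means this caller drove the
// count to zero (or below) and is responsible for releasing the memory.
bool ReleaseIfLastUser(std::atomic<int>* ref_cnt, int uses) {
  return ref_cnt->fetch_sub(uses) <= uses;
}

int main() {
  std::atomic<int> cnt{3};              // variable referenced 3 times total
  assert(!ReleaseIfLastUser(&cnt, 1));  // 3 -> 2: other users remain
  assert(ReleaseIfLastUser(&cnt, 2));   // 2 -> 0: last user frees here
  return 0;
}
```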
diff --git a/paddle/fluid/framework/mixed_vector.h b/paddle/fluid/framework/mixed_vector.h
index ba2c41eb8968e30bc8a801f4d8d239da8c522de9..7836ecb1272a07a79a70c9cb040335f9a42e5684 100644
--- a/paddle/fluid/framework/mixed_vector.h
+++ b/paddle/fluid/framework/mixed_vector.h
@@ -17,12 +17,10 @@
 #include <algorithm>
 #include <initializer_list>
 #include <memory>
-#include <mutex>
 #include <vector>
-#include "paddle/fluid/framework/details/cow_ptr.h"
+
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/memory/memcpy.h"
 
 #include "glog/logging.h"
 
@@ -30,401 +28,206 @@ namespace paddle {
 namespace framework {
 
 #if defined(PADDLE_WITH_CUDA)
-namespace details {
-struct CUDABuffer {
-  void *data_{nullptr};
-  size_t size_{0};
-  platform::CUDAPlace place_;
-
-  CUDABuffer() {}
-  CUDABuffer(platform::Place place, size_t size)
-      : size_(size), place_(boost::get<platform::CUDAPlace>(place)) {
-    data_ = memory::Alloc(place_, size);
-  }
-
-  ~CUDABuffer() { ClearMemory(); }
-
-  CUDABuffer(const CUDABuffer &o) = delete;
-  CUDABuffer &operator=(const CUDABuffer &o) = delete;
-
-  void Resize(platform::Place place, size_t size) {
-    ClearMemory();
-    place_ = boost::get<platform::CUDAPlace>(place);
-    data_ = memory::Alloc(place_, size);
-    size_ = size;
-  }
-
-  void Swap(CUDABuffer &o) {
-    std::swap(data_, o.data_);
-    std::swap(place_, o.place_);
-    std::swap(size_, o.size_);
-  }
-
- private:
-  void ClearMemory() const {
-    if (data_) {
-      memory::Free(place_, data_);
-    }
-  }
-};
-}  // namespace details
-
 // Vector<T> implements the std::vector interface, and can get Data or
 // MutableData from any place. The data will be synced implicitly inside.
 template <typename T>
 class Vector {
  public:
   using value_type = T;
-  using iterator = typename std::vector<T>::iterator;
-  using const_iterator = typename std::vector<T>::const_iterator;
-
- private:
-  // The actual class to implement vector logic
-  class VectorData {
-   public:
-    VectorData() : flag_(kDataInCPU) {}
-    VectorData(size_t count, const T &value)
-        : cpu_(count, value), flag_(kDataInCPU) {}
-    VectorData(std::initializer_list<T> init) : cpu_(init), flag_(kDataInCPU) {}
-    template <typename U>
-    explicit VectorData(const std::vector<U> &dat)
-        : cpu_(dat), flag_(kDataInCPU) {}
-
-    VectorData(const VectorData &o) {
-      o.ImmutableCPU();
-      cpu_ = o.cpu_;
-      flag_ = kDataInCPU;
-    }
-
-    VectorData &operator=(const VectorData &o) {
-      o.ImmutableCPU();
-      cpu_ = o.cpu_;
-      flag_ = kDataInCPU;
-      details::CUDABuffer null;
-      gpu_.Swap(null);
-      return *this;
-    }
-
-    T &operator[](size_t i) {
-      MutableCPU();
-      return cpu_[i];
-    }
-
-    const T &operator[](size_t i) const {
-      ImmutableCPU();
-      return cpu_[i];
-    }
-
-    size_t size() const { return cpu_.size(); }
-
-    iterator begin() {
-      MutableCPU();
-      return cpu_.begin();
-    }
-
-    iterator end() {
-      MutableCPU();
-      return cpu_.end();
-    }
-
-    T &front() {
-      MutableCPU();
-      return cpu_.front();
-    }
-
-    T &back() {
-      MutableCPU();
-      return cpu_.back();
-    }
-
-    const_iterator begin() const {
-      ImmutableCPU();
-      return cpu_.begin();
-    }
-
-    const_iterator end() const {
-      ImmutableCPU();
-      return cpu_.end();
-    }
-
-    const T &back() const {
-      ImmutableCPU();
-      return cpu_.back();
-    }
-
-    T *data() { return &(*this)[0]; }
-
-    const T *data() const { return &(*this)[0]; }
-
-    const T &front() const {
-      ImmutableCPU();
-      return cpu_.front();
-    }
-
-    // assign this from iterator.
-    // NOTE: the iterator must support `end-begin`
-    template <typename Iter>
-    void assign(Iter begin, Iter end) {
-      MutableCPU();
-      cpu_.assign(begin, end);
-    }
-
-    // push_back. If the previous capacity is not enough, the memory will
-    // double.
-    void push_back(T elem) {
-      MutableCPU();
-      cpu_.push_back(elem);
-    }
-
-    // extend a vector by iterator.
-    // NOTE: the iterator must support end-begin
-    template <typename It>
-    void Extend(It begin, It end) {
-      MutableCPU();
-      auto out_it = std::back_inserter<std::vector<T>>(this->cpu_);
-      std::copy(begin, end, out_it);
-    }
-
-    // resize the vector
-    void resize(size_t size) {
-      MutableCPU();
-      cpu_.resize(size);
-    }
-
-    // get cuda ptr. immutable
-    const T *CUDAData(platform::Place place) const {
-      PADDLE_ENFORCE(platform::is_gpu_place(place),
-                     "CUDA Data must on CUDA place");
-      ImmutableCUDA(place);
-      return reinterpret_cast<T *>(gpu_.data_);
-    }
-
-    // get cuda ptr. mutable
-    T *CUDAMutableData(platform::Place place) {
-      const T *ptr = CUDAData(place);
-      flag_ = kDirty | kDataInCUDA;
-      return const_cast<T *>(ptr);
-    }
-
-    // clear
-    void clear() {
-      cpu_.clear();
-      flag_ = kDirty | kDataInCPU;
-    }
-
-    size_t capacity() const { return cpu_.capacity(); }
-
-    // reserve data
-    void reserve(size_t size) { cpu_.reserve(size); }
-
-    // implicit cast operator. Vector can be cast to std::vector implicitly.
-    operator std::vector<T>() const {
-      ImmutableCPU();
-      return cpu_;
-    }
-
-    bool operator==(const VectorData &other) const {
-      ImmutableCPU();
-      other.ImmutableCPU();
-      return cpu_ == other.cpu_;
-    }
-
-   private:
-    enum DataFlag {
-      kDataInCPU = 0x01,
-      kDataInCUDA = 0x02,
-      // kDirty means the data has been changed in one device.
-      kDirty = 0x10
-    };
-
-    void CopyToCPU() const {
-      // COPY GPU Data To CPU
-      void *src = gpu_.data_;
-      void *dst = cpu_.data();
-      memory::Copy(platform::CPUPlace(), dst, gpu_.place_, src, gpu_.size_,
-                   nullptr);
-    }
-
-    void MutableCPU() {
-      if (IsInCUDA() && IsDirty()) {
-        CopyToCPU();
-      }
-      flag_ = kDirty | kDataInCPU;
-    }
-
-    void ImmutableCUDA(platform::Place place) const {
-      if (IsDirty()) {
-        if (IsInCPU()) {
-          CopyCPUDataToCUDA(place);
-          UnsetFlag(kDirty);
-          SetFlag(kDataInCUDA);
-        } else if (IsInCUDA() &&
-                   !(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) {
-          CopyCUDADataToAnotherPlace(place);
-          // Still dirty
-        } else {
-          // Dirty && DataInCUDA && Device is same
-          // Do nothing
-        }
-      } else {
-        if (!IsInCUDA()) {
-          // Even data is not dirty. However, data is not in CUDA. Copy data.
-          CopyCPUDataToCUDA(place);
-          SetFlag(kDataInCUDA);
-        } else if (!(boost::get<platform::CUDAPlace>(place) == gpu_.place_)) {
-          CopyCUDADataToAnotherPlace(place);
-        } else {
-          // Not Dirty && DataInCUDA && Device is same
-          // Do nothing.
-        }
-      }
-    }
-    void CopyCUDADataToAnotherPlace(const platform::Place &place) const {
-      details::CUDABuffer tmp(place, gpu_.size_);
-      const void *src = gpu_.data_;
-      void *dst = tmp.data_;
-
-      memory::Copy(tmp.place_, dst, gpu_.place_, src, gpu_.size_, nullptr);
-      gpu_.Swap(tmp);
-    }
-    void CopyCPUDataToCUDA(const platform::Place &place) const {
-      void *src = cpu_.data();
-      gpu_.Resize(place, cpu_.size() * sizeof(T));
-      void *dst = gpu_.data_;
-      auto stream = static_cast<platform::CUDADeviceContext *>(
-                        platform::DeviceContextPool::Instance().Get(place))
-                        ->stream();
-      memory::Copy(gpu_.place_, dst, platform::CPUPlace(), src, gpu_.size_,
-                   stream);
-    }
-
-    void ImmutableCPU() const {
-      if (IsDirty() && !IsInCPU()) {  // If data has been changed in CUDA, or
-                                      // CPU has no data.
-        CopyToCPU();
-        UnsetFlag(kDirty);
-      }
-      SetFlag(kDataInCPU);
-    }
-
-    void UnsetFlag(int flag) const { flag_ &= ~flag; }
-    void SetFlag(int flag) const { flag_ |= flag; }
-
-    bool IsDirty() const { return flag_ & kDirty; }
-
-    bool IsInCUDA() const { return flag_ & kDataInCUDA; }
-    bool IsInCPU() const { return flag_ & kDataInCPU; }
-
-    mutable std::vector<T> cpu_;
-    mutable details::CUDABuffer gpu_;
-    mutable int flag_;
-  };
-
- public:
   // Default ctor. Create empty Vector
-  Vector() : m_(new VectorData()) {}
+  Vector() { InitEmpty(); }
 
   // Fill vector with value. The vector size is `count`.
-  explicit Vector(size_t count, const T &value = T())
-      : m_(new VectorData(count, value)) {}
+  explicit Vector(size_t count, const T &value = T()) {
+    InitEmpty();
+    if (count != 0) {
+      resize(count);
+      T *ptr = begin();
+      for (size_t i = 0; i < count; ++i) {
+        ptr[i] = value;
+      }
+    }
+  }
 
   // Ctor with init_list
-  Vector(std::initializer_list<T> init) : m_(new VectorData(init)) {}
+  Vector(std::initializer_list<T> init) {
+    if (init.size() == 0) {
+      InitEmpty();
+    } else {
+      InitByIter(init.size(), init.begin(), init.end());
+    }
+  }
 
   // implicit cast from std::vector.
   template <typename U>
-  Vector(const std::vector<U> &dat) : m_(new VectorData(dat)) {  // NOLINT
+  Vector(const std::vector<U> &dat) {  // NOLINT
+    if (dat.size() == 0) {
+      InitEmpty();
+    } else {
+      InitByIter(dat.size(), dat.begin(), dat.end());
+    }
   }
 
   // Copy ctor
-  Vector(const Vector<T> &other) { m_ = other.m_; }
+  Vector(const Vector<T> &other) { this->operator=(other); }
 
   // Copy operator
   Vector<T> &operator=(const Vector<T> &other) {
-    m_ = other.m_;
+    if (other.size() != 0) {
+      this->InitByIter(other.size(), other.begin(), other.end());
+    } else {
+      InitEmpty();
+    }
     return *this;
   }
 
   // Move ctor
-  Vector(Vector<T> &&other) { m_ = std::move(other.m_); }
+  Vector(Vector<T> &&other) {
+    this->size_ = other.size_;
+    this->flag_ = other.flag_;
+    if (other.cuda_vec_.memory_size()) {
+      this->cuda_vec_.ShareDataWith(other.cuda_vec_);
+    }
+    if (other.cpu_vec_.memory_size()) {
+      this->cpu_vec_.ShareDataWith(other.cpu_vec_);
+    }
+  }
 
   // CPU data access method. Mutable.
-  T &operator[](size_t i) { return (*m_)[i]; }
+  T &operator[](size_t i) {
+    MutableCPU();
+    return const_cast<T *>(cpu_vec_.data<T>())[i];
+  }
 
   // CPU data access method. Immutable.
-  const T &operator[](size_t i) const { return (*m_)[i]; }
+  const T &operator[](size_t i) const {
+    ImmutableCPU();
+    return cpu_vec_.data<T>()[i];
+  }
 
   // std::vector iterator methods. Based on CPU data access method
-  size_t size() const { return m_->size(); }
+  size_t size() const { return size_; }
 
-  iterator begin() { return m_->begin(); }
+  T *begin() { return capacity() == 0 ? &EmptyDummy() : &this->operator[](0); }
 
-  iterator end() { return m_->end(); }
+  T *end() {
+    return capacity() == 0 ? &EmptyDummy() : &this->operator[](size());
+  }
 
-  T &front() { return m_->front(); }
+  T &front() { return *begin(); }
 
-  T &back() { return m_->back(); }
+  T &back() {
+    auto it = end();
+    --it;
+    return *it;
+  }
 
-  const_iterator begin() const { return m_->begin(); }
+  const T *begin() const {
+    return capacity() == 0 ? &EmptyDummy() : &this->operator[](0);
+  }
 
-  const_iterator end() const { return m_->end(); }
+  const T *end() const {
+    return capacity() == 0 ? &EmptyDummy() : &this->operator[](size());
+  }
 
-  const_iterator cbegin() const { return begin(); }
+  const T *cbegin() const { return begin(); }
 
-  const_iterator cend() const { return end(); }
+  const T *cend() const { return end(); }
 
-  const T &back() const { return m_->back(); }
+  const T &back() const {
+    auto it = end();
+    --it;
+    return *it;
+  }
 
-  T *data() { return m_->data(); }
+  T *data() { return begin(); }
 
-  const T *data() const { return m_->data(); }
+  const T *data() const { return begin(); }
 
-  const T &front() const { return m_->front(); }
+  const T &front() const { return *begin(); }
   // end of std::vector iterator methods
 
   // assign this from iterator.
   // NOTE: the iterator must support `end-begin`
   template <typename Iter>
   void assign(Iter begin, Iter end) {
-    m_->assign(begin, end);
+    InitByIter(end - begin, begin, end);
   }
 
   // push_back. If the previous capacity is not enough, the memory will
   // double.
-  void push_back(T elem) { m_->push_back(elem); }
+  void push_back(T elem) {
+    if (size_ + 1 > capacity()) {
+      reserve((size_ + 1) << 1);
+    }
+    *end() = elem;
+    ++size_;
+  }
 
   // extend a vector by iterator.
   // NOTE: the iterator must support end-begin
   template <typename It>
   void Extend(It begin, It end) {
-    m_->Extend(begin, end);
+    size_t pre_size = size_;
+    resize(pre_size + (end - begin));
+    T *ptr = this->begin() + pre_size;
+    for (; begin < end; ++begin, ++ptr) {
+      *ptr = *begin;
+    }
   }
 
   // resize the vector
   void resize(size_t size) {
-    if (m_.Data().size() != size) {
-      m_->resize(size);
+    if (size + 1 <= capacity()) {
+      size_ = size;
+    } else {
+      MutableCPU();
+      Tensor cpu_tensor;
+      platform::Place cpu = platform::CPUPlace();
+      T *ptr = cpu_tensor.mutable_data<T>(
+          framework::make_ddim({static_cast<int64_t>(size)}), cpu);
+      const T *old_ptr =
+          cpu_vec_.memory_size() == 0 ? nullptr : cpu_vec_.data<T>();
+      if (old_ptr != nullptr) {
+        std::copy(old_ptr, old_ptr + size_, ptr);
+      }
+      size_ = size;
+      cpu_vec_.ShareDataWith(cpu_tensor);
     }
   }
 
   // get cuda ptr. immutable
   const T *CUDAData(platform::Place place) const {
-    return m_.Data().CUDAData(place);
+    PADDLE_ENFORCE(platform::is_gpu_place(place),
+                   "CUDA Data must on CUDA place");
+    ImmutableCUDA(place);
+    return cuda_vec_.data<T>();
   }
 
   // get cuda ptr. mutable
   T *CUDAMutableData(platform::Place place) {
-    return m_->CUDAMutableData(place);
+    const T *ptr = CUDAData(place);
+    flag_ = kDirty | kDataInCUDA;
+    return const_cast<T *>(ptr);
   }
 
   // clear
-  void clear() { m_->clear(); }
+  void clear() {
+    size_ = 0;
+    flag_ = kDirty | kDataInCPU;
+  }
 
-  size_t capacity() const { return m_->capacity(); }
+  size_t capacity() const {
+    return cpu_vec_.memory_size() / SizeOfType(typeid(T));
+  }
 
   // reserve data
-  void reserve(size_t size) { m_->reserve(size); }
+  void reserve(size_t size) {
+    size_t pre_size = size_;
+    resize(size);
+    resize(pre_size);
+  }
 
   // the unify method to access CPU or CUDA data. immutable.
   const T *Data(platform::Place place) const {
@@ -445,7 +248,12 @@ class Vector {
   }
 
   // implicit cast operator. Vector can be cast to std::vector implicitly.
-  operator std::vector<T>() const { return *m_; }
+  operator std::vector<T>() const {
+    std::vector<T> result;
+    result.resize(size());
+    std::copy(begin(), end(), result.begin());
+    return result;
+  }
 
   bool operator==(const Vector<T> &other) const {
     if (size() != other.size()) return false;
@@ -459,11 +267,118 @@ class Vector {
     return true;
   }
 
-  const void *Handle() const { return &m_.Data(); }
-
  private:
-  // Vector is an COW object.
-  details::COWPtr<VectorData> m_;
+  void InitEmpty() {
+    size_ = 0;
+    flag_ = kDataInCPU;
+  }
+
+  template <typename Iter>
+  void InitByIter(size_t size, Iter begin, Iter end) {
+    platform::Place cpu = platform::CPUPlace();
+    T *ptr = this->cpu_vec_.template mutable_data<T>(
+        framework::make_ddim({static_cast<int64_t>(size)}), cpu);
+    for (size_t i = 0; i < size; ++i) {
+      *ptr++ = *begin++;
+    }
+    flag_ = kDataInCPU | kDirty;
+    size_ = size;
+  }
+
+  enum DataFlag {
+    kDataInCPU = 0x01,
+    kDataInCUDA = 0x02,
+    // kDirty means the data has been changed in one device.
+    kDirty = 0x10
+  };
+
+  void CopyToCPU() const {
+    // COPY GPU Data To CPU
+    TensorCopy(cuda_vec_, platform::CPUPlace(), &cpu_vec_);
+    WaitPlace(cuda_vec_.place());
+  }
+
+  void MutableCPU() {
+    if (IsInCUDA() && IsDirty()) {
+      CopyToCPU();
+    }
+    flag_ = kDirty | kDataInCPU;
+  }
+
+  void ImmutableCUDA(platform::Place place) const {
+    if (IsDirty()) {
+      if (IsInCPU()) {
+        TensorCopy(cpu_vec_, boost::get<platform::CUDAPlace>(place),
+                   &cuda_vec_);
+        WaitPlace(place);
+        UnsetFlag(kDirty);
+        SetFlag(kDataInCUDA);
+      } else if (IsInCUDA() && !(place == cuda_vec_.place())) {
+        framework::Tensor tmp;
+        TensorCopy(cuda_vec_, boost::get<platform::CUDAPlace>(place), &tmp);
+        WaitPlace(cuda_vec_.place());
+        cuda_vec_.ShareDataWith(tmp);
+        // Still dirty
+      } else {
+        // Dirty && DataInCUDA && Device is same
+        // Do nothing
+      }
+    } else {
+      if (!IsInCUDA()) {
+        // Even data is not dirty. However, data is not in CUDA. Copy data.
+        TensorCopy(cpu_vec_, boost::get<platform::CUDAPlace>(place),
+                   &cuda_vec_);
+        WaitPlace(place);
+        SetFlag(kDataInCUDA);
+      } else if (!(place == cuda_vec_.place())) {
+        framework::Tensor tmp;
+        WaitPlace(cuda_vec_.place());
+        TensorCopy(cuda_vec_, boost::get<platform::CUDAPlace>(place), &tmp);
+        WaitPlace(cuda_vec_.place());
+        WaitPlace(place);
+        cuda_vec_.ShareDataWith(tmp);
+      } else {
+        // Not Dirty && DataInCUDA && Device is same
+        // Do nothing.
+      }
+    }
+  }
+
+  void ImmutableCPU() const {
+    if (IsDirty() &&
+        !IsInCPU()) {  // If data has been changed in CUDA, or CPU has no data.
+      CopyToCPU();
+      UnsetFlag(kDirty);
+    }
+    SetFlag(kDataInCPU);
+  }
+
+  void UnsetFlag(int flag) const { flag_ &= ~flag; }
+  void SetFlag(int flag) const { flag_ |= flag; }
+
+  bool IsDirty() const { return flag_ & kDirty; }
+
+  bool IsInCUDA() const { return flag_ & kDataInCUDA; }
+
+  bool IsInCPU() const { return flag_ & kDataInCPU; }
+
+  static void WaitPlace(const platform::Place place) {
+    if (platform::is_gpu_place(place)) {
+      platform::DeviceContextPool::Instance()
+          .Get(boost::get<platform::CUDAPlace>(place))
+          ->Wait();
+    }
+  }
+
+  static T &EmptyDummy() {
+    static T dummy = T();
+    return dummy;
+  }
+
+  mutable int flag_;
+  mutable Tensor cpu_vec_;
+  mutable Tensor cuda_vec_;
+  size_t size_;
 };
 
 #else  // PADDLE_WITH_CUDA
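The restored `Vector<T>` above keeps one CPU tensor, one CUDA tensor, and a flag word, and synchronizes lazily: a mutable CPU access marks the CPU copy dirty, the next device read copies it across, and a clean cached device copy is reused untouched. A compilable toy model of that flag protocol (the counter stands in for `TensorCopy`; this illustrates the state machine, it is not the Paddle implementation):

```cpp
#include <cassert>

// Minimal model of the dirty/location flags used by Vector<T> above.
enum DataFlag { kDataInCPU = 0x01, kDataInCUDA = 0x02, kDirty = 0x10 };

struct SyncedBuffer {
  int flag = kDataInCPU;
  int copies_to_gpu = 0;  // stands in for TensorCopy(cpu_vec_, gpu, ...)

  void MutableCPU() {
    // After a mutable CPU access, only the CPU copy is authoritative.
    flag = kDirty | kDataInCPU;
  }

  void ImmutableCUDA() {
    // Copy to the device only if the CPU side changed or the device never
    // had the data; otherwise reuse the cached device copy.
    if ((flag & kDirty) || !(flag & kDataInCUDA)) {
      ++copies_to_gpu;
      flag = (flag & ~kDirty) | kDataInCUDA | kDataInCPU;
    }
  }
};

int main() {
  SyncedBuffer buf;
  buf.MutableCPU();     // write on CPU
  buf.ImmutableCUDA();  // first device read: one copy
  buf.ImmutableCUDA();  // cached: no extra copy
  assert(buf.copies_to_gpu == 1);
  buf.MutableCPU();     // another CPU write invalidates the device copy
  buf.ImmutableCUDA();
  assert(buf.copies_to_gpu == 2);
  return 0;
}
```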
diff --git a/paddle/fluid/operators/adam_op.h b/paddle/fluid/operators/adam_op.h
index 5b27068c9e805146b8bce03f4f676ef0d4d16c53..4cb1f3a80e95bdda79e6451dc3cc87e899b11779 100644
--- a/paddle/fluid/operators/adam_op.h
+++ b/paddle/fluid/operators/adam_op.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include <math.h>  // for sqrt in CPU and CUDA
 #include <Eigen/Dense>
+#include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/detail/safe_ref.h"
 #include "paddle/fluid/operators/math/selected_rows_functor.h"
@@ -306,26 +307,43 @@ class AdamOpKernel : public framework::OpKernel<T> {
         VLOG(3) << "grad row size is 0!!";
         return;
       }
-      // merge duplicated rows if any.
-      // The rows of grad_merge have been sorted inside MergeAdd functor
-      scatter::MergeAdd<DeviceContext, T> merge_func;
-      auto& grad_merge = *(ctx.scope()
-                               .NewScope()
-                               .Var("sparse_adam_grad_merge")
-                               ->GetMutable<framework::SelectedRows>());
-      merge_func(ctx.template device_context<DeviceContext>(), grad,
-                 &grad_merge);
+
+      std::vector<int64_t> cpu_rows(grad.rows().begin(), grad.rows().end());
+      bool is_strict_sorted = true;
+      for (size_t i = 1; i < cpu_rows.size(); ++i) {
+        if (cpu_rows[i - 1] >= cpu_rows[i]) {
+          is_strict_sorted = false;
+          break;
+        }
+      }
+
+      const framework::SelectedRows* grad_merge_ptr;
+      if (is_strict_sorted) {
+        grad_merge_ptr = &grad;
+      } else {
+        // merge duplicated rows if any.
+        // The rows of grad_merge have been sorted inside MergeAdd functor
+        scatter::MergeAdd<DeviceContext, T> merge_func;
+        auto* grad_merge_var = const_cast<framework::Scope&>(ctx.scope())
+                                   .Var()
+                                   ->GetMutable<framework::SelectedRows>();
+        merge_func(ctx.template device_context<DeviceContext>(), grad,
+                   grad_merge_var);
+        grad_merge_ptr = grad_merge_var;
+      }
+
+      auto& grad_merge = *grad_merge_ptr;
       auto& grad_tensor = grad_merge.value();
       const T* grad_data = grad_tensor.template data<T>();
-      int64_t* rows = nullptr;
-// When compiled without CUDA, the CUDAMutableData() interface should not be
+      const int64_t* rows = nullptr;
+// When compiled without CUDA, the CUDAData() interface should not be
 // provided.
 #if defined(PADDLE_WITH_CUDA)
       if (platform::is_gpu_place(ctx.GetPlace())) {
-        rows = grad_merge.mutable_rows()->CUDAMutableData(ctx.GetPlace());
+        rows = grad_merge.rows().CUDAData(ctx.GetPlace());
       } else {
 #endif
-        rows = grad_merge.mutable_rows()->data();
+        rows = grad_merge.rows().data();
 #if defined(PADDLE_WITH_CUDA)
       }
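Context for the `AdamOpKernel` change above: `MergeAdd` exists to sort the gradient's rows and combine duplicate row indices before the sparse update, so when the rows are already strictly increasing there is nothing to merge and the kernel can use `grad` directly, skipping a scope allocation and a full copy. The loop in the diff is the standard strictly-increasing scan; an equivalent standalone form using only the standard library:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <vector>

// Rows are strictly sorted when every element is greater than its
// predecessor; that also rules out duplicate rows, so no merge is needed.
bool IsStrictlySorted(const std::vector<int64_t>& rows) {
  return std::adjacent_find(rows.begin(), rows.end(),
                            std::greater_equal<int64_t>()) == rows.end();
}

int main() {
  assert(IsStrictlySorted({1, 3, 7}));   // can be used as-is
  assert(!IsStrictlySorted({1, 3, 3}));  // duplicates: MergeAdd required
  assert(!IsStrictlySorted({3, 1, 7}));  // unsorted: MergeAdd required
  return 0;
}
```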
diff --git a/paddle/fluid/operators/detection_map_op.h b/paddle/fluid/operators/detection_map_op.h
index dd5d138a1e979826d59c4731920379b030e3b492..dd1ab85fd8d0c8170afcd9dd2a49ee55c41dc8be 100644
--- a/paddle/fluid/operators/detection_map_op.h
+++ b/paddle/fluid/operators/detection_map_op.h
@@ -76,8 +76,8 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
     auto ap_type = GetAPType(ctx.Attr<std::string>("ap_type"));
     int class_num = ctx.Attr<int>("class_num");
 
-    auto& label_lod = in_label->lod();
-    auto& detect_lod = in_detect->lod();
+    auto label_lod = in_label->lod();
+    auto detect_lod = in_detect->lod();
     PADDLE_ENFORCE_EQ(label_lod.size(), 1UL,
                       "Only support one level sequence now.");
     PADDLE_ENFORCE_EQ(label_lod[0].size(), detect_lod[0].size(),
@@ -166,11 +166,11 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
     auto labels = framework::EigenTensor<T, 2>::From(input_label);
     auto detect = framework::EigenTensor<T, 2>::From(input_detect);
 
-    auto& label_lod = input_label.lod();
-    auto& detect_lod = input_detect.lod();
+    auto label_lod = input_label.lod();
+    auto detect_lod = input_detect.lod();
 
     int batch_size = label_lod[0].size() - 1;
-    auto& label_index = label_lod[0];
+    auto label_index = label_lod[0];
 
     for (int n = 0; n < batch_size; ++n) {
       std::map<int, std::vector<Box>> boxes;
@@ -274,6 +274,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
 
     output_true_pos->set_lod(true_pos_lod);
     output_false_pos->set_lod(false_pos_lod);
+    return;
   }
 
   void GetInputPos(const framework::Tensor& input_pos_count,
@@ -291,7 +292,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
     auto SetData = [](const framework::LoDTensor& pos_tensor,
                       std::map<int, std::vector<std::pair<T, int>>>& pos) {
       const T* pos_data = pos_tensor.data<T>();
-      auto& pos_data_lod = pos_tensor.lod()[0];
+      auto pos_data_lod = pos_tensor.lod()[0];
       for (size_t i = 0; i < pos_data_lod.size() - 1; ++i) {
         for (size_t j = pos_data_lod[i]; j < pos_data_lod[i + 1]; ++j) {
           T score = pos_data[j * 2];
@@ -316,23 +317,20 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
       std::map<int, std::vector<std::pair<T, int>>>* false_pos) const {
     int batch_size = gt_boxes.size();
     for (int n = 0; n < batch_size; ++n) {
-      auto& image_gt_boxes = gt_boxes[n];
-      for (auto& image_gt_box : image_gt_boxes) {
+      auto image_gt_boxes = gt_boxes[n];
+      for (auto it = image_gt_boxes.begin(); it != image_gt_boxes.end(); ++it) {
         size_t count = 0;
-        auto& labeled_bboxes = image_gt_box.second;
+        auto labeled_bboxes = it->second;
         if (evaluate_difficult) {
           count = labeled_bboxes.size();
         } else {
-          for (auto& box : labeled_bboxes) {
-            if (!box.is_difficult) {
-              ++count;
-            }
-          }
+          for (size_t i = 0; i < labeled_bboxes.size(); ++i)
+            if (!(labeled_bboxes[i].is_difficult)) ++count;
         }
         if (count == 0) {
           continue;
         }
-        int label = image_gt_box.first;
+        int label = it->first;
         if (label_pos_count->find(label) == label_pos_count->end()) {
           (*label_pos_count)[label] = count;
         } else {
diff --git a/paddle/fluid/operators/extract_rows_op.cc b/paddle/fluid/operators/extract_rows_op.cc
index 3acae3bcdf4a509ab6e7e19f21c4b2ec4d72b7d7..9a297d03cfb041e584159a5fc5ba214f8ac404b4 100644
--- a/paddle/fluid/operators/extract_rows_op.cc
+++ b/paddle/fluid/operators/extract_rows_op.cc
@@ -50,7 +50,7 @@ class ExtractRowsOp : public framework::OperatorBase {
     auto &in = scope.FindVar(Input("X"))->Get<framework::SelectedRows>();
     auto out = scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensor>();
 
-    auto &in_rows = in.rows();
+    auto in_rows = in.rows();
     auto out_dim = framework::make_ddim(
         std::vector<int64_t>{static_cast<int64_t>(in_rows.size()), 1});
     auto dst_ptr = out->mutable_data<int64_t>(out_dim, in.place());
diff --git a/paddle/fluid/operators/math/selected_rows_functor.cu b/paddle/fluid/operators/math/selected_rows_functor.cu
index ba8eccf82042b679f69a32f9d053f05ac8fb9a99..b27880c232a51d32777569cf9ac67656ce02f232 100644
--- a/paddle/fluid/operators/math/selected_rows_functor.cu
+++ b/paddle/fluid/operators/math/selected_rows_functor.cu
@@ -60,9 +60,11 @@ struct SelectedRowsAdd<platform::CUDADeviceContext, T> {
     auto out_place = context.GetPlace();
     PADDLE_ENFORCE(platform::is_gpu_place(out_place));
 
-    memory::Copy(boost::get<platform::CUDAPlace>(out_place), out_data,
-                 boost::get<platform::CUDAPlace>(in1_place), in1_data,
-                 in1_value.numel() * sizeof(T), context.stream());
+    memory::Copy(
+        boost::get<platform::CUDAPlace>(out_place), out_data,
+        boost::get<platform::CUDAPlace>(in1_place), in1_data,
+        in1_value.numel() * sizeof(T),
+        reinterpret_cast<const platform::CUDADeviceContext&>(context).stream());
 
     auto* in2_data = in2_value.data<T>();
     memory::Copy(boost::get<platform::CUDAPlace>(out_place),
@@ -146,7 +148,7 @@ struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
     auto in1_height = input1.height();
     PADDLE_ENFORCE_EQ(in1_height, input2->height());
 
-    auto& in1_rows = input1.rows();
+    framework::Vector<int64_t> in1_rows(input1.rows());
     auto& in2_rows = *(input2->mutable_rows());
 
     auto& in1_value = input1.value();
diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h
index 2c4c2411259e9b7bfa223e4d65823ce4610596d0..6dffe527c1072ee97fcde1725bfc1a47ed1ad74a 100644
--- a/paddle/fluid/operators/sum_op.h
+++ b/paddle/fluid/operators/sum_op.h
@@ -123,6 +123,7 @@ class SumKernel : public framework::OpKernel<T> {
     out_value->Resize(framework::make_ddim(in_dim));
     out_value->mutable_data<T>(context.GetPlace());
+    // if all the input sparse vars are empty, no need to
     // merge these vars.
     if (first_dim == 0UL) {
diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index 7a7a0078a557c47492a4396897aafabe6c9c5dcb..a26b8df5a240be8340597b9627866c323fa98a2d 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -74,7 +74,7 @@ class Initializer(object):
     directly, but need to use one of its implementations.
""" - def __init_(self): + def __init__(self): pass def __call__(self, param, block): @@ -293,7 +293,7 @@ class TruncatedNormalInitializer(Initializer): assert loc is not None assert scale is not None assert seed is not None - super(NormalInitializer, self).__init__() + super(TruncatedNormalInitializer, self).__init__() self._mean = loc self._std_dev = scale self._seed = seed diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index e99e8afd877a8ea342aac0ce30a9750d3d30942d..d558f2436441c6be8728ced6670b04cc367634a2 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -107,6 +107,12 @@ __all__ = [ 'log', 'crop', 'rank_loss', + 'elu', + 'relu6', + 'pow', + 'stanh', + 'hard_sigmoid', + 'swish', 'prelu', 'brelu', 'leaky_relu', @@ -5898,6 +5904,148 @@ def pad2d(input, return out +@templatedoc() +def elu(x, alpha=1.0, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + alpha(${alpha_type}|1.0): ${alpha_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('elu', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='elu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'alpha': alpha}) + return out + + +@templatedoc() +def relu6(x, threshold=6.0, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + threshold(${threshold_type}|6.0): ${threshold_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('relu6', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='relu6', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'threshold': threshold}) + return out + + +@templatedoc() +def pow(x, factor=1.0, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + factor(${factor_type}|1.0): ${factor_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('pow', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='pow', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'factor': factor}) + return out + + +@templatedoc() +def stanh(x, scale_a=2.0 / 3.0, scale_b=1.7159, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + scale_a(${scale_a_type}|2.0 / 3.0): ${scale_a_comment} + scale_b(${scale_b_type}|1.7159): ${scale_b_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('stanh', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='stanh', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'scale_a': scale_a, + 'scale_b': scale_b}) + return out + + +@templatedoc() +def hard_sigmoid(x, slope=0.2, offset=0.5, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + slope(${slope_type}|0.2): ${slope_comment} + offset(${offset_type}|0.5): ${offset_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. 
+
+    Returns:
+        output(${out_type}): ${out_comment}
+    """
+    helper = LayerHelper('hard_sigmoid', **locals())
+    out = helper.create_tmp_variable(dtype=x.dtype)
+    helper.append_op(
+        type='hard_sigmoid',
+        inputs={'X': x},
+        outputs={'Out': out},
+        attrs={'slope': slope,
+               'offset': offset})
+    return out
+
+
+@templatedoc()
+def swish(x, beta=1.0, name=None):
+    """
+    ${comment}
+    Args:
+        x(${x_type}): ${x_comment}
+        beta(${beta_type}|1.0): ${beta_comment}
+        name(str|None): A name for this layer(optional). If set None, the layer
+                        will be named automatically.
+
+    Returns:
+        output(${out_type}): ${out_comment}
+    """
+    helper = LayerHelper('swish', **locals())
+    out = helper.create_tmp_variable(dtype=x.dtype)
+    helper.append_op(
+        type='swish',
+        inputs={'X': x},
+        outputs={'Out': out},
+        attrs={'beta': beta})
+    return out
+
+
 def prelu(x, mode, param_attr=None, name=None):
     """
     Equation:
diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py
index 6c9cbe740e8fdf8fb0a042943ce0b2008efdfc42..4d942170f21d8d3944c47adece85c8888abe5309 100644
--- a/python/paddle/fluid/layers/ops.py
+++ b/python/paddle/fluid/layers/ops.py
@@ -17,12 +17,6 @@ from .layer_function_generator import generate_layer_fn, generate_layer_fn_noatt
 
 __activations__ = [
     'softshrink',
-    'elu',
-    'relu6',
-    'pow',
-    'stanh',
-    'hard_sigmoid',
-    'swish',
 ]
 
 __activations_noattr__ = [
diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py
index e97643cddef22465436051a41ef4b825e9634d23..b5549c507ed753f4504afd655be59b444164e6f3 100644
--- a/python/paddle/fluid/tests/unittests/op_test.py
+++ b/python/paddle/fluid/tests/unittests/op_test.py
@@ -345,7 +345,7 @@ class OpTest(unittest.TestCase):
                     actual_t, expect_t, atol=atol, equal_nan=equal_nan),
                 "Output (" + out_name + ") has diff at " + str(place) +
                 "\nExpect " + str(expect_t) + "\n" + "But Got" +
-                str(actual_t) + " in class " + self.__class__.__name__)
+                str(actual_t))
             if isinstance(expect, tuple):
                 self.assertListEqual(actual.recursive_sequence_lengths(),
                                      expect[1], "Output (" + out_name +
diff --git a/python/paddle/fluid/tests/unittests/test_detection_map_op.py b/python/paddle/fluid/tests/unittests/test_detection_map_op.py
index 0c5343a97d5ef0f97fc6b144dfc82174eacb8573..f6eb8f2c6d8b94f92e24ff789c91efb53a645a46 100644
--- a/python/paddle/fluid/tests/unittests/test_detection_map_op.py
+++ b/python/paddle/fluid/tests/unittests/test_detection_map_op.py
@@ -20,7 +20,6 @@ import six
 import sys
 import collections
 import math
-import paddle.fluid as fluid
 from op_test import OpTest
 
 
@@ -33,7 +32,7 @@ class TestDetectionMAPOp(OpTest):
         self.detect = np.array(self.detect).astype('float32')
         self.mAP = np.array(self.mAP).astype('float32')
 
-        if len(self.class_pos_count) > 0:
+        if (len(self.class_pos_count) > 0):
             self.class_pos_count = np.array(self.class_pos_count).astype(
                 'int32')
             self.true_pos = np.array(self.true_pos).astype('float32')
@@ -274,7 +273,7 @@ class TestDetectionMAPOp11Point(TestDetectionMAPOp):
 class TestDetectionMAPOpMultiBatch(TestDetectionMAPOp):
     def init_test_case(self):
         super(TestDetectionMAPOpMultiBatch, self).init_test_case()
-        self.class_pos_count = [0, 2, 1, 0]
+        self.class_pos_count = [0, 2, 1]
         self.true_pos_lod = [[0, 3, 2]]
         self.true_pos = [[0.7, 1.], [0.3, 0.], [0.2, 1.], [0.8, 0.], [0.1, 1.]]
         self.false_pos_lod = [[0, 3, 2]]
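Finally, on the Python side: the six activations removed from the auto-generated `__activations__` list in ops.py become plain functions in nn.py, with the explicit signatures recorded in API.spec at the top of this diff. A hypothetical usage sketch (variable names and shapes are illustrative only):

```python
import paddle.fluid as fluid

# Each activation now takes documented keyword arguments with real
# defaults, instead of the generated *args/**kwargs wrappers.
x = fluid.layers.data(name='x', shape=[32], dtype='float32')

y = fluid.layers.elu(x, alpha=1.0)
y = fluid.layers.relu6(y, threshold=6.0)
y = fluid.layers.pow(y, factor=2.0)
y = fluid.layers.stanh(y, scale_a=2.0 / 3.0, scale_b=1.7159)
y = fluid.layers.hard_sigmoid(y, slope=0.2, offset=0.5)
y = fluid.layers.swish(y, beta=1.0)
```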