diff --git a/libgpos/CMakeLists.txt b/libgpos/CMakeLists.txt index 88321c57b66fd3a95ebda5e805b1f239f0df7176..4669c84d2631941997f9085dffbe89e4f7f278b6 100644 --- a/libgpos/CMakeLists.txt +++ b/libgpos/CMakeLists.txt @@ -44,6 +44,8 @@ add_library(gpos include/gpos/common/CHashMapIter.inl include/gpos/common/CHashSet.h include/gpos/common/CHashSet.inl + include/gpos/common/CHashSetIter.h + include/gpos/common/CHashSetIter.inl include/gpos/common/clibtypes.h include/gpos/common/CList.h include/gpos/common/CList.inl diff --git a/libgpos/include/gpos/common/CHashSet.h b/libgpos/include/gpos/common/CHashSet.h index 1c58ac2c28af2e5b2a045eed8a633d2172915629..4ed0618e14a703aaf3354138bcbd5d9ae425185c 100644 --- a/libgpos/include/gpos/common/CHashSet.h +++ b/libgpos/include/gpos/common/CHashSet.h @@ -29,6 +29,13 @@ namespace gpos { + // fwd declaration + template + class CHashSetIter; + //--------------------------------------------------------------------------- // @class: // CHashSet @@ -43,6 +50,8 @@ namespace gpos void (*pfnDestroy)(T*)> class CHashSet : public CRefCount { + // fwd declaration + friend class CHashSetIter; private: @@ -101,6 +110,13 @@ namespace gpos typedef CDynamicPtrArray DrgHashChain; DrgHashChain **m_ppdrgchain; + // array for elements + // We use CleanupNULL because the elements are owned by the hash table + typedef CDynamicPtrArray DrgElements; + DrgElements *const m_pdrgElements; + + DrgPi *const m_pdrgPiFilledBuckets; + // private copy ctor CHashSet(const CHashSet &); @@ -115,6 +131,9 @@ namespace gpos // clear elements void Clear(); + // lookup an element by its key + void Lookup(const T *pt, CHashSetElem **pphse) const; + public: // ctor diff --git a/libgpos/include/gpos/common/CHashSet.inl b/libgpos/include/gpos/common/CHashSet.inl index 1a4cea826175fe2e725b40312c9d3c98784ecb5b..a1d532dec63dbcbdf9726a276bb48f0c4a92bfea 100644 --- a/libgpos/include/gpos/common/CHashSet.inl +++ b/libgpos/include/gpos/common/CHashSet.inl @@ -1,20 +1,8 @@ -//--------------------------------------------------------------------------- // Greenplum Database -// Copyright (C) 2015 Pivotal, Inc. +// Copyright (C) 2017 Pivotal Software, Inc. // -// @filename: -// CHashSet.inl -// -// @doc: -// Inline implementation of hash set template -// -// @owner: -// solimm1 -// -// @test: -// -// -//--------------------------------------------------------------------------- +// Inline implementation of hash set template + #ifndef GPOS_CHashSet_INL #define GPOS_CHashSet_INL @@ -23,15 +11,8 @@ namespace gpos { - //--------------------------------------------------------------------------- - // @class: - // CHashSet::CHashSetElem::CHashSetElem - // - // @doc: - // ctor - // - //--------------------------------------------------------------------------- - template @@ -48,15 +29,8 @@ namespace gpos } - //--------------------------------------------------------------------------- - // @class: - // CHashSet::CHashSetElem::~CHashSetElem - // - // @doc: - // dtor - // - //--------------------------------------------------------------------------- - template @@ -71,15 +45,8 @@ namespace gpos } - //--------------------------------------------------------------------------- - // @class: - // CHashSet::CHashSet - // - // @doc: - // ctor - // - //--------------------------------------------------------------------------- - template @@ -91,24 +58,19 @@ namespace gpos : m_pmp(pmp), m_ulSize(ulSize), - m_ulEntries(0) + m_ulEntries(0), + m_ppdrgchain(GPOS_NEW_ARRAY(m_pmp, DrgHashChain*, m_ulSize)), + m_pdrgElements(GPOS_NEW(m_pmp) DrgElements(m_pmp)), + m_pdrgPiFilledBuckets(GPOS_NEW(pmp) DrgPi(pmp)) { GPOS_ASSERT(ulSize > 0); - m_ppdrgchain = GPOS_NEW_ARRAY(m_pmp, DrgHashChain*, m_ulSize); (void) clib::PvMemSet(m_ppdrgchain, 0, m_ulSize * sizeof(DrgHashChain*)); } - //--------------------------------------------------------------------------- - // @class: - // CHashSet::~CHashSet - // - // @doc: - // dtor - // - //--------------------------------------------------------------------------- - template @@ -118,30 +80,26 @@ namespace gpos Clear(); GPOS_DELETE_ARRAY(m_ppdrgchain); + m_pdrgElements->Release(); + m_pdrgPiFilledBuckets->Release(); } - //--------------------------------------------------------------------------- - // @class: - // CHashSet::Clear - // - // @doc: - // Destroy all hash chains; delete elements as per destroy function - // - //--------------------------------------------------------------------------- - template void CHashSet::Clear() { - for (ULONG ul = 0; ul < m_ulSize; ul++) + for (ULONG i = 0; i < m_pdrgPiFilledBuckets->UlLength(); i++) { // release each hash chain - CRefCount::SafeRelease(m_ppdrgchain[ul]); + m_ppdrgchain[*(*m_pdrgPiFilledBuckets)[i]]->Release(); } m_ulEntries = 0; + m_pdrgPiFilledBuckets->Clear(); } @@ -173,25 +131,22 @@ namespace gpos if (NULL == *ppdrgchain) { *ppdrgchain = GPOS_NEW(m_pmp) DrgHashChain(m_pmp); + INT iBucket = pfnHash(pt) % m_ulSize; + m_pdrgPiFilledBuckets->Append(GPOS_NEW(m_pmp) INT(iBucket)); } CHashSetElem *phse = GPOS_NEW(m_pmp) CHashSetElem(pt, true /*fOwn*/); (*ppdrgchain)->Append(phse); m_ulEntries++; + m_pdrgElements->Append(pt); + return true; } - //--------------------------------------------------------------------------- - // @class: - // CHashSet::FExists - // - // @doc: - // Look up element by given key - // - //--------------------------------------------------------------------------- - template @@ -213,6 +168,34 @@ namespace gpos return false; } + + // Look up element + template + void + CHashSet::Lookup + ( + const T *pt, + CHashSetElem **pphse // output : pointer to found set entry + ) + const + { + GPOS_ASSERT(NULL != pphse); + + CHashSetElem hse(const_cast(pt), false /*fOwn*/); + CHashSetElem *phse = NULL; + DrgHashChain **ppdrgchain = PpdrgChain(pt); + if (NULL != *ppdrgchain) + { + phse = (*ppdrgchain)->PtLookup(&hse); + GPOS_ASSERT_IMP(NULL != phse, *phse == hse); + } + + *pphse = phse; + } + } diff --git a/libgpos/include/gpos/common/CHashSetIter.h b/libgpos/include/gpos/common/CHashSetIter.h new file mode 100644 index 0000000000000000000000000000000000000000..aa1c376f69b4a5d5aab6292e3ae3383d0180f6c7 --- /dev/null +++ b/libgpos/include/gpos/common/CHashSetIter.h @@ -0,0 +1,74 @@ +// Greenplum Database +// Copyright (C) 2017 Pivotal Software, Inc +// +// Hash set iterator + +#ifndef GPOS_CHashSetIter_H +#define GPOS_CHashSetIter_H + +#include "gpos/base.h" +#include "gpos/common/CStackObject.h" +#include "gpos/common/CHashSet.h" +#include "gpos/common/CDynamicPtrArray.h" + +namespace gpos +{ + + // Hash set iterator + template + class CHashSetIter : public CStackObject + { + + // short hand for hashset type + typedef CHashSet TSet; + + private: + + // set to iterate + const TSet *m_pts; + + // current hashchain + ULONG m_ulChain; + + // current element + ULONG m_ulElement; + + // is initialized? + BOOL m_fInit; + + // private copy ctor + CHashSetIter(const CHashSetIter &); + + // method to return the current element + const typename TSet::CHashSetElem *Phse() const; + + public: + + // ctor + CHashSetIter (TSet *); + + // dtor + virtual + ~CHashSetIter () + {} + + // advance iterator to next element + BOOL FAdvance(); + + // current element + const T *Pt() const; + + }; // class CHashSetIter + +} + +// inline'd functions +#include "CHashSetIter.inl" + +#endif // !GPOS_CHashSetIter_H + +// EOF + diff --git a/libgpos/include/gpos/common/CHashSetIter.inl b/libgpos/include/gpos/common/CHashSetIter.inl new file mode 100644 index 0000000000000000000000000000000000000000..9b76870e4424fe058a42198eb4660ac7b11e3b60 --- /dev/null +++ b/libgpos/include/gpos/common/CHashSetIter.inl @@ -0,0 +1,82 @@ +// Greenplum Database +// Copyright (C) 2017 Pivotal Software, Inc +// +// Inline implementation of hash set iterator template + +#ifndef GPOS_CHashSetIter_INL +#define GPOS_CHashSetIter_INL + +namespace gpos +{ + // ctor + template + CHashSetIter::CHashSetIter + ( + TSet *pts + ) + : + m_pts(pts), + m_ulChain(0), + m_ulElement(0) + { + GPOS_ASSERT(NULL != pts); + } + + + // Get the next existent hash chain + template + BOOL + CHashSetIter::FAdvance() + { + if (m_ulElement < m_pts->m_pdrgElements->UlLength()) + { + m_ulElement++; + return true; + } + + return false; + } + + // Look up current element + template + const typename CHashSet::CHashSetElem * + CHashSetIter::Phse() const + { + typename TSet::CHashSetElem *phse = NULL; + T *t = (*(m_pts->m_pdrgElements))[m_ulElement-1]; + m_pts->Lookup(t, &phse); + + return phse; + } + + + // Look up current element + template + const T* + CHashSetIter::Pt() const + { + const typename TSet::CHashSetElem *phse = Phse(); + if (NULL != phse) + { + return phse->Pt(); + } + return NULL; + } +} + + +#endif // !GPOS_CHashSetIter_INL + +// EOF diff --git a/libgpos/server/CMakeLists.txt b/libgpos/server/CMakeLists.txt index 68acdcc2859eb03fce45af4351bc12b21b84ba1e..049d15d7fbc2f2a4f4f36960e9b16863a11783bc 100644 --- a/libgpos/server/CMakeLists.txt +++ b/libgpos/server/CMakeLists.txt @@ -40,6 +40,7 @@ add_executable(gpos_test src/unittest/gpos/common/CEnumSetTest.cpp src/unittest/gpos/common/CHashMapIterTest.cpp src/unittest/gpos/common/CHashMapTest.cpp + src/unittest/gpos/common/CHashSetIterTest.cpp src/unittest/gpos/common/CHashSetTest.cpp src/unittest/gpos/common/CListTest.cpp src/unittest/gpos/common/CRefCountTest.cpp @@ -109,6 +110,7 @@ add_gpos_test(CDoubleTest) add_gpos_test(CHashMapTest) add_gpos_test(CHashMapIterTest) add_gpos_test(CHashSetTest) +add_gpos_test(CHashSetIterTest) add_gpos_test(CRefCountTest) add_gpos_test(CListTest) add_gpos_test(CStackTest) diff --git a/libgpos/server/include/unittest/gpos/common/CHashSetIterTest.h b/libgpos/server/include/unittest/gpos/common/CHashSetIterTest.h new file mode 100644 index 0000000000000000000000000000000000000000..f25d397188bd48ad72d49e8e66443fb188ac9a19 --- /dev/null +++ b/libgpos/server/include/unittest/gpos/common/CHashSetIterTest.h @@ -0,0 +1,27 @@ +// Greenplum Database +// Copyright (C) 2017 Pivotal Software, Inc + +#ifndef GPOS_CHashSetIterTest_H +#define GPOS_CHashSetIterTest_H + +#include "gpos/base.h" + +namespace gpos +{ + + // Static unit tests + class CHashSetIterTest + { + public: + + // unittests + static GPOS_RESULT EresUnittest(); + static GPOS_RESULT EresUnittest_Basic(); + + }; // class CHashSetIterTest +} + +#endif // !GPOS_CHashSetIterTest_H + +// EOF + diff --git a/libgpos/server/src/startup/main.cpp b/libgpos/server/src/startup/main.cpp index 01cabde59fdaac6b6f33f61f5cb12610241ab64c..5ca8ca1031158563a7206da7345113d8f1107b5d 100644 --- a/libgpos/server/src/startup/main.cpp +++ b/libgpos/server/src/startup/main.cpp @@ -32,6 +32,7 @@ #include "unittest/gpos/common/CHashMapTest.h" #include "unittest/gpos/common/CHashMapIterTest.h" #include "unittest/gpos/common/CHashSetTest.h" +#include "unittest/gpos/common/CHashSetIterTest.h" #include "unittest/gpos/common/CListTest.h" #include "unittest/gpos/common/CRefCountTest.h" #include "unittest/gpos/common/CStackTest.h" @@ -91,6 +92,7 @@ static gpos::CUnittest rgut[] = GPOS_UNITTEST_STD(CHashMapTest), GPOS_UNITTEST_STD(CHashMapIterTest), GPOS_UNITTEST_STD(CHashSetTest), + GPOS_UNITTEST_STD(CHashSetIterTest), GPOS_UNITTEST_STD(CRefCountTest), GPOS_UNITTEST_STD(CListTest), GPOS_UNITTEST_STD(CStackTest), diff --git a/libgpos/server/src/unittest/gpos/common/CHashSetIterTest.cpp b/libgpos/server/src/unittest/gpos/common/CHashSetIterTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..69d4380fba2176d35968fb20cf9270dd993451b9 --- /dev/null +++ b/libgpos/server/src/unittest/gpos/common/CHashSetIterTest.cpp @@ -0,0 +1,81 @@ +//--------------------------------------------------------------------------- +// Greenplum Database +// Copyright (C) 2017 Pivotal Software, Inc + +#include "gpos/base.h" +#include "gpos/common/CHashSetIter.h" +#include "gpos/memory/CAutoMemoryPool.h" +#include "gpos/test/CUnittest.h" + +#include "unittest/gpos/common/CHashSetIterTest.h" + +using namespace gpos; + +// Unittest for basic hash set iterator +GPOS_RESULT +CHashSetIterTest::EresUnittest() +{ + CUnittest rgut[] = + { + GPOS_UNITTEST_FUNC(CHashSetIterTest::EresUnittest_Basic), + }; + + return CUnittest::EresExecute(rgut, GPOS_ARRAY_SIZE(rgut)); +} + + +// Basic iterator test +GPOS_RESULT +CHashSetIterTest::EresUnittest_Basic() +{ + // create memory pool + CAutoMemoryPool amp; + IMemoryPool *pmp = amp.Pmp(); + + // test data + ULONG_PTR rgul[] = {1,2,3,4,5,6,7,8,9}; + const ULONG ulCnt = GPOS_ARRAY_SIZE(rgul); + + typedef CHashSet, gpos::FEqual, CleanupNULL > Set; + + typedef CHashSetIter, gpos::FEqual, CleanupNULL > SetIter; + + + // using N - 2 slots guarantees collisions + Set *ps = GPOS_NEW(pmp) Set(pmp, ulCnt - 2); + +#ifdef GPOS_DEBUG + + // iteration over empty set + SetIter siEmpty(ps); + GPOS_ASSERT(!siEmpty.FAdvance()); + +#endif // GPOS_DEBUG + + // load set and iterate over it after each step + for (ULONG ul = 0; ul < ulCnt; ++ul) + { + (void) ps->FInsert(&rgul[ul]); + + // checksum over elements + ULONG_PTR ulpChkSumElement = 0; + + // iterate over full set + SetIter si(ps); + while (si.FAdvance()) + { + ulpChkSumElement += *(si.Pt()); + } + + // use Gauss's formula for checksum-ing + GPOS_ASSERT(ulpChkSumElement == ((ul + 2) * (ul + 1)) / 2); + } + + ps->Release(); + + return GPOS_OK; +} + + +// EOF +