提交 cd856d3d 编写于 作者: O Omer Arap

New CHashSet with matching iterator

This commit provides a better implementation of `CHashSet`. The improvements
follow the same logic as `CHashMap` and provide a new iterator called
`CHashSetIter`. Even though `CHashSet` already existed in the code base, it
had no real usage in the Orca codebase.
上级 62b4f337
......@@ -44,6 +44,8 @@ add_library(gpos
include/gpos/common/CHashMapIter.inl
include/gpos/common/CHashSet.h
include/gpos/common/CHashSet.inl
include/gpos/common/CHashSetIter.h
include/gpos/common/CHashSetIter.inl
include/gpos/common/clibtypes.h
include/gpos/common/CList.h
include/gpos/common/CList.inl
......
......@@ -29,6 +29,13 @@
namespace gpos
{
// forward declaration of the iterator template; CHashSet names it as a
// friend before the iterator's own definition is visible
template <class T,
ULONG (*pfnHash)(const T*),
BOOL (*pfnEq)(const T*, const T*),
void (*pfnDestroy)(T*)>
class CHashSetIter;
//---------------------------------------------------------------------------
// @class:
// CHashSet
......@@ -43,6 +50,8 @@ namespace gpos
void (*pfnDestroy)(T*)>
class CHashSet : public CRefCount
{
// the matching iterator is a friend: it reads the set's private element
// array and calls the private Lookup()
friend class CHashSetIter<T, pfnHash, pfnEq, pfnDestroy>;
private:
......@@ -101,6 +110,13 @@ namespace gpos
typedef CDynamicPtrArray<CHashSetElem, CleanupDelete> DrgHashChain;
DrgHashChain **m_ppdrgchain;
// array for elements; every successfully inserted element is appended here,
// which gives the iterator a flat sequence to walk
// We use CleanupNULL because the elements are owned by the hash table
typedef CDynamicPtrArray<T, CleanupNULL> DrgElements;
DrgElements *const m_pdrgElements;
// indexes of the buckets for which a hash chain has been allocated; lets
// Clear() release only the chains that exist
DrgPi *const m_pdrgPiFilledBuckets;
// private copy ctor
CHashSet(const CHashSet<T, pfnHash, pfnEq, pfnDestroy> &);
......@@ -115,6 +131,9 @@ namespace gpos
// clear elements
void Clear();
// look up the set entry that matches the given element; the entry (or NULL
// when there is no match) is returned through the output parameter pphse
void Lookup(const T *pt, CHashSetElem **pphse) const;
public:
// ctor
......
//---------------------------------------------------------------------------
// Greenplum Database
// Copyright (C) 2015 Pivotal, Inc.
// Copyright (C) 2017 Pivotal Software, Inc.
//
// @filename:
// CHashSet.inl
//
// @doc:
// Inline implementation of hash set template
//
// @owner:
// solimm1
//
// @test:
//
//
//---------------------------------------------------------------------------
// Inline implementation of hash set template
#ifndef GPOS_CHashSet_INL
#define GPOS_CHashSet_INL
......@@ -23,15 +11,8 @@
namespace gpos
{
//---------------------------------------------------------------------------
// @class:
// CHashSet::CHashSetElem::CHashSetElem
//
// @doc:
// ctor
//
//---------------------------------------------------------------------------
template <class T,
// ctor
template <class T,
ULONG (*pfnHash)(const T*),
BOOL (*pfnEq)(const T*, const T*),
void (*pfnDestroy)(T*)>
......@@ -48,15 +29,8 @@ namespace gpos
}
//---------------------------------------------------------------------------
// @class:
// CHashSet::CHashSetElem::~CHashSetElem
//
// @doc:
// dtor
//
//---------------------------------------------------------------------------
template <class T,
// dtor
template <class T,
ULONG (*pfnHash)(const T*),
BOOL (*pfnEq)(const T*, const T*),
void (*pfnDestroy)(T*)>
......@@ -71,15 +45,8 @@ namespace gpos
}
//---------------------------------------------------------------------------
// @class:
// CHashSet::CHashSet
//
// @doc:
// ctor
//
//---------------------------------------------------------------------------
template <class T,
// ctor
template <class T,
ULONG (*pfnHash)(const T*),
BOOL (*pfnEq)(const T*, const T*),
void (*pfnDestroy)(T*)>
......@@ -91,24 +58,19 @@ namespace gpos
:
m_pmp(pmp),
m_ulSize(ulSize),
m_ulEntries(0)
m_ulEntries(0),
m_ppdrgchain(GPOS_NEW_ARRAY(m_pmp, DrgHashChain*, m_ulSize)),
m_pdrgElements(GPOS_NEW(m_pmp) DrgElements(m_pmp)),
m_pdrgPiFilledBuckets(GPOS_NEW(pmp) DrgPi(pmp))
{
GPOS_ASSERT(ulSize > 0);
m_ppdrgchain = GPOS_NEW_ARRAY(m_pmp, DrgHashChain*, m_ulSize);
(void) clib::PvMemSet(m_ppdrgchain, 0, m_ulSize * sizeof(DrgHashChain*));
}
//---------------------------------------------------------------------------
// @class:
// CHashSet::~CHashSet
//
// @doc:
// dtor
//
//---------------------------------------------------------------------------
template <class T,
// dtor
template <class T,
ULONG (*pfnHash)(const T*),
BOOL (*pfnEq)(const T*, const T*),
void (*pfnDestroy)(T*)>
......@@ -118,30 +80,26 @@ namespace gpos
Clear();
GPOS_DELETE_ARRAY(m_ppdrgchain);
m_pdrgElements->Release();
m_pdrgPiFilledBuckets->Release();
}
//---------------------------------------------------------------------------
// @class:
// CHashSet::Clear
//
// @doc:
// Destroy all hash chains; delete elements as per destroy function
//
//---------------------------------------------------------------------------
template <class T,
// Destroy all hash chains; delete elements as per destroy function
template <class T,
ULONG (*pfnHash)(const T*),
BOOL (*pfnEq)(const T*, const T*),
void (*pfnDestroy)(T*)>
void
CHashSet<T, pfnHash, pfnEq, pfnDestroy>::Clear()
{
for (ULONG ul = 0; ul < m_ulSize; ul++)
for (ULONG i = 0; i < m_pdrgPiFilledBuckets->UlLength(); i++)
{
// release each hash chain
CRefCount::SafeRelease(m_ppdrgchain[ul]);
m_ppdrgchain[*(*m_pdrgPiFilledBuckets)[i]]->Release();
}
m_ulEntries = 0;
m_pdrgPiFilledBuckets->Clear();
}
......@@ -173,25 +131,22 @@ namespace gpos
if (NULL == *ppdrgchain)
{
*ppdrgchain = GPOS_NEW(m_pmp) DrgHashChain(m_pmp);
INT iBucket = pfnHash(pt) % m_ulSize;
m_pdrgPiFilledBuckets->Append(GPOS_NEW(m_pmp) INT(iBucket));
}
CHashSetElem *phse = GPOS_NEW(m_pmp) CHashSetElem(pt, true /*fOwn*/);
(*ppdrgchain)->Append(phse);
m_ulEntries++;
m_pdrgElements->Append(pt);
return true;
}
//---------------------------------------------------------------------------
// @class:
// CHashSet::FExists
//
// @doc:
// Look up element by given key
//
//---------------------------------------------------------------------------
template <class T,
// Look up element by given element
template <class T,
ULONG (*pfnHash)(const T*),
BOOL (*pfnEq)(const T*, const T*),
void (*pfnDestroy)(T*)>
......@@ -213,6 +168,34 @@ namespace gpos
return false;
}
// Find the set entry matching the given element.
// On return *pphse points to the matching entry, or is NULL when the
// element's hash chain does not exist or holds no match.
template <class T,
	ULONG (*pfnHash)(const T*),
	BOOL (*pfnEq)(const T*, const T*),
	void (*pfnDestroy)(T*)>
void
CHashSet<T, pfnHash, pfnEq, pfnDestroy>::Lookup
	(
	const T *pt,
	CHashSetElem **pphse // output : pointer to found set entry
	)
	const
{
	GPOS_ASSERT(NULL != pphse);

	// temporary probe entry wrapping the caller's element; constructed with
	// fOwn == false
	CHashSetElem hseProbe(const_cast<T*>(pt), false /*fOwn*/);
	CHashSetElem *phseMatch = NULL;

	// only buckets that already have a chain can contain the element
	DrgHashChain **ppdrgchainBucket = PpdrgChain(pt);
	if (NULL != *ppdrgchainBucket)
	{
		phseMatch = (*ppdrgchainBucket)->PtLookup(&hseProbe);
		GPOS_ASSERT_IMP(NULL != phseMatch, *phseMatch == hseProbe);
	}

	*pphse = phseMatch;
}
}
......
// Greenplum Database
// Copyright (C) 2017 Pivotal Software, Inc
//
// Hash set iterator
#ifndef GPOS_CHashSetIter_H
#define GPOS_CHashSetIter_H
#include "gpos/base.h"
#include "gpos/common/CStackObject.h"
#include "gpos/common/CHashSet.h"
#include "gpos/common/CDynamicPtrArray.h"
namespace gpos
{
// Hash set iterator.
// Walks the elements of a CHashSet through the set's element array.
// A new iterator is positioned before the first element: call FAdvance()
// and, while it returns true, read the current element with Pt().
template <class T,
	ULONG (*pfnHash)(const T*),
	BOOL (*pfnEq)(const T*, const T*),
	void (*pfnDestroy)(T*)>
class CHashSetIter : public CStackObject
{
	// short hand for hashset type
	typedef CHashSet<T, pfnHash, pfnEq, pfnDestroy> TSet;

	private:

		// set to iterate
		const TSet *m_pts;

		// current hashchain; set to 0 by the ctor and never read by the
		// inline implementation
		ULONG m_ulChain;

		// number of elements advanced past so far; the current element is
		// at position (m_ulElement - 1) of the set's element array
		ULONG m_ulElement;

		// is initialized? never read by the inline implementation
		BOOL m_fInit;

		// private copy ctor
		CHashSetIter(const CHashSetIter<T, pfnHash, pfnEq, pfnDestroy> &);

		// method to return the current element
		const typename TSet::CHashSetElem *Phse() const;

	public:

		// ctor; declared with the injected class name because a
		// template-id is not allowed here as of C++20
		CHashSetIter(TSet *);

		// dtor
		virtual
		~CHashSetIter()
		{}

		// advance iterator to next element; false when there is none left
		BOOL FAdvance();

		// current element
		const T *Pt() const;

}; // class CHashSetIter
}
// inline'd functions
#include "CHashSetIter.inl"
#endif // !GPOS_CHashSetIter_H
// EOF
// Greenplum Database
// Copyright (C) 2017 Pivotal Software, Inc
//
// Inline implementation of hash set iterator template
#ifndef GPOS_CHashSetIter_INL
#define GPOS_CHashSetIter_INL
namespace gpos
{
// ctor
// Binds the iterator to the given set, positioned before the first element.
// Every member is initialized, including m_fInit, so that no member is ever
// read while indeterminate.
template <class T,
	ULONG (*pfnHash)(const T*),
	BOOL (*pfnEq)(const T*, const T*),
	void (*pfnDestroy)(T*)>
CHashSetIter<T, pfnHash, pfnEq, pfnDestroy>::CHashSetIter
	(
	TSet *pts
	)
	:
	m_pts(pts),
	m_ulChain(0),
	m_ulElement(0),
	m_fInit(false)
{
	GPOS_ASSERT(NULL != pts);
}
// Advance the iterator to the next element of the set.
// Returns true if the iterator moved onto an element, false if all entries
// of the set's element array have already been visited.
template <class T,
	ULONG (*pfnHash)(const T*),
	BOOL (*pfnEq)(const T*, const T*),
	void (*pfnDestroy)(T*)>
BOOL
CHashSetIter<T, pfnHash, pfnEq, pfnDestroy>::FAdvance()
{
	// nothing left to visit
	if (m_ulElement >= m_pts->m_pdrgElements->UlLength())
	{
		return false;
	}

	++m_ulElement;

	return true;
}
// Return the set entry for the iterator's current element.
// Returns NULL when the iterator is not positioned on an element yet (no
// successful FAdvance()) or when the lookup in the set finds no entry.
template <class T,
	ULONG (*pfnHash)(const T*),
	BOOL (*pfnEq)(const T*, const T*),
	void (*pfnDestroy)(T*)>
const typename CHashSet<T, pfnHash, pfnEq, pfnDestroy>::CHashSetElem *
CHashSetIter<T, pfnHash, pfnEq, pfnDestroy>::Phse() const
{
	// m_ulElement is unsigned: without this guard (m_ulElement - 1) wraps
	// around and indexes far outside the element array
	if (0 == m_ulElement)
	{
		return NULL;
	}

	typename TSet::CHashSetElem *phse = NULL;

	// the current element sits one position behind the advance counter
	T *pt = (*(m_pts->m_pdrgElements))[m_ulElement - 1];
	m_pts->Lookup(pt, &phse);

	return phse;
}
// Return the element the iterator is currently positioned on, or NULL when
// no set entry is found for the current position
template <class T,
	ULONG (*pfnHash)(const T*),
	BOOL (*pfnEq)(const T*, const T*),
	void (*pfnDestroy)(T*)>
const T*
CHashSetIter<T, pfnHash, pfnEq, pfnDestroy>::Pt() const
{
	const typename TSet::CHashSetElem *phseCurrent = Phse();

	if (NULL == phseCurrent)
	{
		return NULL;
	}

	return phseCurrent->Pt();
}
}
#endif // !GPOS_CHashSetIter_INL
// EOF
......@@ -40,6 +40,7 @@ add_executable(gpos_test
src/unittest/gpos/common/CEnumSetTest.cpp
src/unittest/gpos/common/CHashMapIterTest.cpp
src/unittest/gpos/common/CHashMapTest.cpp
src/unittest/gpos/common/CHashSetIterTest.cpp
src/unittest/gpos/common/CHashSetTest.cpp
src/unittest/gpos/common/CListTest.cpp
src/unittest/gpos/common/CRefCountTest.cpp
......@@ -109,6 +110,7 @@ add_gpos_test(CDoubleTest)
add_gpos_test(CHashMapTest)
add_gpos_test(CHashMapIterTest)
add_gpos_test(CHashSetTest)
add_gpos_test(CHashSetIterTest)
add_gpos_test(CRefCountTest)
add_gpos_test(CListTest)
add_gpos_test(CStackTest)
......
// Greenplum Database
// Copyright (C) 2017 Pivotal Software, Inc
#ifndef GPOS_CHashSetIterTest_H
#define GPOS_CHashSetIterTest_H
#include "gpos/base.h"
namespace gpos
{
// Static unit tests for the hash set iterator (CHashSetIter)
class CHashSetIterTest
{
public:
// unittests
// driver: executes the unittests of this class
static GPOS_RESULT EresUnittest();
// basic test: fills a set step by step and iterates over it after each insert
static GPOS_RESULT EresUnittest_Basic();
}; // class CHashSetIterTest
}
#endif // !GPOS_CHashSetIterTest_H
// EOF
......@@ -32,6 +32,7 @@
#include "unittest/gpos/common/CHashMapTest.h"
#include "unittest/gpos/common/CHashMapIterTest.h"
#include "unittest/gpos/common/CHashSetTest.h"
#include "unittest/gpos/common/CHashSetIterTest.h"
#include "unittest/gpos/common/CListTest.h"
#include "unittest/gpos/common/CRefCountTest.h"
#include "unittest/gpos/common/CStackTest.h"
......@@ -91,6 +92,7 @@ static gpos::CUnittest rgut[] =
GPOS_UNITTEST_STD(CHashMapTest),
GPOS_UNITTEST_STD(CHashMapIterTest),
GPOS_UNITTEST_STD(CHashSetTest),
GPOS_UNITTEST_STD(CHashSetIterTest),
GPOS_UNITTEST_STD(CRefCountTest),
GPOS_UNITTEST_STD(CListTest),
GPOS_UNITTEST_STD(CStackTest),
......
//---------------------------------------------------------------------------
// Greenplum Database
// Copyright (C) 2017 Pivotal Software, Inc
#include "gpos/base.h"
#include "gpos/common/CHashSetIter.h"
#include "gpos/memory/CAutoMemoryPool.h"
#include "gpos/test/CUnittest.h"
#include "unittest/gpos/common/CHashSetIterTest.h"
using namespace gpos;
// Driver for the hash set iterator unittests: registers the individual
// test functions and hands them to the unittest executor
GPOS_RESULT
CHashSetIterTest::EresUnittest()
{
	CUnittest rgutIterTests[] =
		{
		GPOS_UNITTEST_FUNC(CHashSetIterTest::EresUnittest_Basic),
		};

	const ULONG ulTests = GPOS_ARRAY_SIZE(rgutIterTests);

	return CUnittest::EresExecute(rgutIterTests, ulTests);
}
// Basic iterator test: inserts the values 1..9 one at a time and, after each
// insert, iterates over the whole set and checks the sum of the visited
// elements (debug builds only, via GPOS_ASSERT)
GPOS_RESULT
CHashSetIterTest::EresUnittest_Basic()
{
	typedef CHashSet<ULONG_PTR, UlHashPtr<ULONG_PTR>, gpos::FEqual<ULONG_PTR>, CleanupNULL<ULONG_PTR> > Set;
	typedef CHashSetIter<ULONG_PTR, UlHashPtr<ULONG_PTR>, gpos::FEqual<ULONG_PTR>, CleanupNULL<ULONG_PTR> > SetIter;

	// create memory pool
	CAutoMemoryPool amp;
	IMemoryPool *pmp = amp.Pmp();

	// test data
	ULONG_PTR rgul[] = {1,2,3,4,5,6,7,8,9};
	const ULONG ulCnt = GPOS_ARRAY_SIZE(rgul);

	// using N - 2 slots guarantees collisions
	Set *ps = GPOS_NEW(pmp) Set(pmp, ulCnt - 2);

#ifdef GPOS_DEBUG

	// iteration over empty set
	SetIter siEmpty(ps);
	GPOS_ASSERT(!siEmpty.FAdvance());

#endif // GPOS_DEBUG

	// load set and iterate over it after each step
	for (ULONG ulInserted = 0; ulInserted < ulCnt; ulInserted++)
	{
		(void) ps->FInsert(&rgul[ulInserted]);

		// running sum of every element currently reachable in the set
		ULONG_PTR ulpSum = 0;

		// iterate over full set
		for (SetIter si(ps); si.FAdvance(); )
		{
			ulpSum += *(si.Pt());
		}

		// the set now holds 1..(ulInserted + 1); Gauss's formula gives the sum
		GPOS_ASSERT(((ulInserted + 1) * (ulInserted + 2)) / 2 == ulpSum);
	}

	ps->Release();

	return GPOS_OK;
}
// EOF
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册