未验证 提交 cd93ce18 编写于 作者: 悟世者's avatar 悟世者 提交者: GitHub

fix stonedb for optimize exists subquery and deep memory copy #369 (#330, #329, #340, #332) (#369)

* fix stonedb: optimize EXISTS subquery handling and deep memory copy

* Update parameterized_filter.cpp

Fix a memory leak of the `mind` member in ParameterizedFilter's deep copy

* fix subquery when this is in sql

* improve code readability and add defensive checks

* use shallow memory copies for parallel hash join in class DimensionGroupMultiMaterialized
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
上级 b58a3773
......@@ -113,3 +113,4 @@ CMakeFiles
CMakeFiles/*
CTestTestfile.cmake
COPYING
.vs/
\ No newline at end of file
......@@ -121,6 +121,8 @@ class Descriptor {
void CalculateJoinType();
// Returns the join type currently stored in desc_t.
DescriptorJoinType GetJoinType() const {
  return desc_t;
}
bool IsType_Subquery();
// True when this descriptor's operator is EXISTS or NOT EXISTS.
bool IsType_Exists() const {
  const bool is_exists = (op == common::Operator::O_EXISTS);
  const bool is_not_exists = (op == common::Operator::O_NOT_EXISTS);
  return is_exists || is_not_exists;
}
// True when this descriptor's operator is IN or NOT IN.
bool IsType_In() const {
  const bool is_in = (op == common::Operator::O_IN);
  const bool is_not_in = (op == common::Operator::O_NOT_IN);
  return is_in || is_not_in;
}
bool IsType_TIANMUExpression() const; // only columns, constants and TIANMUExpressions
bool IsType_JoinComplex() const;
......
......@@ -181,8 +181,8 @@ int MultiIndexTable::NumOfLocks() {
//----------------------------DimensionGroupMultiMaterialized-------------------------------------------
// The no_obj need to preset, because AddDimensionContent maybe not called on
// uninvolved scenes.
DimensionGroupMultiMaterialized::DimensionGroupMultiMaterialized(int64_t obj, DimensionVector &dims, uint32_t power)
: power_(power), dims_used_(dims) {
DimensionGroupMultiMaterialized::DimensionGroupMultiMaterialized(int64_t obj, DimensionVector &dims, uint32_t power, bool is_shallow_memory)
: power_(power), dims_used_(dims), is_shallow_memory(is_shallow_memory) {
dim_group_type = DGType::DG_INDEX_TABLE;
no_obj = obj;
dims_count_ = dims_used_.Size();
......@@ -190,17 +190,25 @@ DimensionGroupMultiMaterialized::DimensionGroupMultiMaterialized(int64_t obj, Di
}
// Releases the per-dimension tables held in dim_tables_.
// An instance flagged is_shallow_memory does not free them; presumably the
// tables are then owned by the group it was shallow-cloned from.
DimensionGroupMultiMaterialized::~DimensionGroupMultiMaterialized() {
  if (!is_shallow_memory) {
    for (MultiIndexTable *table : dim_tables_) {
      delete table;
    }
  }
}
DimensionGroup *DimensionGroupMultiMaterialized::Clone(bool shallow) {
DimensionGroupMultiMaterialized *new_value = new DimensionGroupMultiMaterialized(no_obj, dims_used_, power_);
if (shallow) return new_value;
DimensionGroupMultiMaterialized *new_value = new DimensionGroupMultiMaterialized(no_obj, dims_used_, power_, shallow);
for (int index = 0; index < dims_count_; ++index) {
MultiIndexTable *tables = dim_tables_[index];
if (tables) {
tables->Lock();
new_value->dim_tables_[index] = new MultiIndexTable(*tables);
if (shallow) {
new_value->dim_tables_[index] = tables;
} else {
new_value->dim_tables_[index] = new MultiIndexTable(*tables);
}
tables->Unlock();
}
}
......
......@@ -163,7 +163,7 @@ class DimensionGroupMultiMaterialized : public DimensionGroup {
public:
// NOTE: works also for "count only" (all t[i] are NULL, only no_obj set)
DimensionGroupMultiMaterialized(int64_t obj, DimensionVector &dims, uint32_t power);
DimensionGroupMultiMaterialized(int64_t obj, DimensionVector &dims, uint32_t power, bool is_shallow_memory = false);
~DimensionGroupMultiMaterialized() override;
// The table will be added (as a pointer to be deleted by destructor) on a
......@@ -196,6 +196,7 @@ class DimensionGroupMultiMaterialized : public DimensionGroup {
int dims_count_ = 0;
// NULL for not used (natural numbering).
std::vector<MultiIndexTable *> dim_tables_;
bool is_shallow_memory;
};
} // namespace core
} // namespace Tianmu
......
......@@ -40,20 +40,35 @@ TwoDimensionalJoiner::~TwoDimensionalJoiner() {
}
// Picks the join algorithm for a two-dimensional join by inspecting the first
// descriptor of `cond`:
//   - EXISTS / NOT EXISTS or IN / NOT IN   -> MAP or HASH (see choose_map_or_hash)
//   - not a simple join                    -> GENERAL
//   - simple join with '='                 -> MAP or HASH
//   - simple join with '<', '<=', '>', '>=' -> SORT
//   - anything else                        -> GENERAL
// `mind` is not used. cond[0] is read unconditionally, so callers are presumably
// expected to pass a non-empty condition -- TODO confirm.
JoinAlgType TwoDimensionalJoiner::ChooseJoinAlgorithm([[maybe_unused]] MultiIndex &mind, Condition &cond) {
  // MAP is chosen only for a single condition when hash join is not forced
  // (available types are checked inside the MAP joiner); otherwise HASH.
  // A default by-reference capture is used because tianmu_sysvar_force_hashjoin
  // is not a local variable of this function and therefore must not be named
  // explicitly in the capture list.
  auto choose_map_or_hash = [&] {
    if (!tianmu_sysvar_force_hashjoin && cond.Size() == 1) {
      return JoinAlgType::JTYPE_MAP;
    }
    return JoinAlgType::JTYPE_HASH;
  };

  auto &&first = cond[0];

  // Subquery-style predicates are routed to the map/hash joiners.
  if (first.IsType_Exists() || first.IsType_In()) {
    return choose_map_or_hash();
  }

  if (!first.IsType_JoinSimple()) {
    return JoinAlgType::JTYPE_GENERAL;
  }

  if (first.op == common::Operator::O_EQ) {
    return choose_map_or_hash();
  }

  if (first.op == common::Operator::O_MORE_EQ || first.op == common::Operator::O_MORE ||
      first.op == common::Operator::O_LESS_EQ || first.op == common::Operator::O_LESS) {
    return JoinAlgType::JTYPE_SORT;
  }

  return JoinAlgType::JTYPE_GENERAL;
}
JoinAlgType TwoDimensionalJoiner::ChooseJoinAlgorithm(JoinFailure join_result, JoinAlgType prev_type,
......
......@@ -60,7 +60,7 @@ int EvaluateMatchedFragmentsWithRows([[maybe_unused]] uint32_t pack_power, int64
//----------------------------------------------MITaskIterator-----------------------------------------------
MITaskIterator::MITaskIterator(MultiIndex *mind, DimensionVector &dimensions, int task_id, int task_count,
int64_t rows_length)
: iter_(new MIIterator(new MultiIndex(*mind, false), dimensions)) {
: iter_(new MIIterator(new MultiIndex(*mind, true), dimensions)) {
iter_->SetTaskNum(task_count);
iter_->SetTaskId(task_id);
rows_length_ = rows_length * 1.5;
......
......@@ -36,34 +36,65 @@
namespace Tianmu {
namespace core {
// Creates a filter that owns a freshly allocated MultiIndex built with `power`.
// The filter starts as an owning (non-shallow) instance with no rough
// multi-index and no associated table.
ParameterizedFilter::ParameterizedFilter(uint32_t power, CondType filter_type)
    : mind(new MultiIndex(power)),
      mind_shallow_memory(false),
      rough_mind(nullptr),
      table(nullptr),
      filter_type(filter_type) {}
// Deep-copy assignment: replaces this filter's MultiIndex with a copy of
// pf.mind (or nullptr when pf has none) and copies the remaining state through
// AssignInternal. Afterwards this filter owns its MultiIndex.
ParameterizedFilter &ParameterizedFilter::operator=(const ParameterizedFilter &pf) {
  if (this == &pf) {
    return *this;
  }

  // Free the current MultiIndex exactly once, and only when it is owned; a
  // shallow filter merely borrows it.
  if (!mind_shallow_memory) {
    delete mind;
  }

  // pf.mind may be null, e.g. for temporary data sources.
  mind = pf.mind ? new MultiIndex(*pf.mind) : nullptr;

  // NOTE(review): when this filter was a shallow alias, rough_mind still points
  // at a borrowed object at this point -- confirm AssignInternal does not free it.
  AssignInternal(pf);
  mind_shallow_memory = false;
  return *this;
}
// "Shallow" assignment from an rvalue: this filter becomes a non-owning alias
// of pf's mind, rough_mind and table. Unlike a conventional move, pf is left
// untouched and keeps ownership, so pf must outlive this filter. No other
// state (descriptors, filter type, ...) is transferred here.
ParameterizedFilter &ParameterizedFilter::operator=(ParameterizedFilter &&pf) {
  if (this == &pf) {
    return *this;
  }

  // Release what this filter currently owns before aliasing pf's objects.
  // The destructor treats both mind and rough_mind as owned when the filter is
  // not shallow, so both must be freed here or rough_mind would leak.
  if (!mind_shallow_memory) {
    delete mind;
    delete rough_mind;
  }

  mind = pf.mind;
  rough_mind = pf.rough_mind;
  table = pf.table;
  mind_shallow_memory = true;
  return *this;
}
// Deep-copy constructor: allocates a private copy of pf.mind (or leaves mind
// null when pf has none) and copies the remaining state through AssignInternal.
// The new filter owns its MultiIndex.
ParameterizedFilter::ParameterizedFilter(const ParameterizedFilter &pf)
    : mind(nullptr), mind_shallow_memory(false), rough_mind(nullptr), table(nullptr) {
  // pf.mind may be null, e.g. for temporary data sources.
  if (pf.mind) {
    mind = new MultiIndex(*pf.mind);
  }
  // rough_mind and table are initialised to null above so that AssignInternal
  // never observes indeterminate pointers.
  // NOTE(review): whether AssignInternal copies `table` is not visible here -- confirm.
  AssignInternal(pf);
}
// Frees mind and rough_mind unless this filter is a shallow alias
// (mind_shallow_memory), in which case neither pointer is freed here.
ParameterizedFilter::~ParameterizedFilter() {
  if (mind_shallow_memory) {
    return;
  }
  // Each pointer is deleted exactly once; deleting a null pointer is a no-op.
  delete mind;
  mind = nullptr;
  delete rough_mind;
  rough_mind = nullptr;
}
void ParameterizedFilter::AssignInternal(const ParameterizedFilter &pf) {
......@@ -71,7 +102,7 @@ void ParameterizedFilter::AssignInternal(const ParameterizedFilter &pf) {
if (pf.rough_mind)
rough_mind = new RoughMultiIndex(*pf.rough_mind);
else
rough_mind = NULL;
rough_mind = nullptr;
for (uint i = 0; i < pf.descriptors.Size(); i++)
if (!pf.descriptors[i].done) descriptors.AddDescriptor(pf.descriptors[i]);
parametrized_desc = pf.parametrized_desc;
......@@ -1017,7 +1048,11 @@ void ParameterizedFilter::UpdateMultiIndex(bool count_only, int64_t limit) {
int no_of_delayed_conditions = 0;
for (uint i = 0; i < descriptors.Size(); i++) {
if (!descriptors[i].done)
if (descriptors[i].IsType_Join() || descriptors[i].IsDelayed() || descriptors[i].IsOuter()) {
if (descriptors[i].IsType_Join()
|| descriptors[i].IsDelayed()
|| descriptors[i].IsOuter()
|| descriptors[i].IsType_In()
|| descriptors[i].IsType_Exists()) {
if (!descriptors[i].IsDelayed())
no_of_join_conditions++;
else
......@@ -1033,13 +1068,17 @@ void ParameterizedFilter::UpdateMultiIndex(bool count_only, int64_t limit) {
int no_desc = 0;
for (uint i = 0; i < descriptors.Size(); i++)
if (!descriptors[i].done && descriptors[i].IsInner() && !descriptors[i].IsType_Join() &&
!descriptors[i].IsDelayed())
!descriptors[i].IsDelayed() && !descriptors[i].IsType_Exists() && !descriptors[i].IsType_In())
++no_desc;
int desc_no = 0;
for (uint i = 0; i < descriptors.Size(); i++) {
if (!descriptors[i].done && descriptors[i].IsInner() && !descriptors[i].IsType_Join() &&
!descriptors[i].IsDelayed()) {
if (!descriptors[i].done
&& descriptors[i].IsInner()
&& !descriptors[i].IsType_Join()
&& !descriptors[i].IsDelayed()
&& !descriptors[i].IsType_In()
&& !descriptors[i].IsType_Exists()) {
++desc_no;
if (descriptors[i].attr.vc) {
cur_dim = descriptors[i].attr.vc->GetDim();
......
......@@ -41,6 +41,7 @@ class ParameterizedFilter final {
ParameterizedFilter(const ParameterizedFilter &);
virtual ~ParameterizedFilter();
ParameterizedFilter &operator=(const ParameterizedFilter &pf);
ParameterizedFilter &operator=(ParameterizedFilter &&pf);
// ParameterizedFilter & operator =(const ParameterizedFilter & pf);
void AddConditions(const Condition *conds);
uint NoParameterizedDescs() { return parametrized_desc.Size(); }
......@@ -77,8 +78,9 @@ class ParameterizedFilter final {
int desc_number, int64_t limit, int one_dim);
MultiIndex *mind;
bool mind_shallow_memory;
RoughMultiIndex *rough_mind;
TempTable *table = nullptr;
TempTable *table;
private:
Condition descriptors;
......
......@@ -2165,7 +2165,9 @@ void TempTableForSubquery::ResetToTemplate(bool rough) {
(*attrs[i]).buffer = orig_buf;
}
filter = *template_filter;
filter = std::move(*template_filter); // shallow
filter_shallow_memory = true;
for (int i = 0; i < no_global_virt_cols; i++)
if (!virt_cols_for_having[i]) virt_cols[i]->SetMultiIndex(filter.mind);
......
......@@ -319,8 +319,16 @@ class TempTable : public JustATable {
uint GetDisplayableAttrIndex(uint attr);
// Returns the MultiIndex pointer held by the filter (may be null).
MultiIndex *GetMultiIndexP() {
  return filter.mind;
}
// Deletes the filter's MultiIndex and resets the pointer to null.
// Does nothing when there is no MultiIndex, or when filter_shallow_memory is
// set (the pointer is then left untouched).
void ClearMultiIndexP() {
  const bool nothing_to_free = (filter.mind == nullptr) || filter_shallow_memory;
  if (nothing_to_free) {
    return;
  }
  delete filter.mind;
  filter.mind = nullptr;
}
// Returns the address of the output_mind member.
MultiIndex *GetOutputMultiIndexP() {
  return &output_mind;
}
// Returns the address of the filter member.
ParameterizedFilter *GetFilterP() {
  return &filter;
}
......@@ -393,6 +401,7 @@ class TempTable : public JustATable {
std::vector<JoinType> join_types; // vector of types of joins, one less than tables
ParameterizedFilter filter; // multidimensional filter, contains multiindex,
// can be parametrized
bool filter_shallow_memory = false; // is filter shallow memory
MultiIndex output_mind; // one dimensional MultiIndex used for operations on
// output columns of TempTable
std::vector<SortDescriptor> order_by; // indexes of order by columns
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册