提交 0c8271e0 编写于 作者: dengyihao's avatar dengyihao

add fuzzy search

上级 41ae0b92
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __INDEX_FST_DFA_H__
#define __INDEX_FST_DFA_H__
#include "indexFstRegex.h"
#include "indexFstSparse.h"
#include "tarray.h"
#include "thash.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct FstDfa FstDfa;
typedef struct {
SArray * insts;
uint32_t next[256];
bool isMatch;
} State;
/*
* dfa builder related func
**/
typedef struct FstDfaBuilder {
FstDfa * dfa;
SHashObj *cache;
} FstDfaBuilder;
FstDfaBuilder *dfaBuilderCreate(SArray *insts);
FstDfa *dfaBuilderBuild(FstDfaBuilder *builder);
bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet *next, uint32_t state, uint8_t bytes,
uint32_t *result);
bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result);
/*
* dfa related func
**/
typedef struct FstDfa {
SArray *insts;
SArray *states;
} FstDfa;
FstDfa *dfaCreate(SArray *insts, SArray *states);
bool dfaIsMatch(FstDfa *dfa, uint32_t si);
bool dfaAccept(FstDfa *dfa, uint32_t si, uint8_t byte, uint32_t *result);
void dfaAdd(FstDfa *dfa, FstSparseSet *set, uint32_t ip);
bool dfaRun(FstDfa *dfa, FstSparseSet *from, FstSparseSet *to, uint8_t byte);
#ifdef __cplusplus
}
#endif
#endif
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_INDEX_FST_REGEX_H_
#define _TD_INDEX_FST_REGEX_H_
//#include "indexFstDfa.h"
#include "taos.h"
#include "tarray.h"
#include "tchecksum.h"
#include "thash.h"
#include "tlog.h"
#include "tutil.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef enum { MATCH, JUMP, SPLIT, RANGE } InstType;
typedef struct MatchValue {
} MatchValue;
typedef struct JumpValue {
uint32_t step;
} JumpValue;
typedef struct SplitValue {
uint32_t len1;
uint32_t len2;
} SplitValue;
typedef struct RangeValue {
uint8_t start;
uint8_t end;
} RangeValue;
typedef struct {
InstType ty;
union {
MatchValue mv;
JumpValue jv;
SplitValue sv;
RangeValue rv;
};
} Inst;
typedef struct {
char *orig;
void *dfa;
} FstRegex;
FstRegex *regexCreate(const char *str);
void regexSetup(FstRegex *regex, uint32_t size, const char *str);
// uint32_t regexStart()
#ifdef __cplusplus
}
#endif
#endif
......@@ -13,8 +13,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _TD_INDEX_SPARSE_H_
#define _TD_INDEX_SPARSE_H_
#ifndef _TD_INDEX_FST_SPARSE_H_
#define _TD_INDEX_FST_SPARSE_H_
#include "tarray.h"
......
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "indexFstDfa.h"
#include "thash.h"
static int dfaInstsEqual(const void *a, const void *b, size_t size) {
SArray *ar = (SArray *)a;
SArray *br = (SArray *)b;
size_t al = ar != NULL ? taosArrayGetSize(ar) : 0;
size_t bl = br != NULL ? taosArrayGetSize(br) : 0;
if (al != bl) {
return -1;
}
for (int i = 0; i < al; i++) {
uint32_t v1 = *(uint32_t *)taosArrayGet(ar, i);
uint32_t v2 = *(uint32_t *)taosArrayGet(br, i);
if (v1 != v2) {
return -1;
}
}
return 0;
}
FstDfaBuilder *dfaBuilderCreate(SArray *insts) {
FstDfaBuilder *builder = taosMemoryCalloc(1, sizeof(FstDfaBuilder));
if (builder == NULL) {
return NULL;
}
SArray *states = taosArrayInit(4, sizeof(State));
builder->dfa = dfaCreate(insts, states);
builder->cache = taosHashInit(
4, taosGetDefaultHashFunction(POINTER_BYTES == sizeof(int64_t) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_INT),
false, HASH_NO_LOCK);
taosHashSetEqualFp(builder->cache, dfaInstsEqual);
return builder;
}
FstDfa *dfaBuilderBuild(FstDfaBuilder *builder) {
uint32_t sz = taosArrayGetSize(builder->dfa->insts);
FstSparseSet *cur = sparSetCreate(sz);
FstSparseSet *nxt = sparSetCreate(sz);
dfaAdd(builder->dfa, cur, 0);
}
bool dfaBuilderRunState(FstDfaBuilder *builder, FstSparseSet *cur, FstSparseSet *next, uint32_t state, uint8_t bytes,
uint32_t *result) {
// impl run state
return true;
}
bool dfaBuilderCachedState(FstDfaBuilder *builder, FstSparseSet *set, uint32_t *result) {
// impl cache state
return true;
}
FstDfa *dfaCreate(SArray *insts, SArray *states) {
FstDfa *dfa = taosMemoryCalloc(1, sizeof(FstDfa));
if (dfa == NULL) {
return NULL;
}
dfa->insts = insts;
dfa->states = states;
return dfa;
}
bool dfaIsMatch(FstDfa *dfa, uint32_t si) {
// impl match
return true;
}
bool dfaAccept(FstDfa *dfa, uint32_t si, uint8_t byte, uint32_t *result) {
// impl accept
return true;
}
void dfaAdd(FstDfa *dfa, FstSparseSet *set, uint32_t ip) {
// impl add
return;
}
bool dfaRun(FstDfa *dfa, FstSparseSet *from, FstSparseSet *to, uint8_t byte) {
// impl run
return true;
}
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "indexFstRegex.h"
#include "indexFstSparse.h"
FstRegex *regexCreate(const char *str) {
FstRegex *regex = taosMemoryCalloc(1, sizeof(FstRegex));
if (regex == NULL) {
return NULL;
}
int32_t sz = (int32_t)strlen(str);
char * orig = taosMemoryCalloc(1, sz);
memcpy(orig, str, sz);
regex->orig = orig;
}
void regexSetup(FstRegex *regex, uint32_t size, const char *str) {
// return
// return;
}
......@@ -13,7 +13,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "indexSparse.h"
#include "indexFstSparse.h"
FstSparseSet *sparSetCreate(int32_t sz) {
FstSparseSet *ss = taosMemoryCalloc(1, sizeof(FstSparseSet));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册