index_fst_util.h 3.0 KB
Newer Older
dengyihao's avatar
dengyihao 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */


#ifndef __INDEX_FST_UTIL_H__
#define __INDEX_FST_UTIL_H__

#include "tarray.h"
dengyihao's avatar
dengyihao 已提交
21
#include "index_fst_common.h"
dengyihao's avatar
dengyihao 已提交
22 23 24 25 26 27 28 29 30 31 32 33 34

typedef uint64_t FstType;
typedef uint64_t CompiledAddr; 
typedef uint64_t Output;  
typedef uint8_t PackSizes;


//A sentinel value used to indicate an empty final state
extern const CompiledAddr EMPTY_ADDRESS;
/// A sentinel value used to indicate an invalid state.
extern const CompiledAddr NONE_ADDRESS;

// This version number is written to every finite state transducer created by
dengyihao's avatar
dengyihao 已提交
35
// this version When a finite state transducer is read, its version number is
dengyihao's avatar
dengyihao 已提交
36
// checked against this value.
dengyihao's avatar
dengyihao 已提交
37
extern const uint64_t    VERSION;
dengyihao's avatar
dengyihao 已提交
38 39 40 41 42 43 44 45 46
// The threshold (in number of transitions) at which an index is created for                                   
// a node's transitions. This speeds up lookup time at the expense of FST size 

extern const uint64_t TRANS_INDEX_THRESHOLD;
// high 4 bits is transition address packed size.
// low 4 bits is output value packed size.
//
// `0` is a legal value which means there are no transitions/outputs

dengyihao's avatar
dengyihao 已提交
47

dengyihao's avatar
dengyihao 已提交
48
#define FST_SET_TRANSITION_PACK_SIZE(v, sz) do {v = (v & 0b00001111) | (sz << 4); } while(0)
dengyihao's avatar
dengyihao 已提交
49
#define FST_GET_TRANSITION_PACK_SIZE(v) (((v) & 0b11110000) >> 4) 
dengyihao's avatar
dengyihao 已提交
50
#define FST_SET_OUTPUT_PACK_SIZE(v, sz) do { v =  (v & 0b11110000) | sz; } while(0)
dengyihao's avatar
dengyihao 已提交
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
#define FST_GET_OUTPUT_PACK_SIZE(v) ((v) & 0b00001111)   

#define COMMON_INPUT(idx) COMMON_INPUTS_INV[(idx) - 1]

#define COMMON_INDEX(v, max, val) do {         \
  val = ((uint16_t)COMMON_INPUTS[v] + 1)%256;  \
  val = val > max ? 0: val;                    \
} while(0) 


//uint8_t commonInput(uint8_t idx);
//uint8_t commonIdx(uint8_t v, uint8_t max);

uint8_t      packSize(uint64_t n); 
uint64_t     unpackUint64(uint8_t *ch, uint8_t sz);
uint8_t      packDeltaSize(CompiledAddr nodeAddr, CompiledAddr transAddr);
CompiledAddr unpackDelta(char *data, uint64_t len, uint64_t nodeAddr);


dengyihao's avatar
dengyihao 已提交
70 71 72 73 74
typedef struct FstString {
  uint8_t *data; 
  uint32_t len;
  int32_t ref;
} FstString;
dengyihao's avatar
dengyihao 已提交
75 76

typedef struct FstSlice {
dengyihao's avatar
dengyihao 已提交
77 78 79
  FstString   *str;     
  int32_t start; 
  int32_t end; 
dengyihao's avatar
dengyihao 已提交
80 81
} FstSlice;

dengyihao's avatar
dengyihao 已提交
82 83 84 85 86 87 88
FstSlice fstSliceCreate(uint8_t *data, uint64_t len);
FstSlice fstSliceCopy(FstSlice *s, int32_t start, int32_t end);
FstSlice fstSliceDeepCopy(FstSlice *s, int32_t start, int32_t end);
bool fstSliceEmpty(FstSlice *s);
int fstSliceCompare(FstSlice *s1, FstSlice *s2);
void fstSliceDestroy(FstSlice *s); 
uint8_t *fstSliceData(FstSlice *s, int32_t *sz); 
dengyihao's avatar
dengyihao 已提交
89

dengyihao's avatar
dengyihao 已提交
90
#define FST_SLICE_LEN(s) (s->end - s->start + 1)
dengyihao's avatar
dengyihao 已提交
91

dengyihao's avatar
dengyihao 已提交
92 93

#endif