Unverified · Commit cbeff753 · Authored by: Eugene Lysiuchenko · Committed by: GitHub

feat(sql): update "within" operator to accept GeoHash literals and constant functions (#1307)

Parent: 38a90c85
......@@ -122,7 +122,7 @@ public final class ColumnType {
}
/**
 * Returns the GeoHash column type tag encoding the given precision in bits.
 *
 * @param bits GeoHash precision; must be in (0, GEO_HASH_MAX_BITS_LENGTH]
 * @return encoded GeoHash type value
 */
public static int getGeoHashTypeWithBits(int bits) {
    // Single precondition check; the duplicate weaker assert (diff residue) is removed.
    assert bits > 0 && bits <= GEO_HASH_MAX_BITS_LENGTH;
    // this logic relies on GeoHash type value to be clustered together
    return mkGeoHashType(bits, (short) (GEOBYTE + pow2SizeOfBits(bits)));
}
......
......@@ -156,34 +156,22 @@ public class GeoHashes {
return fromString(geohash, start, start + Math.min(length, MAX_STRING_LENGTH));
}
public static void fromStringToBits(final CharSequenceHashSet prefixes, int columnType, final DirectLongList prefixesBits) {
prefixesBits.clear();
public static void addNormalizedGeoPrefix(long hash, int prefixType, int columnType, final LongList prefixes) throws NumericException {
final int bits = ColumnType.getGeoHashBits(prefixType);
final int columnSize = ColumnType.sizeOf(columnType);
final int columnBits = ColumnType.getGeoHashBits(columnType);
for (int i = 0, sz = prefixes.size(); i < sz; i++) {
try {
final CharSequence prefix = prefixes.get(i);
if (prefix == null || prefix.length() == 0) {
continue;
}
final long hash = fromString(prefix, 0, prefix.length());
final int bits = 5 * prefix.length();
final int shift = columnBits - bits;
long norm = hash << shift;
long mask = bitmask(bits, shift);
mask |= 1L << (columnSize * 8 - 1); // set the most significant bit to ignore null from prefix matching
// if the prefix is more precise than hashes,
// exclude it from matching
if (bits > columnBits) {
norm = 0L;
mask = -1L;
}
prefixesBits.add(norm);
prefixesBits.add(mask);
} catch (NumericException e) {
// Skip invalid geo hashes
}
if (hash == NULL || bits > columnBits) {
throw NumericException.INSTANCE;
}
final int shift = columnBits - bits;
long norm = hash << shift;
long mask = GeoHashes.bitmask(bits, shift);
mask |= 1L << (columnSize * 8 - 1); // set the most significant bit to ignore null from prefix matching
prefixes.add(norm);
prefixes.add(mask);
}
public static long fromStringTruncatingNl(CharSequence hash, int start, int end, int toBits) throws NumericException {
......
......@@ -101,7 +101,7 @@ public class SqlCodeGenerator implements Mutable {
private final IntList recordFunctionPositions = new IntList();
private final IntList groupByFunctionPositions = new IntList();
private boolean fullFatJoins = false;
private final CharSequenceHashSet prefixes = new CharSequenceHashSet();
private final LongList prefixes = new LongList();
static {
joinsRequiringTimestamp[JOIN_INNER] = false;
......@@ -941,10 +941,8 @@ public class SqlCodeGenerator implements Mutable {
Function filter,
SqlExecutionContext executionContext,
int timestampIndex,
int hashColumnIndex,
int hashColumnType,
@NotNull IntList columnIndexes,
@NotNull CharSequenceHashSet prefixes
@NotNull LongList prefixes
) throws SqlException {
final DataFrameCursorFactory dataFrameCursorFactory;
if (intrinsicModel.hasIntervalFilters()) {
......@@ -1093,8 +1091,6 @@ public class SqlCodeGenerator implements Mutable {
metadata,
dataFrameCursorFactory,
latestByIndex,
hashColumnIndex,
hashColumnType,
filter,
columnIndexes,
prefixes
......@@ -2440,19 +2436,12 @@ public class SqlCodeGenerator implements Mutable {
final ExpressionNode withinExtracted = whereClauseParser.extractWithin(
model,
model.getWhereClause(),
myMeta,
readerMeta,
functionParser,
executionContext,
prefixes
);
int hashColumnIndex = -1; // latest by without prefix match part
int hashColumnType = ColumnType.UNDEFINED;
if (prefixes.size() > 1) {
CharSequence column = prefixes.get(0);
hashColumnIndex = reader.getMetadata().getColumnIndex(column);
hashColumnType = reader.getMetadata().getColumnType(hashColumnIndex);
prefixes.remove(column);
}
model.setWhereClause(withinExtracted);
if (withinExtracted != null) {
......@@ -2508,8 +2497,6 @@ public class SqlCodeGenerator implements Mutable {
f,
executionContext,
readerTimestampIndex,
hashColumnIndex,
hashColumnType,
columnIndexes,
prefixes
);
......@@ -2767,8 +2754,6 @@ public class SqlCodeGenerator implements Mutable {
myMeta,
new FullBwdDataFrameCursorFactory(engine, tableName, model.getTableId(), model.getTableVersion()),
listColumnFilterA.getColumnIndexFactored(0),
hashColumnIndex,
hashColumnType,
null,
columnIndexes,
prefixes
......
......@@ -25,8 +25,10 @@
package io.questdb.griffin;
import io.questdb.cairo.ColumnType;
import io.questdb.cairo.GeoHashes;
import io.questdb.cairo.sql.Function;
import io.questdb.cairo.sql.RecordMetadata;
import io.questdb.griffin.engine.functions.AbstractGeoHashFunction;
import io.questdb.griffin.model.AliasTranslator;
import io.questdb.griffin.model.ExpressionNode;
import io.questdb.griffin.model.IntervalUtils;
......@@ -1077,7 +1079,9 @@ final class WhereClauseParser implements Mutable {
AliasTranslator translator,
ExpressionNode node,
RecordMetadata metadata,
CharSequenceHashSet prefixes
FunctionParser functionParser,
SqlExecutionContext executionContext,
LongList prefixes
) throws SqlException {
prefixes.clear();
......@@ -1086,7 +1090,7 @@ final class WhereClauseParser implements Mutable {
// pre-order iterative tree traversal
// see: http://en.wikipedia.org/wiki/Tree_traversal
if (removeWithin(translator, node, metadata, prefixes)) {
if (removeWithin(translator, node, metadata, functionParser, executionContext, prefixes)) {
return collapseWithinNodes(node);
}
......@@ -1094,10 +1098,10 @@ final class WhereClauseParser implements Mutable {
while (!stack.isEmpty() || node != null) {
if (node != null) {
if (isAndKeyword(node.token) || isOrKeyword(node.token)) {
if (!removeWithin(translator, node.rhs, metadata, prefixes)) {
if (!removeWithin(translator, node.rhs, metadata, functionParser, executionContext, prefixes)) {
stack.push(node.rhs);
}
node = removeWithin(translator, node.lhs, metadata, prefixes) ? null : node.lhs;
node = removeWithin(translator, node.lhs, metadata, functionParser, executionContext, prefixes) ? null : node.lhs;
} else {
node = stack.poll();
}
......@@ -1109,11 +1113,17 @@ final class WhereClauseParser implements Mutable {
return collapseWithinNodes(root);
}
private boolean removeWithin(AliasTranslator translator, ExpressionNode node, RecordMetadata metadata, CharSequenceHashSet prefixes) throws SqlException {
private boolean removeWithin(
AliasTranslator translator,
ExpressionNode node,
RecordMetadata metadata,
FunctionParser functionParser,
SqlExecutionContext executionContext,
LongList prefixes) throws SqlException {
if (isWithinKeyword(node.token)) {
if (prefixes.size() > 0) {
throw SqlException.$(node.position, "Using more than one 'within' operator per query is not allowed");
throw SqlException.$(node.position, "Multiple 'within' expressions not supported");
}
if (node.paramCount < 2) {
......@@ -1123,7 +1133,7 @@ final class WhereClauseParser implements Mutable {
ExpressionNode col = node.paramCount < 3 ? node.lhs : node.args.getLast();
if (col.type != ExpressionNode.LITERAL) {
return false;
throw SqlException.unexpectedToken(col.position, col.token);
}
CharSequence column = translator.translateAlias(col.token);
......@@ -1132,24 +1142,28 @@ final class WhereClauseParser implements Mutable {
throw SqlException.invalidColumn(col.position, col.token);
}
final int hashColumnIndex = metadata.getColumnIndex(column);
final int hashColumnType = metadata.getColumnType(hashColumnIndex);
if (!ColumnType.isGeoHash(hashColumnType)) {
throw SqlException.$(node.position, "GeoHash column type expected");
}
if(prefixes.size() == 0) {
prefixes.add(column); //TODO: make a proper data struct
prefixes.add(hashColumnIndex);
prefixes.add(hashColumnType);
}
int i = node.paramCount - 1;
if (i == 1) {
if (node.rhs == null || node.rhs.type != ExpressionNode.CONSTANT) {
return false;
}
prefixes.add(unquote(node.rhs.token));
int c = node.paramCount - 1;
if (c == 1) {
ExpressionNode inArg = node.rhs;
processArgument(inArg, metadata, functionParser, executionContext, hashColumnType, prefixes);
} else {
for (i--; i > -1; i--) {
ExpressionNode c = node.args.getQuick(i);
if (c.type != ExpressionNode.CONSTANT || isNullKeyword(c.token)) {
return false;
}
prefixes.add(unquote(c.token));
for (c--; c > -1; c--) {
ExpressionNode inArg = node.args.getQuick(c);
processArgument(inArg, metadata, functionParser, executionContext, hashColumnType, prefixes);
}
}
return true;
......@@ -1158,6 +1172,79 @@ final class WhereClauseParser implements Mutable {
}
}
/**
 * Resolves a single 'within' argument to a GeoHash (hash, type) pair and appends
 * its normalized prefix/mask to {@code prefixes}.
 * Accepts GeoHash char literals (#...), bit literals (##...), and constant
 * GeoHash functions; anything else is rejected with a SqlException.
 *
 * @param inArg      the argument expression node
 * @param columnType GeoHash type of the column the prefix is matched against
 * @throws SqlException on null args, non-GeoHash values, malformed literals,
 *                      or a prefix more precise than the column
 */
private void processArgument(
        ExpressionNode inArg,
        RecordMetadata metadata,
        FunctionParser functionParser,
        SqlExecutionContext executionContext,
        int columnType,
        LongList prefixes
) throws SqlException {
    final int position = inArg.position;
    if (isNull(inArg)) {
        throw SqlException.$(position, "GeoHash value expected");
    }

    final int type;
    final long hash;
    if (isFunc(inArg)) {
        // Constant GeoHash functions are evaluated once at parse time; the Function
        // is closed immediately after its constant value is extracted.
        try (Function f = functionParser.parseFunction(inArg, metadata, executionContext)) {
            if (isGeoHashConstFunction(f)) {
                type = f.getType();
                hash = GeoHashes.getGeoLong(type, f, null);
            } else {
                // use cached 'position' consistently (was inArg.position)
                throw SqlException.$(position, "GeoHash const function expected");
            }
        }
    } else {
        final boolean isConstant = inArg.type == ExpressionNode.CONSTANT;
        final CharSequence token = inArg.token;
        final int len = token.length();
        final boolean isBitsPrefix = len > 2 && token.charAt(0) == '#' && token.charAt(1) == '#';
        final boolean isCharsPrefix = len > 1 && token.charAt(0) == '#';
        if (!(isConstant && (isBitsPrefix || isCharsPrefix))) {
            throw SqlException.$(position, "GeoHash literal expected");
        }
        try {
            if (!isBitsPrefix) {
                // Char literal, possibly carrying a precision suffix (e.g. #sp052w/25).
                final int sdd = ExpressionParser.extractGeoHashSuffix(position, token);
                final int sddLen = Numbers.decodeLowShort(sdd);
                final int bits = Numbers.decodeHighShort(sdd);
                type = ColumnType.getGeoHashTypeWithBits(bits);
                hash = GeoHashes.fromStringTruncatingNl(token, 1, len - sddLen, bits);
            } else {
                // Bit literal: every character after "##" is one bit.
                int bits = len - 2;
                if (bits <= ColumnType.GEO_HASH_MAX_BITS_LENGTH) {
                    type = ColumnType.getGeoHashTypeWithBits(bits);
                    hash = GeoHashes.fromBitStringNl(token, 2);
                } else {
                    throw SqlException.$(position, "GeoHash bits literal expected");
                }
            }
        } catch (NumericException ignored) {
            // Malformed literal surfaces as a SQL error at the argument's position.
            throw SqlException.$(position, "GeoHash literal expected");
        }
    }

    try {
        GeoHashes.addNormalizedGeoPrefix(hash, type, columnType, prefixes);
    } catch (NumericException e) {
        throw SqlException.$(position, "GeoHash prefix precision mismatch");
    }
}
// True when the argument node is absent or is the literal 'null' keyword.
private boolean isNull(ExpressionNode node) {
    if (node == null) {
        return true;
    }
    return isNullKeyword(node.token);
}
// A function qualifies when it is GeoHash-typed (AbstractGeoHashFunction)
// and its value is a compile-time constant.
private boolean isGeoHashConstFunction(Function fn) {
    if (!(fn instanceof AbstractGeoHashFunction)) {
        return false;
    }
    return fn.isConstant();
}
private ExpressionNode collapseWithinNodes(ExpressionNode node) {
if (node == null || isWithinKeyword(node.token)) {
return null;
......
......@@ -38,14 +38,12 @@ class LatestByAllIndexedFilteredRecordCursor extends LatestByAllIndexedRecordCur
/**
 * Cursor applying a row filter on top of indexed latest-by scanning.
 * Clean post-change constructor: the removed hashColumnIndex/hashColumnType
 * parameters and the stale duplicate super(...) call (diff residue) are dropped;
 * GeoHash prefix state now travels inside {@code prefixes}.
 */
public LatestByAllIndexedFilteredRecordCursor(
        int columnIndex,
        @NotNull DirectLongList rows,
        @NotNull Function filter,
        @NotNull IntList columnIndexes,
        @NotNull DirectLongList prefixes
) {
    super(columnIndex, rows, columnIndexes, prefixes);
    this.filter = filter;
}
......
......@@ -25,14 +25,12 @@
package io.questdb.griffin.engine.table;
import io.questdb.cairo.CairoConfiguration;
import io.questdb.cairo.ColumnType;
import io.questdb.cairo.GeoHashes;
import io.questdb.cairo.sql.DataFrameCursorFactory;
import io.questdb.cairo.sql.Function;
import io.questdb.cairo.sql.RecordMetadata;
import io.questdb.std.CharSequenceHashSet;
import io.questdb.std.DirectLongList;
import io.questdb.std.IntList;
import io.questdb.std.LongList;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
......@@ -44,23 +42,22 @@ public class LatestByAllIndexedFilteredRecordCursorFactory extends AbstractTreeS
@NotNull RecordMetadata metadata,
@NotNull DataFrameCursorFactory dataFrameCursorFactory,
int columnIndex,
int hashColumnIndex,
int hashColumnType,
@Nullable Function filter,
@NotNull IntList columnIndexes,
@NotNull CharSequenceHashSet prefixes
@NotNull LongList prefixes
) {
super(metadata, dataFrameCursorFactory, configuration);
this.prefixes = new DirectLongList(Math.max(2, prefixes.size()));
this.prefixes = new DirectLongList(64);
if (hashColumnIndex > -1 && ColumnType.isGeoHash(hashColumnType)) {
GeoHashes.fromStringToBits(prefixes, hashColumnType, this.prefixes);
// copy into owned direct memory
for (int i = 0; i < prefixes.size(); i++) {
this.prefixes.add(prefixes.get(i));
}
if (filter == null) {
this.cursor = new LatestByAllIndexedRecordCursor(columnIndex, hashColumnIndex, hashColumnType, rows, columnIndexes, this.prefixes);
this.cursor = new LatestByAllIndexedRecordCursor(columnIndex, rows, columnIndexes, this.prefixes);
} else {
this.cursor = new LatestByAllIndexedFilteredRecordCursor(columnIndex, hashColumnIndex, hashColumnType, rows, filter, columnIndexes, this.prefixes);
this.cursor = new LatestByAllIndexedFilteredRecordCursor(columnIndex, rows, filter, columnIndexes, this.prefixes);
}
}
......
......@@ -46,8 +46,6 @@ import org.jetbrains.annotations.NotNull;
class LatestByAllIndexedRecordCursor extends AbstractRecordListCursor {
protected final DirectLongList prefixes;
private final int columnIndex;
private final int hashColumnIndex;
private final int hashColumnType;
private final SOUnboundedCountDownLatch doneLatch = new SOUnboundedCountDownLatch();
protected long indexShift = 0;
protected long aIndex;
......@@ -55,16 +53,12 @@ class LatestByAllIndexedRecordCursor extends AbstractRecordListCursor {
/**
 * Clean post-change constructor: the removed hashColumnIndex/hashColumnType
 * parameters and their field assignments (diff residue) are dropped; that
 * state is now decoded from the head of {@code prefixes} at scan time.
 */
public LatestByAllIndexedRecordCursor(
        int columnIndex,
        @NotNull DirectLongList rows,
        @NotNull IntList columnIndexes,
        @NotNull DirectLongList prefixes
) {
    super(rows, columnIndexes);
    this.columnIndex = columnIndex;
    this.prefixes = prefixes;
}
......@@ -118,9 +112,18 @@ class LatestByAllIndexedRecordCursor extends AbstractRecordListCursor {
LatestByArguments.setRowsSize(argsAddress, 0);
}
final long prefixesAddress = prefixes.getAddress();
final long prefixesCount = prefixes.size();
int hashColumnIndex = -1;
int hashColumnType = ColumnType.UNDEFINED;
long prefixesAddress = 0;
long prefixesCount = 0;
if(this.prefixes.size() > 2) {
hashColumnIndex = (int) prefixes.get(0);
hashColumnType = (int) prefixes.get(1);
prefixesAddress = prefixes.getAddress() + 2 * Long.BYTES;
prefixesCount = prefixes.size() - 2;
}
DataFrame frame;
// frame metadata is based on TableReader, which is "full" metadata
......
......@@ -481,19 +481,17 @@ public class GeoHashesTest {
}
@Test
public void testFromStringToBits() throws NumericException {
public void testBuildNormalizedPrefixesAndMasks() throws NumericException {
final int cap = 12;
DirectLongList bits = new DirectLongList(cap * 2); // hash and mask
CharSequenceHashSet strh = new CharSequenceHashSet();
StringSink sink = Misc.getThreadLocalBuilder();
LongList bits = new LongList(cap * 2); // hash and mask
int columnType = ColumnType.getGeoHashTypeWithBits(5 * cap);
for (int i = 0; i < cap; i++) {
final int prec = (i % 3) + 3;
final long h = rnd_geohash(prec);
sink.clear();
GeoHashes.appendChars(h, prec, sink);
strh.add(sink);
int type = ColumnType.getGeoHashTypeWithBits(5 * prec);
GeoHashes.addNormalizedGeoPrefix(h, type, columnType, bits);
}
GeoHashes.fromStringToBits(strh, ColumnType.getGeoHashTypeWithBits(cap * 5), bits);
for (int i = 0; i < bits.size() / 2; i += 2) {
final long b = bits.get(i);
final long m = bits.get(i + 1);
......@@ -502,41 +500,21 @@ public class GeoHashesTest {
}
@Test
public void testFromStringToBitsInvalidNull() {
final int cap = 12;
DirectLongList bits = new DirectLongList(cap * 2); // hash and mask
CharSequenceHashSet strh = new CharSequenceHashSet();
strh.add("");
strh.add(null);
strh.add("$invalid");
strh.add("questdb.10");
GeoHashes.fromStringToBits(strh, ColumnType.getGeoHashTypeWithBits(cap * 5), bits);
Assert.assertEquals(0, bits.size());
}
public void testPrefixPrecisionMismatch() throws NumericException {
final int cap = 1;
LongList bits = new LongList(cap * 2); // hash and mask
final long h = rnd_geohash(5);
final long p = rnd_geohash(7);
@Test
public void testFromStringToBitsInvalidStrings() {
final int cap = 12;
DirectLongList bits = new DirectLongList(cap * 2); // hash and mask
CharSequenceHashSet strh = new CharSequenceHashSet();
strh.add("");
strh.add("a medium sized banana");
GeoHashes.fromStringToBits(strh, ColumnType.getGeoHashTypeWithBits(cap * 5), bits);
int pType = ColumnType.getGeoHashTypeWithBits(5 * 7);
int hType = ColumnType.getGeoHashTypeWithBits(5 * 5);
try {
GeoHashes.addNormalizedGeoPrefix(h, pType, hType, bits);
} catch (NumericException ignored) {
}
Assert.assertEquals(0, bits.size());
}
// NOTE(review): this test exercises the pre-change fromStringToBits API, which
// this commit removes elsewhere — retained here byte-identical as diff residue.
// One valid prefix is expected to yield exactly one (hash, mask) pair: two longs.
@Test
public void testFromStringToBitsSingle() {
    final int cap = 12;
    DirectLongList bits = new DirectLongList(cap * 2); // hash and mask
    CharSequenceHashSet strh = new CharSequenceHashSet();
    strh.add("questdb");
    // "questdb" is 7 chars (35 bits), narrower than the 60-bit column type.
    GeoHashes.fromStringToBits(strh, ColumnType.getGeoHashTypeWithBits(cap * 5), bits);
    Assert.assertEquals(2, bits.size());
}
@Test
public void testFromBitStringTruncating() throws NumericException {
CharSequence tooLongBitString = Chars.repeat("1", 61); // truncates
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register.