diff --git a/core/src/main/java/io/questdb/cutlass/text/TextLexer.java b/core/src/main/java/io/questdb/cutlass/text/TextLexer.java index 008a119a3a80a441afd4fbc37025e4aa4c108c10..378c259c12adcf89eb308b4d7b8e58cc78666c9f 100644 --- a/core/src/main/java/io/questdb/cutlass/text/TextLexer.java +++ b/core/src/main/java/io/questdb/cutlass/text/TextLexer.java @@ -41,8 +41,8 @@ public class TextLexer implements Closeable, Mutable { private final ObjList fields = new ObjList<>(); private final ObjectPool csPool; private final TextMetadataDetector metadataDetector; - private CharSequence tableName; private final int lineRollBufLimit; + private CharSequence tableName; private boolean ignoreEolOnce; private long lineRollBufCur; private Listener textLexerListener; @@ -151,12 +151,33 @@ public class TextLexer implements Closeable, Mutable { csPool.clear(); } + private void addField() { + fields.add(csPool.next()); + fieldMax++; + } + + private void checkEol(long lo) { + if (eol) { + uneol(lo); + } + } + private void clearRollBuffer(long ptr) { useLineRollBuf = false; lineRollBufCur = lineRollBufPtr; this.fieldLo = this.fieldHi = ptr; } + private void eol(long ptr, byte c) { + if (c == '\n' || c == '\r') { + eol = true; + rollBufferUnusable = false; + clearRollBuffer(ptr); + fieldIndex = 0; + lineCount++; + } + } + ObjList getColumnNames() { return metadataDetector.getColumnNames(); } @@ -196,6 +217,12 @@ public class TextLexer implements Closeable, Mutable { return true; } + private void growRollBufAndPut(byte c) { + if (growRollBuf(lineRollBufLen + 1, true)) { + Unsafe.getUnsafe().putByte(lineRollBufCur++, c); + } + } + private void ignoreEolOnce() { eol = true; fieldIndex = 0; @@ -206,12 +233,55 @@ public class TextLexer implements Closeable, Mutable { return metadataDetector.isHeader(); } + private void onColumnDelimiter(long lo) { + checkEol(lo); + + if (inQuote || ignoreEolOnce) { + return; + } + stashField(fieldIndex++); + } + + private void onLineEnd(long ptr) throws LineLimitException { + if (inQuote) { + return; + } + + if (eol) { + this.fieldLo = this.fieldHi; + return; + } + + stashField(fieldIndex); + + if (ignoreEolOnce) { + ignoreEolOnce(); + return; + } + + triggerLine(ptr); + + if (lineCount > lineCountLimit) { + throw LineLimitException.INSTANCE; + } + } + + private void onQuote() { + if (inQuote) { + delayedOutQuote = !delayedOutQuote; + lastQuotePos = this.fieldHi; + } else if (fieldHi - fieldLo == 1) { + inQuote = true; + this.fieldLo = this.fieldHi; + } + } + private void parse(long lo, long hi) { long ptr = lo; try { while (ptr < hi) { - byte c = Unsafe.getUnsafe().getByte(ptr++); + final byte c = Unsafe.getUnsafe().getByte(ptr++); if (rollBufferUnusable) { eol(ptr, c); @@ -233,19 +303,12 @@ public class TextLexer implements Closeable, Mutable { if (c == columnDelimiter) { onColumnDelimiter(lo); + } else if (c == '"') { + onQuote(); + } else if (c == '\n' || c == '\r') { + onLineEnd(ptr); } else { - switch (c) { - case '"': - onQuote(); - break; - case '\r': - case '\n': - onLineEnd(ptr); - break; - default: - checkEol(lo); - break; - } + checkEol(lo); } } } catch (LineLimitException ignore) { @@ -264,31 +327,6 @@ public class TextLexer implements Closeable, Mutable { } } - private void onColumnDelimiter(long lo) { - checkEol(lo); - - if (inQuote || ignoreEolOnce) { - return; - } - stashField(fieldIndex++); - } - - private void checkEol(long lo) { - if (eol) { - uneol(lo); - } - } - - private void eol(long ptr, byte c) { - if (c == '\n' || c == '\r') { - eol = true; - rollBufferUnusable = false; - clearRollBuffer(ptr); - fieldIndex = 0; - lineCount++; - } - } - private void putToRollBuf(byte c) { if (lineRollBufCur - lineRollBufPtr == lineRollBufLen) { growRollBufAndPut(c); @@ -297,46 +335,6 @@ public class TextLexer implements Closeable, Mutable { } } - private void growRollBufAndPut(byte c) { - if (growRollBuf(lineRollBufLen + 1, true)) { - Unsafe.getUnsafe().putByte(lineRollBufCur++, c); - } - } - - private void onQuote() { - if (inQuote) { - delayedOutQuote = !delayedOutQuote; - lastQuotePos = this.fieldHi; - } else if (fieldHi - fieldLo == 1) { - inQuote = true; - this.fieldLo = this.fieldHi; - } - } - - private void onLineEnd(long ptr) throws LineLimitException { - if (inQuote) { - return; - } - - if (eol) { - this.fieldLo = this.fieldHi; - return; - } - - stashField(fieldIndex); - - if (ignoreEolOnce) { - ignoreEolOnce(); - return; - } - - triggerLine(ptr); - - if (lineCount > lineCountLimit) { - throw LineLimitException.INSTANCE; - } - } - private void reportExtraFields() { LogRecord logRecord = LOG.error().$("extra fields [table=").$(tableName).$("]\n\t").$(lineCount).$(" -> "); for (int i = 0, n = fields.size(); i < n; i++) { @@ -400,11 +398,6 @@ public class TextLexer implements Closeable, Mutable { this.fieldLo = this.fieldHi; } - private void addField() { - fields.add(csPool.next()); - fieldMax++; - } - private void triggerLine(long ptr) { eol = true; fieldIndex = 0; diff --git a/core/src/main/java/io/questdb/cutlass/text/TextMetadataDetector.java b/core/src/main/java/io/questdb/cutlass/text/TextMetadataDetector.java index ba6e2bd98a13057b5213ab3c9ac24083a4cc84e8..84790f32a15b475e24baa294f94a37b8b1730327 100644 --- a/core/src/main/java/io/questdb/cutlass/text/TextMetadataDetector.java +++ b/core/src/main/java/io/questdb/cutlass/text/TextMetadataDetector.java @@ -29,7 +29,6 @@ import io.questdb.cutlass.text.types.TypeManager; import io.questdb.log.Log; import io.questdb.log.LogFactory; import io.questdb.std.*; -import io.questdb.std.str.CharSink; import io.questdb.std.str.DirectByteCharSequence; import io.questdb.std.str.DirectCharSink; import io.questdb.std.str.StringSink; @@ -59,25 +58,6 @@ public class TextMetadataDetector implements TextLexer.Listener, Mutable, Closea this.utf8Sink = new DirectCharSink(textConfiguration.getUtf8SinkSize()); } - public static boolean utf8Decode(long lo, long hi, CharSink sink) { - long p = lo; - while (p < hi) { - byte b = Unsafe.getUnsafe().getByte(p); - if (b < 0) { - int n = Chars.utf8DecodeMultiByte(p, hi, b, sink); - if (n == -1) { - // UTF8 error - return false; - } - p += n; - } else { - sink.put((char) b); - ++p; - } - } - return true; - } - @Override public void clear() { tempSink.clear(); @@ -280,7 +260,7 @@ public class TextMetadataDetector implements TextLexer.Listener, Mutable, Closea for (int i = 0; i < hi; i++) { DirectByteCharSequence value = values.getQuick(i); utf8Sink.clear(); - if (utf8Decode(value.getLo(), value.getHi(), utf8Sink)) { + if (Chars.utf8Decode(value.getLo(), value.getHi(), utf8Sink)) { columnNames.setQuick(i, normalise(utf8Sink)); } else { LOG.info().$("utf8 error [table=").$(tableName).$(", line=0, col=").$(i).$(']').$(); diff --git a/core/src/main/java/io/questdb/cutlass/text/TextMetadataParser.java b/core/src/main/java/io/questdb/cutlass/text/TextMetadataParser.java index 7604a8f0f4553791e63f2a541e49063bd6e344cc..a6d9c5c0f5f55c83fbf1fc36240c492f17227387 100644 --- a/core/src/main/java/io/questdb/cutlass/text/TextMetadataParser.java +++ b/core/src/main/java/io/questdb/cutlass/text/TextMetadataParser.java @@ -52,6 +52,7 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable { private static final int P_TYPE = 2; private static final int P_PATTERN = 3; private static final int P_LOCALE = 4; + private static final int P_UTF8 = 5; private static final CharSequenceIntHashMap propertyNameMap = new CharSequenceIntHashMap(); private final DateLocaleFactory dateLocaleFactory; private final TimestampLocaleFactory timestampLocaleFactory; @@ -72,6 +73,7 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable { private int bufSize = 0; private CharSequence tableName; private int localePosition; + private boolean utf8 = false; public TextMetadataParser(TextConfiguration textConfiguration, TypeManager typeManager) { this.columnNames = new ObjList<>(); @@ -84,16 +86,6 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable { this.typeManager = typeManager; } - private static void checkInputs(int position, CharSequence name, int type) throws JsonException { - if (name == null) { - throw JsonException.$(position, "Missing 'name' property"); - } - - if (type == -1) { - throw JsonException.$(position, "Missing 'type' property"); - } - } - @Override public void clear() { bufSize = 0; @@ -160,6 +152,9 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable { locale = copy(tag); localePosition = position; break; + case P_UTF8: + utf8 = Chars.equalsLowerCaseAscii("true", tag); + break; default: LOG.info().$("ignoring [table=").$(tableName).$(", value=").$(tag).$(']').$(); break; @@ -176,12 +171,23 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable { } } + private static void checkInputs(int position, CharSequence name, int type) throws JsonException { + if (name == null) { + throw JsonException.$(position, "Missing 'name' property"); + } + + if (type == -1) { + throw JsonException.$(position, "Missing 'type' property"); + } + } + private void clearStage() { name = null; type = -1; pattern = null; locale = null; localePosition = 0; + utf8 = false; } private CharSequence copy(CharSequence tag) { @@ -237,7 +243,7 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable { if (pattern == null) { throw JsonException.$(0, "DATE format pattern is required"); } - columnTypes.add(typeManager.nextTimestampAdapter().of(timestampFormatFactory.get(pattern), timestampLocale)); + columnTypes.add(typeManager.nextTimestampAdapter(utf8, timestampFormatFactory.get(pattern), timestampLocale)); break; default: columnTypes.add(typeManager.getTypeAdapter(type)); @@ -282,5 +288,6 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable { propertyNameMap.put("type", P_TYPE); propertyNameMap.put("pattern", P_PATTERN); propertyNameMap.put("locale", P_LOCALE); + propertyNameMap.put("utf8", P_UTF8); } } diff --git a/core/src/main/java/io/questdb/cutlass/text/types/DateAdapter.java b/core/src/main/java/io/questdb/cutlass/text/types/DateAdapter.java index 2f4ec33c9dfbe07b97130e6d8da433fb78eb6a46..79ff288a0df690a327e2e5efd620a8b4ea48e1e5 100644 --- a/core/src/main/java/io/questdb/cutlass/text/types/DateAdapter.java +++ b/core/src/main/java/io/questdb/cutlass/text/types/DateAdapter.java @@ -25,23 +25,16 @@ package io.questdb.cutlass.text.types; import io.questdb.cairo.ColumnType; import io.questdb.cairo.TableWriter; -import io.questdb.cutlass.text.TextUtil; import io.questdb.std.Mutable; import io.questdb.std.NumericException; import io.questdb.std.str.DirectByteCharSequence; -import io.questdb.std.str.DirectCharSink; import io.questdb.std.time.DateFormat; import io.questdb.std.time.DateLocale; public class DateAdapter extends AbstractTypeAdapter implements Mutable { - private final DirectCharSink utf8Sink; private DateLocale locale; private DateFormat format; - public DateAdapter(DirectCharSink utf8Sink) { - this.utf8Sink = utf8Sink; - } - @Override public void clear() { this.format = null; @@ -65,9 +58,7 @@ public class DateAdapter extends AbstractTypeAdapter implements Mutable { @Override public void write(TableWriter.Row row, int column, DirectByteCharSequence value) throws Exception { - utf8Sink.clear(); - TextUtil.utf8Decode(value.getLo(), value.getHi(), utf8Sink); - row.putDate(column, format.parse(utf8Sink, locale)); + row.putDate(column, format.parse(value, locale)); } public DateAdapter of(DateFormat format, DateLocale locale) { diff --git a/core/src/main/java/io/questdb/cutlass/text/types/DateUtf8Adapter.java b/core/src/main/java/io/questdb/cutlass/text/types/DateUtf8Adapter.java new file mode 100644 index 0000000000000000000000000000000000000000..bfeedeea9b2fd30db31350ed24b363d96ac8be68 --- /dev/null +++ b/core/src/main/java/io/questdb/cutlass/text/types/DateUtf8Adapter.java @@ -0,0 +1,78 @@ +/******************************************************************************* + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (C) 2014-2019 Appsicle + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + ******************************************************************************/ + +package io.questdb.cutlass.text.types; + +import io.questdb.cairo.ColumnType; +import io.questdb.cairo.TableWriter; +import io.questdb.cutlass.text.TextUtil; +import io.questdb.std.Mutable; +import io.questdb.std.NumericException; +import io.questdb.std.str.DirectByteCharSequence; +import io.questdb.std.str.DirectCharSink; +import io.questdb.std.time.DateFormat; +import io.questdb.std.time.DateLocale; + +public class DateUtf8Adapter extends AbstractTypeAdapter implements Mutable { + private final DirectCharSink utf8Sink; + private DateLocale locale; + private DateFormat format; + + public DateUtf8Adapter(DirectCharSink utf8Sink) { + this.utf8Sink = utf8Sink; + } + + @Override + public void clear() { + this.format = null; + this.locale = null; + } + + @Override + public int getType() { + return ColumnType.DATE; + } + + @Override + public boolean probe(CharSequence text) { + try { + format.parse(text, locale); + return true; + } catch (NumericException e) { + return false; + } + } + + @Override + public void write(TableWriter.Row row, int column, DirectByteCharSequence value) throws Exception { + utf8Sink.clear(); + TextUtil.utf8Decode(value.getLo(), value.getHi(), utf8Sink); + row.putDate(column, format.parse(utf8Sink, locale)); + } + + public DateUtf8Adapter of(DateFormat format, DateLocale locale) { + this.format = format; + this.locale = locale; + return this; + } +} diff --git a/core/src/main/java/io/questdb/cutlass/text/types/InputFormatConfiguration.java b/core/src/main/java/io/questdb/cutlass/text/types/InputFormatConfiguration.java index 6cbde2fbeb473b2eebb82de91b56b42176dcc9be..cd73606a335cc605ae8786cd73bb394285748681 100644 --- a/core/src/main/java/io/questdb/cutlass/text/types/InputFormatConfiguration.java +++ b/core/src/main/java/io/questdb/cutlass/text/types/InputFormatConfiguration.java @@ -25,9 +25,11 @@ package io.questdb.cutlass.text.types; import io.questdb.cutlass.json.JsonException; import io.questdb.cutlass.json.JsonLexer; +import io.questdb.cutlass.json.JsonParser; import io.questdb.log.Log; import io.questdb.log.LogFactory; import io.questdb.std.Chars; +import io.questdb.std.IntList; import io.questdb.std.ObjList; import io.questdb.std.Unsafe; import io.questdb.std.microtime.TimestampFormat; @@ -50,14 +52,18 @@ public class InputFormatConfiguration { private static final int STATE_EXPECT_TIMESTAMP_FORMAT_ARRAY = 3; private static final int STATE_EXPECT_DATE_FORMAT_VALUE = 4; private static final int STATE_EXPECT_DATE_LOCALE_VALUE = 5; + private static final int STATE_EXPECT_DATE_UTF8_VALUE = 10; private static final int STATE_EXPECT_TIMESTAMP_FORMAT_VALUE = 6; private static final int STATE_EXPECT_TIMESTAMP_LOCALE_VALUE = 7; private static final int STATE_EXPECT_DATE_FORMAT_ENTRY = 8; private static final int STATE_EXPECT_TIMESTAMP_FORMAT_ENTRY = 9; + private static final int STATE_EXPECT_TIMESTAMP_UTF8_VALUE = 11; private final ObjList dateFormats = new ObjList<>(); private final ObjList dateLocales = new ObjList<>(); + private final IntList dateUtf8Flags = new IntList(); private final ObjList timestampFormats = new ObjList<>(); private final ObjList timestampLocales = new ObjList<>(); + private final IntList timestampUtf8Flags = new IntList(); private final DateFormatFactory dateFormatFactory; private final DateLocaleFactory dateLocaleFactory; private final TimestampFormatFactory timestampFormatFactory; @@ -65,8 +71,10 @@ public class InputFormatConfiguration { private int jsonState = STATE_EXPECT_TOP; // expect start of object private DateFormat jsonDateFormat; private DateLocale jsonDateLocale; + private boolean jsonDateUtf8; private TimestampFormat jsonTimestampFormat; private TimestampLocale jsonTimestampLocale; + private boolean jsonTimestampUtf8; public InputFormatConfiguration( DateFormatFactory dateFormatFactory, @@ -83,13 +91,17 @@ public class InputFormatConfiguration { public void clear() { dateFormats.clear(); dateLocales.clear(); + dateUtf8Flags.clear(); timestampFormats.clear(); timestampLocales.clear(); + timestampUtf8Flags.clear(); jsonState = STATE_EXPECT_TOP; jsonDateFormat = null; jsonDateLocale = null; + jsonDateUtf8 = false; jsonTimestampFormat = null; jsonTimestampLocale = null; + jsonTimestampUtf8 = false; } public DateFormatFactory getDateFormatFactory() { @@ -108,6 +120,10 @@ public class InputFormatConfiguration { return dateLocales; } + public IntList getDateUtf8Flags() { + return dateUtf8Flags; + } + public TimestampFormatFactory getTimestampFormatFactory() { return timestampFormatFactory; } @@ -124,11 +140,17 @@ public class InputFormatConfiguration { return timestampLocales; } + public IntList getTimestampUtf8Flags() { + return timestampUtf8Flags; + } + public void parseConfiguration(JsonLexer jsonLexer, String adapterSetConfigurationFileName) throws JsonException { this.clear(); jsonLexer.clear(); + final JsonParser parser = this::onJsonEvent; + LOG.info().$("loading [from=").$(adapterSetConfigurationFileName).$(']').$(); try (InputStream stream = this.getClass().getResourceAsStream(adapterSetConfigurationFileName)) { if (stream == null) { @@ -146,7 +168,7 @@ public class InputFormatConfiguration { for (int i = 0; i < len; i++) { Unsafe.getUnsafe().putByte(memBuffer + i, heapBuffer[i]); } - jsonLexer.parse(memBuffer, memBuffer + len, this::onJsonEvent); + jsonLexer.parse(memBuffer, memBuffer + len, parser); } jsonLexer.clear(); } finally { @@ -192,6 +214,7 @@ public class InputFormatConfiguration { } dateFormats.add(jsonDateFormat); dateLocales.add(jsonDateLocale == null ? DateLocaleFactory.INSTANCE.getDefaultDateLocale() : jsonDateLocale); + dateUtf8Flags.add(jsonDateUtf8 ? 1 : 0); break; case STATE_EXPECT_TIMESTAMP_FORMAT_ENTRY: if (jsonTimestampFormat == null) { @@ -200,6 +223,7 @@ public class InputFormatConfiguration { timestampFormats.add(jsonTimestampFormat); timestampLocales.add(jsonTimestampLocale == null ? TimestampLocaleFactory.INSTANCE.getDefaultTimestampLocale() : jsonTimestampLocale); + timestampUtf8Flags.add(jsonTimestampUtf8 ? 1 : 0); break; default: // the only time we get here would be when @@ -225,10 +249,10 @@ public class InputFormatConfiguration { } break; case STATE_EXPECT_DATE_FORMAT_ENTRY: - processEntry(tag, position, STATE_EXPECT_DATE_FORMAT_VALUE, STATE_EXPECT_DATE_LOCALE_VALUE); + processEntry(tag, position, STATE_EXPECT_DATE_FORMAT_VALUE, STATE_EXPECT_DATE_LOCALE_VALUE, STATE_EXPECT_DATE_UTF8_VALUE); break; default: - processEntry(tag, position, STATE_EXPECT_TIMESTAMP_FORMAT_VALUE, STATE_EXPECT_TIMESTAMP_LOCALE_VALUE); + processEntry(tag, position, STATE_EXPECT_TIMESTAMP_FORMAT_VALUE, STATE_EXPECT_TIMESTAMP_LOCALE_VALUE, STATE_EXPECT_TIMESTAMP_UTF8_VALUE); break; } break; @@ -267,6 +291,14 @@ public class InputFormatConfiguration { } jsonState = STATE_EXPECT_TIMESTAMP_FORMAT_ENTRY; break; + case STATE_EXPECT_TIMESTAMP_UTF8_VALUE: + jsonTimestampUtf8 = Chars.equalsLowerCaseAscii(tag, "true"); + jsonState = STATE_EXPECT_TIMESTAMP_FORMAT_ENTRY; + break; + case STATE_EXPECT_DATE_UTF8_VALUE: + jsonDateUtf8 = Chars.equalsLowerCaseAscii(tag, "true"); + jsonState = STATE_EXPECT_DATE_FORMAT_ENTRY; + break; default: // we are picking up values from attributes we don't expect throw JsonException.$(position, "array expected (value)"); @@ -292,11 +324,13 @@ public class InputFormatConfiguration { } } - private void processEntry(CharSequence tag, int position, int stateExpectFormatValue, int stateExpectLocaleValue) throws JsonException { + private void processEntry(CharSequence tag, int position, int stateExpectFormatValue, int stateExpectLocaleValue, int stateExpectUtf8Value) throws JsonException { if (Chars.equals(tag, "format")) { jsonState = stateExpectFormatValue; // expect date format } else if (Chars.equals(tag, "locale")) { jsonState = stateExpectLocaleValue; + } else if (Chars.equals(tag, "utf8")) { + jsonState = stateExpectUtf8Value; } else { // unknown tag name? throw JsonException.$(position, "unknown [tag=").put(tag).put(']'); diff --git a/core/src/main/java/io/questdb/cutlass/text/types/TimestampAdapter.java b/core/src/main/java/io/questdb/cutlass/text/types/TimestampAdapter.java index 0a902452f49909cb0b47f8df2f9e1ab34aff091e..6eb372fbac07996ccb255a3188431e948edcd942 100644 --- a/core/src/main/java/io/questdb/cutlass/text/types/TimestampAdapter.java +++ b/core/src/main/java/io/questdb/cutlass/text/types/TimestampAdapter.java @@ -25,23 +25,16 @@ package io.questdb.cutlass.text.types; import io.questdb.cairo.ColumnType; import io.questdb.cairo.TableWriter; -import io.questdb.cutlass.text.TextUtil; import io.questdb.std.Mutable; import io.questdb.std.NumericException; import io.questdb.std.microtime.TimestampFormat; import io.questdb.std.microtime.TimestampLocale; import io.questdb.std.str.DirectByteCharSequence; -import io.questdb.std.str.DirectCharSink; public class TimestampAdapter extends AbstractTypeAdapter implements Mutable { - private final DirectCharSink utf8Sink; private TimestampLocale locale; private TimestampFormat format; - public TimestampAdapter(DirectCharSink utf8Sink) { - this.utf8Sink = utf8Sink; - } - @Override public void clear() { this.format = null; @@ -65,9 +58,7 @@ public class TimestampAdapter extends AbstractTypeAdapter implements Mutable { @Override public void write(TableWriter.Row row, int column, DirectByteCharSequence value) throws Exception { - utf8Sink.clear(); - TextUtil.utf8Decode(value.getLo(), value.getHi(), utf8Sink); - row.putDate(column, format.parse(utf8Sink, locale)); + row.putDate(column, format.parse(value, locale)); } public TimestampAdapter of(TimestampFormat format, TimestampLocale locale) { diff --git a/core/src/main/java/io/questdb/cutlass/text/types/TimestampUtf8Adapter.java b/core/src/main/java/io/questdb/cutlass/text/types/TimestampUtf8Adapter.java new file mode 100644 index 0000000000000000000000000000000000000000..2ea57feb3a40b656601d6a5198c1de8f340a9c63 --- /dev/null +++ b/core/src/main/java/io/questdb/cutlass/text/types/TimestampUtf8Adapter.java @@ -0,0 +1,78 @@ +/******************************************************************************* + * ___ _ ____ ____ + * / _ \ _ _ ___ ___| |_| _ \| __ ) + * | | | | | | |/ _ \/ __| __| | | | _ \ + * | |_| | |_| | __/\__ \ |_| |_| | |_) | + * \__\_\\__,_|\___||___/\__|____/|____/ + * + * Copyright (C) 2014-2019 Appsicle + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + ******************************************************************************/ + +package io.questdb.cutlass.text.types; + +import io.questdb.cairo.ColumnType; +import io.questdb.cairo.TableWriter; +import io.questdb.cutlass.text.TextUtil; +import io.questdb.std.Mutable; +import io.questdb.std.NumericException; +import io.questdb.std.microtime.TimestampFormat; +import io.questdb.std.microtime.TimestampLocale; +import io.questdb.std.str.DirectByteCharSequence; +import io.questdb.std.str.DirectCharSink; + +public class TimestampUtf8Adapter extends AbstractTypeAdapter implements Mutable { + private final DirectCharSink utf8Sink; + private TimestampLocale locale; + private TimestampFormat format; + + public TimestampUtf8Adapter(DirectCharSink utf8Sink) { + this.utf8Sink = utf8Sink; + } + + @Override + public void clear() { + this.format = null; + this.locale = null; + } + + @Override + public int getType() { + return ColumnType.TIMESTAMP; + } + + @Override + public boolean probe(CharSequence text) { + try { + format.parse(text, locale); + return true; + } catch (NumericException e) { + return false; + } + } + + @Override + public void write(TableWriter.Row row, int column, DirectByteCharSequence value) throws Exception { + utf8Sink.clear(); + TextUtil.utf8Decode(value.getLo(), value.getHi(), utf8Sink); + row.putDate(column, format.parse(utf8Sink, locale)); + } + + public TimestampUtf8Adapter of(TimestampFormat format, TimestampLocale locale) { + this.format = format; + this.locale = locale; + return this; + } +} diff --git a/core/src/main/java/io/questdb/cutlass/text/types/TypeManager.java b/core/src/main/java/io/questdb/cutlass/text/types/TypeManager.java index 6f3f7306bbce6fcbea530b6c03b4c1fc2fd5fc88..a975c0a8eb02df5735cda4b1444df753669df10f 100644 --- a/core/src/main/java/io/questdb/cutlass/text/types/TypeManager.java +++ b/core/src/main/java/io/questdb/cutlass/text/types/TypeManager.java @@ -26,6 +26,7 @@ package io.questdb.cutlass.text.types; import io.questdb.cairo.CairoException; import io.questdb.cairo.ColumnType; import io.questdb.cutlass.text.TextConfiguration; +import io.questdb.std.IntList; import io.questdb.std.Mutable; import io.questdb.std.ObjList; import io.questdb.std.ObjectPool; @@ -39,7 +40,8 @@ public class TypeManager implements Mutable { private final ObjList probes = new ObjList<>(); private final int probeCount; private final StringAdapter stringAdapter; - private final ObjectPool dateAdapterPool; + private final ObjectPool dateAdapterPool; + private final ObjectPool timestampUtf8AdapterPool; private final ObjectPool timestampAdapterPool; private final SymbolAdapter symbolAdapter; private final InputFormatConfiguration inputFormatConfiguration; @@ -48,8 +50,9 @@ public class TypeManager implements Mutable { TextConfiguration configuration, DirectCharSink utf8Sink ) { - this.dateAdapterPool = new ObjectPool<>(() -> new DateAdapter(utf8Sink), configuration.getDateAdapterPoolCapacity()); - this.timestampAdapterPool = new ObjectPool<>(() -> new TimestampAdapter(utf8Sink), configuration.getTimestampAdapterPoolCapacity()); + this.dateAdapterPool = new ObjectPool<>(() -> new DateUtf8Adapter(utf8Sink), configuration.getDateAdapterPoolCapacity()); + this.timestampUtf8AdapterPool = new ObjectPool<>(() -> new TimestampUtf8Adapter(utf8Sink), configuration.getTimestampAdapterPoolCapacity()); + this.timestampAdapterPool = new ObjectPool<>(TimestampAdapter::new, configuration.getTimestampAdapterPoolCapacity()); this.inputFormatConfiguration = configuration.getInputFormatConfiguration(); this.stringAdapter = new StringAdapter(utf8Sink); this.symbolAdapter = new SymbolAdapter(utf8Sink); @@ -57,13 +60,23 @@ public class TypeManager implements Mutable { final ObjList dateFormats = inputFormatConfiguration.getDateFormats(); final ObjList dateLocales = inputFormatConfiguration.getDateLocales(); + final IntList dateUtf8Flags = inputFormatConfiguration.getDateUtf8Flags(); for (int i = 0, n = dateFormats.size(); i < n; i++) { - probes.add(new DateAdapter(utf8Sink).of(dateFormats.getQuick(i), dateLocales.getQuick(i))); + if (dateUtf8Flags.getQuick(i) == 1) { + probes.add(new DateUtf8Adapter(utf8Sink).of(dateFormats.getQuick(i), dateLocales.getQuick(i))); + } else { + probes.add(new DateAdapter().of(dateFormats.getQuick(i), dateLocales.getQuick(i))); + } } final ObjList timestampFormats = inputFormatConfiguration.getTimestampFormats(); final ObjList timestampLocales = inputFormatConfiguration.getTimestampLocales(); + final IntList timestampUtf8Flags = inputFormatConfiguration.getTimestampUtf8Flags(); for (int i = 0, n = timestampFormats.size(); i < n; i++) { - probes.add(new TimestampAdapter(utf8Sink).of(timestampFormats.getQuick(i), timestampLocales.getQuick(i))); + if (timestampUtf8Flags.getQuick(i) == 1) { + probes.add(new TimestampUtf8Adapter(utf8Sink).of(timestampFormats.getQuick(i), timestampLocales.getQuick(i))); + } else { + probes.add(new TimestampAdapter().of(timestampFormats.getQuick(i), timestampLocales.getQuick(i))); + } } this.probeCount = probes.size(); } @@ -71,6 +84,7 @@ public class TypeManager implements Mutable { @Override public void clear() { dateAdapterPool.clear(); + timestampUtf8AdapterPool.clear(); timestampAdapterPool.clear(); } @@ -115,12 +129,20 @@ public class TypeManager implements Mutable { } } - public DateAdapter nextDateAdapter() { + public DateUtf8Adapter nextDateAdapter() { return dateAdapterPool.next(); } - public TimestampAdapter nextTimestampAdapter() { - return timestampAdapterPool.next(); + public TypeAdapter nextTimestampAdapter(boolean decodeUtf8, TimestampFormat format, TimestampLocale locale) { + if (decodeUtf8) { + TimestampUtf8Adapter adapter = timestampUtf8AdapterPool.next(); + adapter.of(format, locale); + return adapter; + } + + TimestampAdapter adapter = timestampAdapterPool.next(); + adapter.of(format, locale); + return adapter; } private void addDefaultProbes() { diff --git a/core/src/main/java/io/questdb/std/Chars.java b/core/src/main/java/io/questdb/std/Chars.java index 0f1e5ad204df1a54363a2e3a6cdaa71abcb756b1..a8b86566332d397df4d6bd37d1ddfa3737e26cbe 100644 --- a/core/src/main/java/io/questdb/std/Chars.java +++ b/core/src/main/java/io/questdb/std/Chars.java @@ -530,7 +530,6 @@ public final class Chars { public static boolean utf8Decode(long lo, long hi, CharSink sink) { long p = lo; - while (p < hi) { byte b = Unsafe.getUnsafe().getByte(p); if (b < 0) { diff --git a/core/src/main/resources/text_loader.json b/core/src/main/resources/text_loader.json index eb3d8871ee05f20e562f96213f3415c93c93c934..6849bb0bf262af106848623fe3bdc249c1debac4 100644 --- a/core/src/main/resources/text_loader.json +++ b/core/src/main/resources/text_loader.json @@ -8,7 +8,8 @@ }, { "format": "yyyy-MM-ddTHH:mm:ss.SSSz", - "locale": "en-US" + "locale": "en-US", + "utf8": false }, { "format": "MM/dd/y" @@ -16,7 +17,8 @@ ], "timestamp": [ { - "format": "yyyy-MM-ddTHH:mm:ss.SSSUUUz" + "format": "yyyy-MM-ddTHH:mm:ss.SSSUUUz", + "utf8": false } ] } \ No newline at end of file diff --git a/core/src/test/java/io/questdb/cutlass/text/TextLoaderTest.java b/core/src/test/java/io/questdb/cutlass/text/TextLoaderTest.java index 7b7f74a15345d59a2fadf77bfada26709630f812..52ec889c514ac572932b75eda13576d506c7968b 100644 --- a/core/src/test/java/io/questdb/cutlass/text/TextLoaderTest.java +++ b/core/src/test/java/io/questdb/cutlass/text/TextLoaderTest.java @@ -1735,7 +1735,8 @@ public class TextLoaderTest extends AbstractGriffinTest { " \"name\": \"date\",\n" + " \"type\": \"TIMESTAMP\",\n" + " \"pattern\": \"d MMMM y г.\",\n" + - " \"locale\": \"ru-RU\"\n" + + " \"locale\": \"ru-RU\",\n" + + " \"utf8\": \"true\"\n" + " }\n" + "]")); @@ -1770,7 +1771,8 @@ public class TextLoaderTest extends AbstractGriffinTest { " {\n" + " \"name\": \"date\",\n" + " \"type\": \"TIMESTAMP\",\n" + - " \"pattern\": \"d MMMM y г.\"\n" + + " \"pattern\": \"d MMMM y г.\",\n" + + " \"utf8\": true\n" + " }\n" + "]"));