提交 ca7ee6ad 编写于 作者: V Vlad Ilyushchenko

CUTLASS: improved text file ingestion performance via making text parser a...

CUTLASS: improved text file ingestion performance via making text parser a little more optimal and not doing utf8decode on dates and timestamps where its not needed.
上级 6b8f99a4
......@@ -41,8 +41,8 @@ public class TextLexer implements Closeable, Mutable {
private final ObjList<DirectByteCharSequence> fields = new ObjList<>();
private final ObjectPool<DirectByteCharSequence> csPool;
private final TextMetadataDetector metadataDetector;
private CharSequence tableName;
private final int lineRollBufLimit;
private CharSequence tableName;
private boolean ignoreEolOnce;
private long lineRollBufCur;
private Listener textLexerListener;
......@@ -151,12 +151,33 @@ public class TextLexer implements Closeable, Mutable {
csPool.clear();
}
private void addField() {
fields.add(csPool.next());
fieldMax++;
}
private void checkEol(long lo) {
if (eol) {
uneol(lo);
}
}
private void clearRollBuffer(long ptr) {
useLineRollBuf = false;
lineRollBufCur = lineRollBufPtr;
this.fieldLo = this.fieldHi = ptr;
}
private void eol(long ptr, byte c) {
if (c == '\n' || c == '\r') {
eol = true;
rollBufferUnusable = false;
clearRollBuffer(ptr);
fieldIndex = 0;
lineCount++;
}
}
ObjList<CharSequence> getColumnNames() {
return metadataDetector.getColumnNames();
}
......@@ -196,6 +217,12 @@ public class TextLexer implements Closeable, Mutable {
return true;
}
private void growRollBufAndPut(byte c) {
if (growRollBuf(lineRollBufLen + 1, true)) {
Unsafe.getUnsafe().putByte(lineRollBufCur++, c);
}
}
private void ignoreEolOnce() {
eol = true;
fieldIndex = 0;
......@@ -206,12 +233,55 @@ public class TextLexer implements Closeable, Mutable {
return metadataDetector.isHeader();
}
private void onColumnDelimiter(long lo) {
checkEol(lo);
if (inQuote || ignoreEolOnce) {
return;
}
stashField(fieldIndex++);
}
private void onLineEnd(long ptr) throws LineLimitException {
if (inQuote) {
return;
}
if (eol) {
this.fieldLo = this.fieldHi;
return;
}
stashField(fieldIndex);
if (ignoreEolOnce) {
ignoreEolOnce();
return;
}
triggerLine(ptr);
if (lineCount > lineCountLimit) {
throw LineLimitException.INSTANCE;
}
}
private void onQuote() {
if (inQuote) {
delayedOutQuote = !delayedOutQuote;
lastQuotePos = this.fieldHi;
} else if (fieldHi - fieldLo == 1) {
inQuote = true;
this.fieldLo = this.fieldHi;
}
}
private void parse(long lo, long hi) {
long ptr = lo;
try {
while (ptr < hi) {
byte c = Unsafe.getUnsafe().getByte(ptr++);
final byte c = Unsafe.getUnsafe().getByte(ptr++);
if (rollBufferUnusable) {
eol(ptr, c);
......@@ -233,19 +303,12 @@ public class TextLexer implements Closeable, Mutable {
if (c == columnDelimiter) {
onColumnDelimiter(lo);
} else if (c == '"') {
onQuote();
} else if (c == '\n' || c == '\r') {
onLineEnd(ptr);
} else {
switch (c) {
case '"':
onQuote();
break;
case '\r':
case '\n':
onLineEnd(ptr);
break;
default:
checkEol(lo);
break;
}
checkEol(lo);
}
}
} catch (LineLimitException ignore) {
......@@ -264,31 +327,6 @@ public class TextLexer implements Closeable, Mutable {
}
}
private void onColumnDelimiter(long lo) {
checkEol(lo);
if (inQuote || ignoreEolOnce) {
return;
}
stashField(fieldIndex++);
}
private void checkEol(long lo) {
if (eol) {
uneol(lo);
}
}
private void eol(long ptr, byte c) {
if (c == '\n' || c == '\r') {
eol = true;
rollBufferUnusable = false;
clearRollBuffer(ptr);
fieldIndex = 0;
lineCount++;
}
}
private void putToRollBuf(byte c) {
if (lineRollBufCur - lineRollBufPtr == lineRollBufLen) {
growRollBufAndPut(c);
......@@ -297,46 +335,6 @@ public class TextLexer implements Closeable, Mutable {
}
}
private void growRollBufAndPut(byte c) {
if (growRollBuf(lineRollBufLen + 1, true)) {
Unsafe.getUnsafe().putByte(lineRollBufCur++, c);
}
}
private void onQuote() {
if (inQuote) {
delayedOutQuote = !delayedOutQuote;
lastQuotePos = this.fieldHi;
} else if (fieldHi - fieldLo == 1) {
inQuote = true;
this.fieldLo = this.fieldHi;
}
}
private void onLineEnd(long ptr) throws LineLimitException {
if (inQuote) {
return;
}
if (eol) {
this.fieldLo = this.fieldHi;
return;
}
stashField(fieldIndex);
if (ignoreEolOnce) {
ignoreEolOnce();
return;
}
triggerLine(ptr);
if (lineCount > lineCountLimit) {
throw LineLimitException.INSTANCE;
}
}
private void reportExtraFields() {
LogRecord logRecord = LOG.error().$("extra fields [table=").$(tableName).$("]\n\t").$(lineCount).$(" -> ");
for (int i = 0, n = fields.size(); i < n; i++) {
......@@ -400,11 +398,6 @@ public class TextLexer implements Closeable, Mutable {
this.fieldLo = this.fieldHi;
}
private void addField() {
fields.add(csPool.next());
fieldMax++;
}
private void triggerLine(long ptr) {
eol = true;
fieldIndex = 0;
......
......@@ -29,7 +29,6 @@ import io.questdb.cutlass.text.types.TypeManager;
import io.questdb.log.Log;
import io.questdb.log.LogFactory;
import io.questdb.std.*;
import io.questdb.std.str.CharSink;
import io.questdb.std.str.DirectByteCharSequence;
import io.questdb.std.str.DirectCharSink;
import io.questdb.std.str.StringSink;
......@@ -59,25 +58,6 @@ public class TextMetadataDetector implements TextLexer.Listener, Mutable, Closea
this.utf8Sink = new DirectCharSink(textConfiguration.getUtf8SinkSize());
}
public static boolean utf8Decode(long lo, long hi, CharSink sink) {
long p = lo;
while (p < hi) {
byte b = Unsafe.getUnsafe().getByte(p);
if (b < 0) {
int n = Chars.utf8DecodeMultiByte(p, hi, b, sink);
if (n == -1) {
// UTF8 error
return false;
}
p += n;
} else {
sink.put((char) b);
++p;
}
}
return true;
}
@Override
public void clear() {
tempSink.clear();
......@@ -280,7 +260,7 @@ public class TextMetadataDetector implements TextLexer.Listener, Mutable, Closea
for (int i = 0; i < hi; i++) {
DirectByteCharSequence value = values.getQuick(i);
utf8Sink.clear();
if (utf8Decode(value.getLo(), value.getHi(), utf8Sink)) {
if (Chars.utf8Decode(value.getLo(), value.getHi(), utf8Sink)) {
columnNames.setQuick(i, normalise(utf8Sink));
} else {
LOG.info().$("utf8 error [table=").$(tableName).$(", line=0, col=").$(i).$(']').$();
......
......@@ -52,6 +52,7 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable {
private static final int P_TYPE = 2;
private static final int P_PATTERN = 3;
private static final int P_LOCALE = 4;
private static final int P_UTF8 = 5;
private static final CharSequenceIntHashMap propertyNameMap = new CharSequenceIntHashMap();
private final DateLocaleFactory dateLocaleFactory;
private final TimestampLocaleFactory timestampLocaleFactory;
......@@ -72,6 +73,7 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable {
private int bufSize = 0;
private CharSequence tableName;
private int localePosition;
private boolean utf8 = false;
public TextMetadataParser(TextConfiguration textConfiguration, TypeManager typeManager) {
this.columnNames = new ObjList<>();
......@@ -84,16 +86,6 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable {
this.typeManager = typeManager;
}
private static void checkInputs(int position, CharSequence name, int type) throws JsonException {
if (name == null) {
throw JsonException.$(position, "Missing 'name' property");
}
if (type == -1) {
throw JsonException.$(position, "Missing 'type' property");
}
}
@Override
public void clear() {
bufSize = 0;
......@@ -160,6 +152,9 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable {
locale = copy(tag);
localePosition = position;
break;
case P_UTF8:
utf8 = Chars.equalsLowerCaseAscii("true", tag);
break;
default:
LOG.info().$("ignoring [table=").$(tableName).$(", value=").$(tag).$(']').$();
break;
......@@ -176,12 +171,23 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable {
}
}
private static void checkInputs(int position, CharSequence name, int type) throws JsonException {
if (name == null) {
throw JsonException.$(position, "Missing 'name' property");
}
if (type == -1) {
throw JsonException.$(position, "Missing 'type' property");
}
}
private void clearStage() {
name = null;
type = -1;
pattern = null;
locale = null;
localePosition = 0;
utf8 = false;
}
private CharSequence copy(CharSequence tag) {
......@@ -237,7 +243,7 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable {
if (pattern == null) {
throw JsonException.$(0, "DATE format pattern is required");
}
columnTypes.add(typeManager.nextTimestampAdapter().of(timestampFormatFactory.get(pattern), timestampLocale));
columnTypes.add(typeManager.nextTimestampAdapter(utf8, timestampFormatFactory.get(pattern), timestampLocale));
break;
default:
columnTypes.add(typeManager.getTypeAdapter(type));
......@@ -282,5 +288,6 @@ public class TextMetadataParser implements JsonParser, Mutable, Closeable {
propertyNameMap.put("type", P_TYPE);
propertyNameMap.put("pattern", P_PATTERN);
propertyNameMap.put("locale", P_LOCALE);
propertyNameMap.put("utf8", P_UTF8);
}
}
......@@ -25,23 +25,16 @@ package io.questdb.cutlass.text.types;
import io.questdb.cairo.ColumnType;
import io.questdb.cairo.TableWriter;
import io.questdb.cutlass.text.TextUtil;
import io.questdb.std.Mutable;
import io.questdb.std.NumericException;
import io.questdb.std.str.DirectByteCharSequence;
import io.questdb.std.str.DirectCharSink;
import io.questdb.std.time.DateFormat;
import io.questdb.std.time.DateLocale;
public class DateAdapter extends AbstractTypeAdapter implements Mutable {
private final DirectCharSink utf8Sink;
private DateLocale locale;
private DateFormat format;
public DateAdapter(DirectCharSink utf8Sink) {
this.utf8Sink = utf8Sink;
}
@Override
public void clear() {
this.format = null;
......@@ -65,9 +58,7 @@ public class DateAdapter extends AbstractTypeAdapter implements Mutable {
@Override
public void write(TableWriter.Row row, int column, DirectByteCharSequence value) throws Exception {
utf8Sink.clear();
TextUtil.utf8Decode(value.getLo(), value.getHi(), utf8Sink);
row.putDate(column, format.parse(utf8Sink, locale));
row.putDate(column, format.parse(value, locale));
}
public DateAdapter of(DateFormat format, DateLocale locale) {
......
/*******************************************************************************
* ___ _ ____ ____
* / _ \ _ _ ___ ___| |_| _ \| __ )
* | | | | | | |/ _ \/ __| __| | | | _ \
* | |_| | |_| | __/\__ \ |_| |_| | |_) |
* \__\_\\__,_|\___||___/\__|____/|____/
*
* Copyright (C) 2014-2019 Appsicle
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
package io.questdb.cutlass.text.types;
import io.questdb.cairo.ColumnType;
import io.questdb.cairo.TableWriter;
import io.questdb.cutlass.text.TextUtil;
import io.questdb.std.Mutable;
import io.questdb.std.NumericException;
import io.questdb.std.str.DirectByteCharSequence;
import io.questdb.std.str.DirectCharSink;
import io.questdb.std.time.DateFormat;
import io.questdb.std.time.DateLocale;
public class DateUtf8Adapter extends AbstractTypeAdapter implements Mutable {
private final DirectCharSink utf8Sink;
private DateLocale locale;
private DateFormat format;
public DateUtf8Adapter(DirectCharSink utf8Sink) {
this.utf8Sink = utf8Sink;
}
@Override
public void clear() {
this.format = null;
this.locale = null;
}
@Override
public int getType() {
return ColumnType.DATE;
}
@Override
public boolean probe(CharSequence text) {
try {
format.parse(text, locale);
return true;
} catch (NumericException e) {
return false;
}
}
@Override
public void write(TableWriter.Row row, int column, DirectByteCharSequence value) throws Exception {
utf8Sink.clear();
TextUtil.utf8Decode(value.getLo(), value.getHi(), utf8Sink);
row.putDate(column, format.parse(utf8Sink, locale));
}
public DateUtf8Adapter of(DateFormat format, DateLocale locale) {
this.format = format;
this.locale = locale;
return this;
}
}
......@@ -25,9 +25,11 @@ package io.questdb.cutlass.text.types;
import io.questdb.cutlass.json.JsonException;
import io.questdb.cutlass.json.JsonLexer;
import io.questdb.cutlass.json.JsonParser;
import io.questdb.log.Log;
import io.questdb.log.LogFactory;
import io.questdb.std.Chars;
import io.questdb.std.IntList;
import io.questdb.std.ObjList;
import io.questdb.std.Unsafe;
import io.questdb.std.microtime.TimestampFormat;
......@@ -50,14 +52,18 @@ public class InputFormatConfiguration {
private static final int STATE_EXPECT_TIMESTAMP_FORMAT_ARRAY = 3;
private static final int STATE_EXPECT_DATE_FORMAT_VALUE = 4;
private static final int STATE_EXPECT_DATE_LOCALE_VALUE = 5;
private static final int STATE_EXPECT_DATE_UTF8_VALUE = 10;
private static final int STATE_EXPECT_TIMESTAMP_FORMAT_VALUE = 6;
private static final int STATE_EXPECT_TIMESTAMP_LOCALE_VALUE = 7;
private static final int STATE_EXPECT_DATE_FORMAT_ENTRY = 8;
private static final int STATE_EXPECT_TIMESTAMP_FORMAT_ENTRY = 9;
private static final int STATE_EXPECT_TIMESTAMP_UTF8_VALUE = 11;
private final ObjList<DateFormat> dateFormats = new ObjList<>();
private final ObjList<DateLocale> dateLocales = new ObjList<>();
private final IntList dateUtf8Flags = new IntList();
private final ObjList<TimestampFormat> timestampFormats = new ObjList<>();
private final ObjList<TimestampLocale> timestampLocales = new ObjList<>();
private final IntList timestampUtf8Flags = new IntList();
private final DateFormatFactory dateFormatFactory;
private final DateLocaleFactory dateLocaleFactory;
private final TimestampFormatFactory timestampFormatFactory;
......@@ -65,8 +71,10 @@ public class InputFormatConfiguration {
private int jsonState = STATE_EXPECT_TOP; // expect start of object
private DateFormat jsonDateFormat;
private DateLocale jsonDateLocale;
private boolean jsonDateUtf8;
private TimestampFormat jsonTimestampFormat;
private TimestampLocale jsonTimestampLocale;
private boolean jsonTimestampUtf8;
public InputFormatConfiguration(
DateFormatFactory dateFormatFactory,
......@@ -83,13 +91,17 @@ public class InputFormatConfiguration {
public void clear() {
dateFormats.clear();
dateLocales.clear();
dateUtf8Flags.clear();
timestampFormats.clear();
timestampLocales.clear();
timestampUtf8Flags.clear();
jsonState = STATE_EXPECT_TOP;
jsonDateFormat = null;
jsonDateLocale = null;
jsonDateUtf8 = false;
jsonTimestampFormat = null;
jsonTimestampLocale = null;
jsonTimestampUtf8 = false;
}
public DateFormatFactory getDateFormatFactory() {
......@@ -108,6 +120,10 @@ public class InputFormatConfiguration {
return dateLocales;
}
public IntList getDateUtf8Flags() {
return dateUtf8Flags;
}
public TimestampFormatFactory getTimestampFormatFactory() {
return timestampFormatFactory;
}
......@@ -124,11 +140,17 @@ public class InputFormatConfiguration {
return timestampLocales;
}
public IntList getTimestampUtf8Flags() {
return timestampUtf8Flags;
}
public void parseConfiguration(JsonLexer jsonLexer, String adapterSetConfigurationFileName) throws JsonException {
this.clear();
jsonLexer.clear();
final JsonParser parser = this::onJsonEvent;
LOG.info().$("loading [from=").$(adapterSetConfigurationFileName).$(']').$();
try (InputStream stream = this.getClass().getResourceAsStream(adapterSetConfigurationFileName)) {
if (stream == null) {
......@@ -146,7 +168,7 @@ public class InputFormatConfiguration {
for (int i = 0; i < len; i++) {
Unsafe.getUnsafe().putByte(memBuffer + i, heapBuffer[i]);
}
jsonLexer.parse(memBuffer, memBuffer + len, this::onJsonEvent);
jsonLexer.parse(memBuffer, memBuffer + len, parser);
}
jsonLexer.clear();
} finally {
......@@ -192,6 +214,7 @@ public class InputFormatConfiguration {
}
dateFormats.add(jsonDateFormat);
dateLocales.add(jsonDateLocale == null ? DateLocaleFactory.INSTANCE.getDefaultDateLocale() : jsonDateLocale);
dateUtf8Flags.add(jsonDateUtf8 ? 1 : 0);
break;
case STATE_EXPECT_TIMESTAMP_FORMAT_ENTRY:
if (jsonTimestampFormat == null) {
......@@ -200,6 +223,7 @@ public class InputFormatConfiguration {
timestampFormats.add(jsonTimestampFormat);
timestampLocales.add(jsonTimestampLocale == null ? TimestampLocaleFactory.INSTANCE.getDefaultTimestampLocale() : jsonTimestampLocale);
timestampUtf8Flags.add(jsonTimestampUtf8 ? 1 : 0);
break;
default:
// the only time we get here would be when
......@@ -225,10 +249,10 @@ public class InputFormatConfiguration {
}
break;
case STATE_EXPECT_DATE_FORMAT_ENTRY:
processEntry(tag, position, STATE_EXPECT_DATE_FORMAT_VALUE, STATE_EXPECT_DATE_LOCALE_VALUE);
processEntry(tag, position, STATE_EXPECT_DATE_FORMAT_VALUE, STATE_EXPECT_DATE_LOCALE_VALUE, STATE_EXPECT_DATE_UTF8_VALUE);
break;
default:
processEntry(tag, position, STATE_EXPECT_TIMESTAMP_FORMAT_VALUE, STATE_EXPECT_TIMESTAMP_LOCALE_VALUE);
processEntry(tag, position, STATE_EXPECT_TIMESTAMP_FORMAT_VALUE, STATE_EXPECT_TIMESTAMP_LOCALE_VALUE, STATE_EXPECT_TIMESTAMP_UTF8_VALUE);
break;
}
break;
......@@ -267,6 +291,14 @@ public class InputFormatConfiguration {
}
jsonState = STATE_EXPECT_TIMESTAMP_FORMAT_ENTRY;
break;
case STATE_EXPECT_TIMESTAMP_UTF8_VALUE:
jsonTimestampUtf8 = Chars.equalsLowerCaseAscii(tag, "true");
jsonState = STATE_EXPECT_TIMESTAMP_FORMAT_ENTRY;
break;
case STATE_EXPECT_DATE_UTF8_VALUE:
jsonDateUtf8 = Chars.equalsLowerCaseAscii(tag, "true");
jsonState = STATE_EXPECT_DATE_FORMAT_ENTRY;
break;
default:
// we are picking up values from attributes we don't expect
throw JsonException.$(position, "array expected (value)");
......@@ -292,11 +324,13 @@ public class InputFormatConfiguration {
}
}
private void processEntry(CharSequence tag, int position, int stateExpectFormatValue, int stateExpectLocaleValue) throws JsonException {
private void processEntry(CharSequence tag, int position, int stateExpectFormatValue, int stateExpectLocaleValue, int stateExpectUtf8Value) throws JsonException {
if (Chars.equals(tag, "format")) {
jsonState = stateExpectFormatValue; // expect date format
} else if (Chars.equals(tag, "locale")) {
jsonState = stateExpectLocaleValue;
} else if (Chars.equals(tag, "utf8")) {
jsonState = stateExpectUtf8Value;
} else {
// unknown tag name?
throw JsonException.$(position, "unknown [tag=").put(tag).put(']');
......
......@@ -25,23 +25,16 @@ package io.questdb.cutlass.text.types;
import io.questdb.cairo.ColumnType;
import io.questdb.cairo.TableWriter;
import io.questdb.cutlass.text.TextUtil;
import io.questdb.std.Mutable;
import io.questdb.std.NumericException;
import io.questdb.std.microtime.TimestampFormat;
import io.questdb.std.microtime.TimestampLocale;
import io.questdb.std.str.DirectByteCharSequence;
import io.questdb.std.str.DirectCharSink;
public class TimestampAdapter extends AbstractTypeAdapter implements Mutable {
private final DirectCharSink utf8Sink;
private TimestampLocale locale;
private TimestampFormat format;
public TimestampAdapter(DirectCharSink utf8Sink) {
this.utf8Sink = utf8Sink;
}
@Override
public void clear() {
this.format = null;
......@@ -65,9 +58,7 @@ public class TimestampAdapter extends AbstractTypeAdapter implements Mutable {
@Override
public void write(TableWriter.Row row, int column, DirectByteCharSequence value) throws Exception {
utf8Sink.clear();
TextUtil.utf8Decode(value.getLo(), value.getHi(), utf8Sink);
row.putDate(column, format.parse(utf8Sink, locale));
row.putDate(column, format.parse(value, locale));
}
public TimestampAdapter of(TimestampFormat format, TimestampLocale locale) {
......
/*******************************************************************************
* ___ _ ____ ____
* / _ \ _ _ ___ ___| |_| _ \| __ )
* | | | | | | |/ _ \/ __| __| | | | _ \
* | |_| | |_| | __/\__ \ |_| |_| | |_) |
* \__\_\\__,_|\___||___/\__|____/|____/
*
* Copyright (C) 2014-2019 Appsicle
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
package io.questdb.cutlass.text.types;
import io.questdb.cairo.ColumnType;
import io.questdb.cairo.TableWriter;
import io.questdb.cutlass.text.TextUtil;
import io.questdb.std.Mutable;
import io.questdb.std.NumericException;
import io.questdb.std.microtime.TimestampFormat;
import io.questdb.std.microtime.TimestampLocale;
import io.questdb.std.str.DirectByteCharSequence;
import io.questdb.std.str.DirectCharSink;
public class TimestampUtf8Adapter extends AbstractTypeAdapter implements Mutable {
private final DirectCharSink utf8Sink;
private TimestampLocale locale;
private TimestampFormat format;
public TimestampUtf8Adapter(DirectCharSink utf8Sink) {
this.utf8Sink = utf8Sink;
}
@Override
public void clear() {
this.format = null;
this.locale = null;
}
@Override
public int getType() {
return ColumnType.TIMESTAMP;
}
@Override
public boolean probe(CharSequence text) {
try {
format.parse(text, locale);
return true;
} catch (NumericException e) {
return false;
}
}
@Override
public void write(TableWriter.Row row, int column, DirectByteCharSequence value) throws Exception {
utf8Sink.clear();
TextUtil.utf8Decode(value.getLo(), value.getHi(), utf8Sink);
row.putDate(column, format.parse(utf8Sink, locale));
}
public TimestampUtf8Adapter of(TimestampFormat format, TimestampLocale locale) {
this.format = format;
this.locale = locale;
return this;
}
}
......@@ -26,6 +26,7 @@ package io.questdb.cutlass.text.types;
import io.questdb.cairo.CairoException;
import io.questdb.cairo.ColumnType;
import io.questdb.cutlass.text.TextConfiguration;
import io.questdb.std.IntList;
import io.questdb.std.Mutable;
import io.questdb.std.ObjList;
import io.questdb.std.ObjectPool;
......@@ -39,7 +40,8 @@ public class TypeManager implements Mutable {
private final ObjList<TypeAdapter> probes = new ObjList<>();
private final int probeCount;
private final StringAdapter stringAdapter;
private final ObjectPool<DateAdapter> dateAdapterPool;
private final ObjectPool<DateUtf8Adapter> dateAdapterPool;
private final ObjectPool<TimestampUtf8Adapter> timestampUtf8AdapterPool;
private final ObjectPool<TimestampAdapter> timestampAdapterPool;
private final SymbolAdapter symbolAdapter;
private final InputFormatConfiguration inputFormatConfiguration;
......@@ -48,8 +50,9 @@ public class TypeManager implements Mutable {
TextConfiguration configuration,
DirectCharSink utf8Sink
) {
this.dateAdapterPool = new ObjectPool<>(() -> new DateAdapter(utf8Sink), configuration.getDateAdapterPoolCapacity());
this.timestampAdapterPool = new ObjectPool<>(() -> new TimestampAdapter(utf8Sink), configuration.getTimestampAdapterPoolCapacity());
this.dateAdapterPool = new ObjectPool<>(() -> new DateUtf8Adapter(utf8Sink), configuration.getDateAdapterPoolCapacity());
this.timestampUtf8AdapterPool = new ObjectPool<>(() -> new TimestampUtf8Adapter(utf8Sink), configuration.getTimestampAdapterPoolCapacity());
this.timestampAdapterPool = new ObjectPool<>(TimestampAdapter::new, configuration.getTimestampAdapterPoolCapacity());
this.inputFormatConfiguration = configuration.getInputFormatConfiguration();
this.stringAdapter = new StringAdapter(utf8Sink);
this.symbolAdapter = new SymbolAdapter(utf8Sink);
......@@ -57,13 +60,23 @@ public class TypeManager implements Mutable {
final ObjList<DateFormat> dateFormats = inputFormatConfiguration.getDateFormats();
final ObjList<DateLocale> dateLocales = inputFormatConfiguration.getDateLocales();
final IntList dateUtf8Flags = inputFormatConfiguration.getDateUtf8Flags();
for (int i = 0, n = dateFormats.size(); i < n; i++) {
probes.add(new DateAdapter(utf8Sink).of(dateFormats.getQuick(i), dateLocales.getQuick(i)));
if (dateUtf8Flags.getQuick(i) == 1) {
probes.add(new DateUtf8Adapter(utf8Sink).of(dateFormats.getQuick(i), dateLocales.getQuick(i)));
} else {
probes.add(new DateAdapter().of(dateFormats.getQuick(i), dateLocales.getQuick(i)));
}
}
final ObjList<TimestampFormat> timestampFormats = inputFormatConfiguration.getTimestampFormats();
final ObjList<TimestampLocale> timestampLocales = inputFormatConfiguration.getTimestampLocales();
final IntList timestampUtf8Flags = inputFormatConfiguration.getTimestampUtf8Flags();
for (int i = 0, n = timestampFormats.size(); i < n; i++) {
probes.add(new TimestampAdapter(utf8Sink).of(timestampFormats.getQuick(i), timestampLocales.getQuick(i)));
if (timestampUtf8Flags.getQuick(i) == 1) {
probes.add(new TimestampUtf8Adapter(utf8Sink).of(timestampFormats.getQuick(i), timestampLocales.getQuick(i)));
} else {
probes.add(new TimestampAdapter().of(timestampFormats.getQuick(i), timestampLocales.getQuick(i)));
}
}
this.probeCount = probes.size();
}
......@@ -71,6 +84,7 @@ public class TypeManager implements Mutable {
@Override
public void clear() {
dateAdapterPool.clear();
timestampUtf8AdapterPool.clear();
timestampAdapterPool.clear();
}
......@@ -115,12 +129,20 @@ public class TypeManager implements Mutable {
}
}
public DateAdapter nextDateAdapter() {
public DateUtf8Adapter nextDateAdapter() {
return dateAdapterPool.next();
}
public TimestampAdapter nextTimestampAdapter() {
return timestampAdapterPool.next();
public TypeAdapter nextTimestampAdapter(boolean decodeUtf8, TimestampFormat format, TimestampLocale locale) {
if (decodeUtf8) {
TimestampUtf8Adapter adapter = timestampUtf8AdapterPool.next();
adapter.of(format, locale);
return adapter;
}
TimestampAdapter adapter = timestampAdapterPool.next();
adapter.of(format, locale);
return adapter;
}
private void addDefaultProbes() {
......
......@@ -530,7 +530,6 @@ public final class Chars {
public static boolean utf8Decode(long lo, long hi, CharSink sink) {
long p = lo;
while (p < hi) {
byte b = Unsafe.getUnsafe().getByte(p);
if (b < 0) {
......
......@@ -8,7 +8,8 @@
},
{
"format": "yyyy-MM-ddTHH:mm:ss.SSSz",
"locale": "en-US"
"locale": "en-US",
"utf8": false
},
{
"format": "MM/dd/y"
......@@ -16,7 +17,8 @@
],
"timestamp": [
{
"format": "yyyy-MM-ddTHH:mm:ss.SSSUUUz"
"format": "yyyy-MM-ddTHH:mm:ss.SSSUUUz",
"utf8": false
}
]
}
\ No newline at end of file
......@@ -1735,7 +1735,8 @@ public class TextLoaderTest extends AbstractGriffinTest {
" \"name\": \"date\",\n" +
" \"type\": \"TIMESTAMP\",\n" +
" \"pattern\": \"d MMMM y г.\",\n" +
" \"locale\": \"ru-RU\"\n" +
" \"locale\": \"ru-RU\",\n" +
" \"utf8\": \"true\"\n" +
" }\n" +
"]"));
......@@ -1770,7 +1771,8 @@ public class TextLoaderTest extends AbstractGriffinTest {
" {\n" +
" \"name\": \"date\",\n" +
" \"type\": \"TIMESTAMP\",\n" +
" \"pattern\": \"d MMMM y г.\"\n" +
" \"pattern\": \"d MMMM y г.\",\n" +
" \"utf8\": true\n" +
" }\n" +
"]"));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册