Commit 3e953484 authored by jack870131

[IOTDB-73] Add new encoding method for regular timestamp column

Parent 7e04cd6e
......@@ -45,6 +45,12 @@ PLAIN encoding, the default encoding method (i.e. no encoding), supports multiple data types,
GORILLA encoding is well suited to floating-point sequences whose consecutive values are close to each other, and is not suitable for data with large fluctuations between consecutive values.
* Regular data encoding (REGULAR)
Regular data encoding is well suited to sequences that increase at a regular step (for example, a timestamp series in which the same amount of time elapses between consecutive data points); in this case its performance is better than second-order differencing (TS_2DIFF) encoding.
Regular data encoding is not suitable for data with fluctuations (irregular data); second-order differencing (TS_2DIFF) encoding is recommended for such data (a usage sketch follows the table below).
* Correspondence between data types and encodings
The encodings described above apply to different data types; if the correspondence is wrong, the time series cannot be created correctly. The correspondence between each data type and the encodings that support it is summarized in Table 2-3.
......@@ -54,8 +60,8 @@ GORILLA encoding is well suited to floating-point sequences whose consecutive values are close to each other, and is not
|Data Type |Supported Encoding|
|:---:|:---:|
|BOOLEAN| PLAIN, RLE|
|INT32 |PLAIN, RLE, TS_2DIFF|
|INT64 |PLAIN, RLE, TS_2DIFF|
|INT32 |PLAIN, RLE, TS_2DIFF, REGULAR|
|INT64 |PLAIN, RLE, TS_2DIFF, REGULAR|
|FLOAT |PLAIN, RLE, TS_2DIFF, GORILLA|
|DOUBLE |PLAIN, RLE, TS_2DIFF, GORILLA|
|TEXT |PLAIN|
......
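As a concrete illustration of the behavior described above, the following minimal sketch (not part of this commit; the class name `RegularEncodingSketch` is made up for demonstration) round-trips a strictly regular INT32 sequence through the new encoder and decoder:

```java
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import org.apache.iotdb.tsfile.encoding.decoder.RegularDataDecoder;
import org.apache.iotdb.tsfile.encoding.encoder.RegularDataEncoder;

public class RegularEncodingSketch {
  public static void main(String[] args) throws Exception {
    // A strictly regular INT32 sequence: each value is 10 larger than the previous one.
    RegularDataEncoder encoder = new RegularDataEncoder.IntRegularEncoder();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    for (int i = 0; i < 1000; i++) {
      encoder.encode(i * 10, out); // values are buffered into blocks of 128
    }
    encoder.flush(out); // flush the last, partially filled block

    // Decode and verify the round trip.
    RegularDataDecoder decoder = new RegularDataDecoder.IntRegularDecoder();
    ByteBuffer buffer = ByteBuffer.wrap(out.toByteArray());
    int expected = 0;
    while (decoder.hasNext(buffer)) {
      if (decoder.readInt(buffer) != expected) {
        throw new IllegalStateException("round-trip mismatch at value " + expected);
      }
      expected += 10;
    }
    System.out.println("raw size: " + 1000 * 4 + " bytes, encoded size: " + out.size() + " bytes");
  }
}
```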
......@@ -39,7 +39,7 @@ Note: PrefixPath can not include `*`
CREATE TIMESERIES <Timeseries> WITH <AttributeClauses>
AttributeClauses : DATATYPE=<DataTypeValue> COMMA ENCODING=<EncodingValue> [COMMA <ExtraAttributeClause>]*
DataTypeValue: BOOLEAN | DOUBLE | FLOAT | INT32 | INT64 | TEXT
EncodingValue: GORILLA | PLAIN | RLE | TS_2DIFF
EncodingValue: GORILLA | PLAIN | RLE | TS_2DIFF | REGULAR
ExtraAttributeClause: {
COMPRESSOR = <CompressorValue>
MAX_POINT_NUMBER = Integer
......
......@@ -7,9 +7,9 @@
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
......@@ -44,6 +44,12 @@ Run-length encoding can also be used to encode floating-point numbers, but it is
GORILLA encoding is well suited to floating-point sequences whose consecutive values are close to each other, and is not recommended for sequences with large fluctuations.
* REGULAR
Regular data encoding is well suited to sequences that increase at a regular step (e.g. a timestamp series in which the same amount of time elapses between consecutive data points); in this case its performance is better than TS_2DIFF.
Regular data encoding is not suitable for data with fluctuations (irregular data); the TS_2DIFF encoder is recommended for such data (a usage sketch follows the table below).
* Correspondence between data types and encodings
The encodings described in the previous sections are applicable to different data types. If the correspondence is wrong, the time series cannot be created correctly. The correspondence between each data type and its supported encodings is summarized in Table 2-3.
......@@ -53,8 +59,8 @@ The four encodings described in the previous sections are applicable to differen
|Data Type |Supported Encoding|
|:---:|:---:|
|BOOLEAN| PLAIN, RLE|
|INT32 |PLAIN, RLE, TS_2DIFF|
|INT64 |PLAIN, RLE, TS_2DIFF|
|INT32 |PLAIN, RLE, TS_2DIFF, REGULAR|
|INT64 |PLAIN, RLE, TS_2DIFF, REGULAR|
|FLOAT |PLAIN, RLE, TS_2DIFF, GORILLA|
|DOUBLE |PLAIN, RLE, TS_2DIFF, GORILLA|
|TEXT |PLAIN|
......
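The missing-point handling can be illustrated with a similar sketch (again not part of this commit; `RegularTimestampSketch` and the concrete timestamps are illustrative only) that encodes a one-second timestamp series with occasional dropped points:

```java
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.iotdb.tsfile.encoding.decoder.RegularDataDecoder;
import org.apache.iotdb.tsfile.encoding.encoder.RegularDataEncoder;

public class RegularTimestampSketch {
  public static void main(String[] args) throws Exception {
    // 1-second timestamps starting at epoch 0, with every 50th point dropped.
    List<Long> timestamps = new ArrayList<>();
    for (long i = 0; i < 10_000; i++) {
      if (i % 50 == 0) {
        continue; // simulate a missing point
      }
      timestamps.add(i * 1000L);
    }

    RegularDataEncoder encoder = new RegularDataEncoder.LongRegularEncoder();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    for (long t : timestamps) {
      encoder.encode(t, out);
    }
    encoder.flush(out);

    // Blocks containing gaps carry a bitmap of the missing positions,
    // so the decoder returns exactly the values that were encoded.
    RegularDataDecoder decoder = new RegularDataDecoder.LongRegularDecoder();
    ByteBuffer buffer = ByteBuffer.wrap(out.toByteArray());
    int i = 0;
    while (decoder.hasNext(buffer)) {
      long expected = timestamps.get(i++);
      long actual = decoder.readLong(buffer);
      if (expected != actual) {
        throw new IllegalStateException("round-trip mismatch at index " + (i - 1));
      }
    }
    System.out.println("decoded " + i + " timestamps, encoded size = " + out.size() + " bytes");
  }
}
```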
......@@ -7,9 +7,9 @@
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
......@@ -48,7 +48,7 @@ Note: PrefixPath can not include `*`
CREATE TIMESERIES <Timeseries> WITH <AttributeClauses>
AttributeClauses : DATATYPE=<DataTypeValue> COMMA ENCODING=<EncodingValue> [COMMA <ExtraAttributeClause>]*
DataTypeValue: BOOLEAN | DOUBLE | FLOAT | INT32 | INT64 | TEXT
EncodingValue: GORILLA | PLAIN | RLE | TS_2DIFF
EncodingValue: GORILLA | PLAIN | RLE | TS_2DIFF | REGULAR
ExtraAttributeClause: {
COMPRESSOR = <CompressorValue>
MAX_POINT_NUMBER = Integer
......
......@@ -92,6 +92,16 @@ public abstract class Decoder {
throw new TsFileDecodingException(
"Decoder not found:" + type + " , DataType is :" + dataType);
}
} else if (type == TSEncoding.REGULAR) {
switch (dataType) {
case INT32:
return new RegularDataDecoder.IntRegularDecoder();
case INT64:
return new RegularDataDecoder.LongRegularDecoder();
default:
throw new TsFileDecodingException(
"Decoder not found:" + type + " , DataType is :" + dataType);
}
} else {
throw new TsFileDecodingException(
"Decoder not found:" + type + " , DataType is :" + dataType);
......
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iotdb.tsfile.encoding.decoder;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.BitSet;
import org.apache.iotdb.tsfile.encoding.encoder.RegularDataEncoder;
import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
import org.apache.iotdb.tsfile.utils.ReadWriteIOUtils;
/**
* This class is a decoder for decoding byte arrays that were encoded by {@code
* RegularDataEncoder}. RegularDataDecoder only supports integer and long values.<br>
*
* @author tsunghantsai
* @see RegularDataEncoder
*/
public abstract class RegularDataDecoder extends Decoder {
/**
* total count of values that can still be read from the current pack.
*/
protected int readIntTotalCount = 0;
protected int nextReadIndex = 0;
/**
* number of data points in this pack.
*/
protected int packNum;
public RegularDataDecoder() {
super(TSEncoding.REGULAR);
}
protected abstract void readHeader(ByteBuffer buffer) throws IOException;
protected abstract void allocateDataArray();
protected abstract void readValue(int i);
@Override
public boolean hasNext(ByteBuffer buffer) throws IOException {
return (nextReadIndex < readIntTotalCount) || buffer.remaining() > 0;
}
public static class IntRegularDecoder extends RegularDataDecoder {
private int[] data;
private int firstValue;
private int previous;
private boolean isMissingPoint;
private BitSet bitmap;
private int bitmapIndex;
/**
* minimum value for all difference.
*/
private int minDeltaBase;
public IntRegularDecoder() {
super();
}
/**
* If there is no decoded data left, decode the next pack into {@code data}.
*
* @param buffer ByteBuffer
* @return int value
*/
protected int readT(ByteBuffer buffer) {
if (nextReadIndex == readIntTotalCount) {
isMissingPoint = ReadWriteIOUtils.readBool(buffer);
if (isMissingPoint) {
readBitmap(buffer);
}
return loadIntBatch(buffer); // load first value
}
if (isMissingPoint) {
bitmapIndex++;
return loadWithBitmap(buffer);
}
return data[nextReadIndex++];
}
private void readBitmap(ByteBuffer buffer) {
int length = ReadWriteIOUtils.readInt(buffer);
byte[] byteArr = new byte[length];
buffer.get(byteArr);
bitmap = BitSet.valueOf(byteArr);
bitmapIndex = 0;
}
/**
* load the data using the bitmap (when the bitmap marks an element as false, skip it and load the next element).
*
* @param buffer ByteBuffer
* @return int value
*/
protected int loadWithBitmap(ByteBuffer buffer) {
while (!bitmap.get(bitmapIndex)) {
bitmapIndex++;
}
nextReadIndex = bitmapIndex - 1;
return data[nextReadIndex];
}
/**
* If the remaining data has run out, load the next pack from the buffer.
*
* @param buffer ByteBuffer
* @return int value
*/
protected int loadIntBatch(ByteBuffer buffer) {
packNum = ReadWriteIOUtils.readInt(buffer);
readHeader(buffer);
allocateDataArray();
readIntTotalCount = isMissingPoint ? (packNum - 2) : (packNum - 1);
previous = firstValue;
nextReadIndex = 0;
readPack();
return firstValue;
}
private void readPack() {
for (int i = 0; i < data.length; i++) {
readValue(i);
previous = data[i];
}
}
@Override
public int readInt(ByteBuffer buffer) {
return readT(buffer);
}
@Override
protected void readHeader(ByteBuffer buffer) {
minDeltaBase = ReadWriteIOUtils.readInt(buffer);
firstValue = ReadWriteIOUtils.readInt(buffer);
}
@Override
protected void allocateDataArray() {
data = new int[packNum - 1];
}
@Override
protected void readValue(int i) {
data[i] = previous + minDeltaBase;
}
@Override
public void reset() {
// do nothing
}
}
public static class LongRegularDecoder extends RegularDataDecoder {
private long[] data;
private long firstValue;
private long previous;
private boolean isMissingPoint;
private BitSet bitmap;
private int bitmapIndex;
/**
* minimum value for all difference.
*/
private long minDeltaBase;
public LongRegularDecoder() {
super();
}
/**
* If there is no decoded data left, decode the next pack into {@code data}.
*
* @param buffer ByteBuffer
* @return long value
*/
protected long readT(ByteBuffer buffer) {
if (nextReadIndex == readIntTotalCount) {
isMissingPoint = ReadWriteIOUtils.readBool(buffer);
if (isMissingPoint) {
readBitmap(buffer);
}
return loadIntBatch(buffer); // load first value
}
if (isMissingPoint) {
bitmapIndex++;
return loadWithBitmap(buffer);
}
return data[nextReadIndex++];
}
private void readBitmap(ByteBuffer buffer) {
int length = ReadWriteIOUtils.readInt(buffer);
byte[] byteArr = new byte[length];
buffer.get(byteArr);
bitmap = BitSet.valueOf(byteArr);
bitmapIndex = 0;
}
/**
* load the data using the bitmap (when the bitmap marks an element as false, skip it and load the next element).
*
* @param buffer ByteBuffer
* @return long value
*/
protected long loadWithBitmap(ByteBuffer buffer) {
while (!bitmap.get(bitmapIndex)) {
bitmapIndex++;
}
nextReadIndex = bitmapIndex - 1;
return data[nextReadIndex];
}
/**
* If the remaining data has run out, load the next pack from the buffer.
*
* @param buffer ByteBuffer
* @return long value
*/
protected long loadIntBatch(ByteBuffer buffer) {
packNum = ReadWriteIOUtils.readInt(buffer);
readHeader(buffer);
allocateDataArray();
readIntTotalCount = isMissingPoint ? (packNum - 2) : (packNum - 1);
previous = firstValue;
nextReadIndex = 0;
readPack();
return firstValue;
}
private void readPack() {
for (int i = 0; i < data.length; i++) {
readValue(i);
previous = data[i];
}
}
@Override
public long readLong(ByteBuffer buffer) {
return readT(buffer);
}
@Override
protected void readHeader(ByteBuffer buffer) {
minDeltaBase = ReadWriteIOUtils.readLong(buffer);
firstValue = ReadWriteIOUtils.readLong(buffer);
}
@Override
protected void allocateDataArray() {
data = new long[packNum - 1];
}
@Override
protected void readValue(int i) {
data[i] = previous + minDeltaBase;
}
@Override
public void reset() {
// do nothing
}
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iotdb.tsfile.encoding.encoder;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.BitSet;
import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
import org.apache.iotdb.tsfile.utils.BytesUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>RegularDataEncoder is an encoder for compressing data of type integer and long. It is built on
* the assumption that the difference between consecutive data points is constant, i.e. that the data
* is regular.</p>
*
* <p>To encode regular data, we first buffer the values loaded into the encoder in a block array. When
* the block reaches its default size, we compute the delta between consecutive data points to check
* whether any points are missing. If so, a bitmap is created for the block to mark the positions of
* the missing points. Finally, a missing-point flag, the bitmap (when points are missing) and the block
* header (the pack size, the minimum delta and the first data point of the block) are written to the
* result byte array output stream.</p>
*
* @author tsunghantsai
*/
public abstract class RegularDataEncoder extends Encoder {
protected static final int BLOCK_DEFAULT_SIZE = 128;
private static final Logger LOGGER = LoggerFactory.getLogger(RegularDataEncoder.class);
protected ByteArrayOutputStream out;
protected int blockSize;
protected int writeIndex = -1;
/**
* constructor of RegularDataEncoder.
*
* @param size the number of values to be packed into a block
*/
public RegularDataEncoder(int size) {
super(TSEncoding.REGULAR);
blockSize = size;
}
protected abstract void writeHeader() throws IOException;
protected abstract void reset();
protected abstract void flushBlockBuffer(ByteArrayOutputStream out) throws IOException;
protected void writeHeaderToBytes() throws IOException {
out.write(BytesUtils.intToBytes(writeIndex));
writeHeader();
}
/**
* Call this method to flush all values that have not yet been encoded into the result byte array.
*/
@Override
public void flush(ByteArrayOutputStream out) {
try {
flushBlockBuffer(out);
} catch (IOException e) {
LOGGER.error("flush data to stream failed!", e);
}
}
public static class IntRegularEncoder extends RegularDataEncoder {
private int[] data;
private int[] missingPointData;
private int[] regularData;
private int firstValue;
private int previousValue;
private int minDeltaBase;
private boolean isMissingPoint;
private boolean isLastPack;
private BitSet bitmap;
public IntRegularEncoder() {
this(BLOCK_DEFAULT_SIZE);
}
/**
* constructor of IntRegularEncoder, a subclass of RegularDataEncoder.
*
* @param size the number of values to be packed into a block
*/
public IntRegularEncoder(int size) {
super(size);
reset();
}
@Override
protected void flushBlockBuffer(ByteArrayOutputStream out) throws IOException {
if (writeIndex == -1) {
return;
}
this.out = out;
// write last pack
if (writeIndex < blockSize) {
isLastPack = true;
checkMissingPoint(out);
}
// write identifier
out.write(BytesUtils.boolToBytes(isMissingPoint));
// write bitmap if missing points exist
if (isMissingPoint) {
writeBitmap(out);
}
// write header
writeHeaderToBytes();
reset();
writeIndex = -1;
}
@Override
protected void reset() {
blockSize = BLOCK_DEFAULT_SIZE;
minDeltaBase = Integer.MAX_VALUE;
isMissingPoint = false;
isLastPack = false;
firstValue = 0;
previousValue = 0;
}
@Override
protected void writeHeader() throws IOException {
out.write(BytesUtils.intToBytes(minDeltaBase));
out.write(BytesUtils.intToBytes(firstValue));
}
@Override
public void encode(int value, ByteArrayOutputStream out) {
try {
encodeValue(value, out);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public int getOneItemMaxSize() {
return 4;
}
@Override
public long getMaxByteSize() {
// The meaning of 20 is: identifier(4)+bitmapLength(4)+index(4)+minDeltaBase(4)+firstValue(4)
return (long) 20 + (writeIndex * 2 / 8) + (writeIndex * 4);
}
/**
* Input an integer value.
*
* @param value value to encode
* @param out the ByteArrayOutputStream that the data is encoded into
*/
public void encodeValue(int value, ByteArrayOutputStream out) throws IOException {
if (writeIndex == -1) {
data = new int[blockSize];
writeIndex = 0;
}
data[writeIndex++] = value;
if (writeIndex == blockSize) {
checkMissingPoint(out);
}
}
private void checkMissingPoint(ByteArrayOutputStream out) throws IOException {
// get the new regular data if the missing point exists in the original data
data = getRegularData(data);
firstValue = data[0];
if (isMissingPoint) {
writeIndex = data.length;
}
if (!isLastPack) {
flush(out);
}
}
private void writeBitmap(ByteArrayOutputStream out) throws IOException {
// generate bitmap
data2Diff(missingPointData);
byte[] bsArr = bitmap.toByteArray();
out.write(BytesUtils.intToBytes(bsArr.length));
out.write(bsArr);
}
private int[] getRegularData(int[] data) {
if (writeIndex > 1) {
previousValue = data[0];
minDeltaBase = data[1] - data[0];
// calculate the minimum delta of the data and check whether any points are missing
for (int i = 1; i < writeIndex; i++) {
calcDelta(data[i]);
previousValue = data[i];
}
}
// generate the continuous (regular) data when missing points exist
if (isMissingPoint) {
generateRegularData(data);
return regularData;
}
return data;
}
private void generateRegularData(int[] data) {
missingPointData = data;
blockSize = (int) (((data[writeIndex - 1] - data[0]) / minDeltaBase) + 1);
regularData = new int[blockSize];
for (int i = 0; i < blockSize; i++) {
regularData[i] = (data[0] + minDeltaBase * i);
}
}
private void calcDelta(int value) {
int delta = value - previousValue; // calculate delta
if (delta != minDeltaBase) {
isMissingPoint = true;
}
if (delta < minDeltaBase) {
minDeltaBase = delta;
}
}
private void data2Diff(int[] missingPointData) {
bitmap = new BitSet(regularData.length);
bitmap.flip(0, regularData.length);
int offset = 0;
for (int i = 1; i < missingPointData.length; i++) {
long delta = missingPointData[i] - missingPointData[i - 1];
if (delta != minDeltaBase) {
int missingPointNum = (int) (delta / minDeltaBase) - 1;
for (int j = 0; j < missingPointNum; j++) {
bitmap.set(i + (offset++), false);
}
}
}
}
}
public static class LongRegularEncoder extends RegularDataEncoder {
private long[] data;
private long[] missingPointData;
private long[] regularData;
private long firstValue;
private long previousValue;
private long minDeltaBase;
private boolean isMissingPoint;
private boolean isLastPack;
private BitSet bitmap;
public LongRegularEncoder() {
this(BLOCK_DEFAULT_SIZE);
}
/**
* constructor of LongRegularEncoder, a subclass of RegularDataEncoder.
*
* @param size the number of values to be packed into a block
*/
public LongRegularEncoder(int size) {
super(size);
reset();
}
@Override
protected void flushBlockBuffer(ByteArrayOutputStream out) throws IOException {
if (writeIndex == -1) {
return;
}
this.out = out;
// write last pack
if (writeIndex < blockSize) {
isLastPack = true;
checkMissingPoint(out);
}
// write identifier
out.write(BytesUtils.boolToBytes(isMissingPoint));
// write bitmap if missing points exist
if (isMissingPoint) {
writeBitmap(out);
}
// write header
writeHeaderToBytes();
reset();
writeIndex = -1;
}
@Override
protected void reset() {
blockSize = BLOCK_DEFAULT_SIZE;
minDeltaBase = Long.MAX_VALUE;
isMissingPoint = false;
isLastPack = false;
firstValue = 0L;
previousValue = 0L;
}
@Override
protected void writeHeader() throws IOException {
out.write(BytesUtils.longToBytes(minDeltaBase));
out.write(BytesUtils.longToBytes(firstValue));
}
@Override
public void encode(long value, ByteArrayOutputStream out) {
try {
encodeValue(value, out);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public int getOneItemMaxSize() {
return 8;
}
@Override
public long getMaxByteSize() {
// The meaning of 28 is: identifier(4)+bitmapLength(4)+index(4)+minDeltaBase(8)+firstValue(8)
return (long) 28 + (writeIndex * 2 / 8) + (writeIndex * 8);
}
/**
* Input a long value.
*
* @param value value to encode
* @param out the ByteArrayOutputStream that the data is encoded into
*/
public void encodeValue(long value, ByteArrayOutputStream out) throws IOException {
if (writeIndex == -1) {
data = new long[blockSize];
writeIndex = 0;
}
data[writeIndex++] = value;
if (writeIndex == blockSize) {
checkMissingPoint(out);
}
}
private void checkMissingPoint(ByteArrayOutputStream out) throws IOException {
// get the new regular data if the missing point exists in the original data
data = getRegularData(data);
firstValue = data[0];
if (isMissingPoint) {
writeIndex = data.length;
}
if (!isLastPack) {
flush(out);
}
}
private void writeBitmap(ByteArrayOutputStream out) throws IOException {
// generate bitmap
data2Diff(missingPointData);
byte[] bsArr = bitmap.toByteArray();
out.write(BytesUtils.intToBytes(bsArr.length));
out.write(bsArr);
}
private long[] getRegularData(long[] data) {
if (writeIndex > 1) {
previousValue = data[0];
minDeltaBase = data[1] - data[0];
// calculate the minimum delta of the data and check whether any points are missing
for (int i = 1; i < writeIndex; i++) {
calcDelta(data[i]);
previousValue = data[i];
}
}
// generate the continuous (regular) data when missing points exist
if (isMissingPoint) {
generateRegularData(data);
return regularData;
}
return data;
}
private void generateRegularData(long[] data) {
missingPointData = data;
blockSize = (int) (((data[writeIndex - 1] - data[0]) / minDeltaBase) + 1);
regularData = new long[blockSize];
for (int i = 0; i < blockSize; i++) {
regularData[i] = (data[0] + minDeltaBase * i);
}
}
private void calcDelta(long value) {
long delta = value - previousValue; // calculate delta
if (delta != minDeltaBase) {
isMissingPoint = true;
}
if (delta < minDeltaBase) {
minDeltaBase = delta;
}
}
private void data2Diff(long[] missingPointData) {
bitmap = new BitSet(regularData.length);
bitmap.flip(0, regularData.length);
int offset = 0;
for (int i = 1; i < missingPointData.length; i++) {
long delta = missingPointData[i] - missingPointData[i - 1];
if (delta != minDeltaBase) {
int missingPointNum = (int) (delta / minDeltaBase) - 1;
for (int j = 0; j < missingPointNum; j++) {
bitmap.set(i + (offset++), false);
}
}
}
}
}
}
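To make the block format concrete, here is a small walkthrough sketch (illustrative only; the class name is made up) of a single block containing one missing point, with comments tracing what the encoder above writes:

```java
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import org.apache.iotdb.tsfile.encoding.decoder.RegularDataDecoder;
import org.apache.iotdb.tsfile.encoding.encoder.RegularDataEncoder;

public class RegularBlockWalkthrough {
  public static void main(String[] args) throws Exception {
    // One small block with a single missing point: 30 is absent.
    int[] values = {0, 10, 20, 40, 50};

    RegularDataEncoder encoder = new RegularDataEncoder.IntRegularEncoder();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    for (int v : values) {
      encoder.encode(v, out);
    }
    // On flush the encoder computes minDeltaBase = 10, detects the gap
    // (40 - 20 != 10), rebuilds the regular grid {0, 10, 20, 30, 40, 50}
    // (packNum = 6) and writes a missing-point flag, the bitmap 1 1 1 0 1 1
    // (position 3 missing), followed by packNum, minDeltaBase and the first value.
    encoder.flush(out);

    // The decoder regenerates the grid from firstValue and minDeltaBase and
    // uses the bitmap to skip the missing position, yielding 0 10 20 40 50.
    RegularDataDecoder decoder = new RegularDataDecoder.IntRegularDecoder();
    ByteBuffer buffer = ByteBuffer.wrap(out.toByteArray());
    while (decoder.hasNext(buffer)) {
      System.out.print(decoder.readInt(buffer) + " ");
    }
  }
}
```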
......@@ -62,6 +62,8 @@ public abstract class TSEncodingBuilder {
return new TS_2DIFF();
case GORILLA:
return new GORILLA();
case REGULAR:
return new REGULAR();
default:
throw new UnsupportedOperationException(type.toString());
}
......@@ -236,8 +238,31 @@ public abstract class TSEncodingBuilder {
@Override
public void initFromProps(Map<String, String> props) {
//allowed do nothing
// allowed do nothing
}
}
/**
* for INT32, INT64
*/
public static class REGULAR extends TSEncodingBuilder {
@Override
public Encoder getEncoder(TSDataType type) {
switch (type) {
case INT32:
return new RegularDataEncoder.IntRegularEncoder();
case INT64:
return new RegularDataEncoder.LongRegularEncoder();
default:
throw new UnSupportedDataTypeException("REGULAR doesn't support data type: " + type);
}
}
@Override
public void initFromProps(Map<String, String> props) {
// allowed do nothing
}
}
}
......@@ -20,7 +20,7 @@ package org.apache.iotdb.tsfile.file.metadata.enums;
public enum TSEncoding {
PLAIN, PLAIN_DICTIONARY, RLE, DIFF, TS_2DIFF, BITMAP, GORILLA;
PLAIN, PLAIN_DICTIONARY, RLE, DIFF, TS_2DIFF, BITMAP, GORILLA, REGULAR;
/**
* judge the encoding deserialize type.
......@@ -44,6 +44,8 @@ public enum TSEncoding {
return BITMAP;
case 6:
return GORILLA;
case 7:
return REGULAR;
default:
return PLAIN;
}
......@@ -74,6 +76,8 @@ public enum TSEncoding {
return 5;
case GORILLA:
return 6;
case REGULAR:
return 7;
default:
return 0;
}
......
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iotdb.tsfile.write.record;
import org.apache.iotdb.tsfile.write.record.datapoint.DataPoint;
public class TSRecordBatch {
public long[] times;
public String deviceId;
public int blockSize;
public TSRecordBatch(int size) {
blockSize = size;
}
}
......@@ -102,25 +102,13 @@ public class DeltaBinaryEncoderLongTest {
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
int kong = 0;
for(int i = 0; i < dates.size(); i++) {
if(i % 500 == 0) {
kong ++;
}
}
ROW_NUM = dates.size() - kong;
ROW_NUM = dates.size();
long[] data = new long[ROW_NUM];
int j = 0;
for(int i = 0; i < dates.size(); i++) {
if(i % 500 == 0) {
continue;
}
try {
Date date = dateFormat.parse(dates.get(i));
data[j++] = date.getTime();
data[i] = date.getTime();
} catch (ParseException e) {
e.printStackTrace();
}
......@@ -136,13 +124,25 @@ public class DeltaBinaryEncoderLongTest {
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
ROW_NUM = dates.size();
int kong = 0;
for(int i = 0; i < dates.size(); i++) {
if(i % 500 == 0) {
kong ++;
}
}
ROW_NUM = dates.size() - kong;
long[] data = new long[ROW_NUM];
int j = 0;
for(int i = 0; i < dates.size(); i++) {
if(i % 500 == 0) {
continue;
}
try {
Date date = dateFormat.parse(dates.get(i));
data[i] =date.getTime();
data[j++] = date.getTime();
} catch (ParseException e) {
e.printStackTrace();
}
......
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iotdb.tsfile.encoding.decoder.regular;
import static org.junit.Assert.assertEquals;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.iotdb.tsfile.encoding.decoder.RegularDataDecoder;
import org.apache.iotdb.tsfile.encoding.encoder.RegularDataEncoder;
import org.junit.Before;
import org.junit.Test;
public class RegularDataEncoderIntegerTest {
private static int ROW_NUM;
ByteArrayOutputStream out;
private RegularDataEncoder regularDataEncoder;
private RegularDataDecoder regularDataDecoder;
private ByteBuffer buffer;
@Before
public void test() {
regularDataEncoder = new RegularDataEncoder.IntRegularEncoder();
regularDataDecoder = new RegularDataDecoder.IntRegularDecoder();
}
@Test
public void testRegularEncodingWithoutMissingPoint() throws IOException {
ROW_NUM = 2000000;
int[] data = new int[ROW_NUM];
for (int i = 0; i < ROW_NUM; i++) {
data[i] = i;
}
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithOnePercentMissingPoints() throws IOException {
int[] data = getMissingPointData(2000000, 80);
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithFivePercentMissingPoints() throws IOException {
int[] data = getMissingPointData(2000000, 20);
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithTenPercentMissingPoints() throws IOException {
int[] data = getMissingPointData(2000000, 10);
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithTwentyPercentMissingPoints() throws IOException {
int[] data = getMissingPointData(2000000, 5);
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithLowMissingPoints1() throws IOException {
int[] data = getMissingPointData(2000000, 1700);
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithLowMissingPoints2() throws IOException {
int[] data = getMissingPointData(2000000, 40000);
shouldReadAndWrite(data, ROW_NUM);
}
private int[] getMissingPointData(int dataSize, int missingPointInterval) {
ROW_NUM = dataSize;
int originalRowNum = ROW_NUM;
int kong = 0; // count of points removed to simulate missing data
for (int i = 0; i < ROW_NUM; i++) {
if (i % missingPointInterval == 0) {
kong++;
}
}
ROW_NUM = originalRowNum - kong;
int[] data = new int[ROW_NUM];
int j = 0;
for (int i = 0; i < originalRowNum; i++) {
if (i % missingPointInterval == 0) {
continue;
}
data[j++] = i;
}
return data;
}
private void writeData(int[] data, int length) throws IOException {
for (int i = 0; i < length; i++) {
regularDataEncoder.encode(data[i], out);
}
regularDataEncoder.flush(out);
}
private void shouldReadAndWrite(int[] data, int length) throws IOException {
System.out.println("source data size:" + 8 * length + " byte");
out = new ByteArrayOutputStream();
writeData(data, length);
byte[] page = out.toByteArray();
System.out.println("encoding data size:" + page.length + " byte");
buffer = ByteBuffer.wrap(page);
int i = 0;
while (regularDataDecoder.hasNext(buffer)) {
assertEquals(data[i++], regularDataDecoder.readInt(buffer));
}
}
}
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iotdb.tsfile.encoding.decoder.regular;
import static org.junit.Assert.assertEquals;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.TimeZone;
import java.util.stream.Stream;
import org.apache.iotdb.tsfile.encoding.decoder.RegularDataDecoder;
import org.apache.iotdb.tsfile.encoding.encoder.RegularDataEncoder;
import org.junit.Before;
import org.junit.Test;
public class RegularDataEncoderLongTest {
private static int ROW_NUM;
ByteArrayOutputStream out;
private RegularDataEncoder regularDataEncoder;
private RegularDataDecoder regularDataDecoder;
private ByteBuffer buffer;
@Before
public void test() {
regularDataEncoder = new RegularDataEncoder.LongRegularEncoder();
regularDataDecoder = new RegularDataDecoder.LongRegularDecoder();
}
@Test
public void testRegularEncodingWithoutMissingPoint() throws IOException {
List<String> dates = getBetweenDateWithOneSecond("1980-01-01T01:00:00", "1980-01-28T01:00:00");
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
ROW_NUM = dates.size();
long[] data = new long[ROW_NUM];
for(int i = 0; i < dates.size(); i++) {
try {
Date date = dateFormat.parse(dates.get(i));
data[i] = date.getTime();
} catch (ParseException e) {
e.printStackTrace();
}
}
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithOnePercentMissingPoints1() throws IOException {
long[] data = getMissingPointData(
getBetweenDateWithOneSecond("1980-01-01T01:00:00", "1980-01-28T01:00:00"), 80);
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithOnePercentMissingPoints2() throws IOException {
long[] data = getMissingPointData(
getBetweenDateWithTwoSecond("1980-01-01T01:00:00", "1980-01-28T01:00:00"), 80);
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithFivePercentMissingPoints() throws IOException {
long[] data = getMissingPointData(
getBetweenDateWithOneSecond("1980-01-01T01:00:00", "1980-01-28T01:00:00"), 20);
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithTenPercentMissingPoints() throws IOException {
long[] data = getMissingPointData(
getBetweenDateWithOneSecond("1980-01-01T01:00:00", "1980-01-28T01:00:00"), 10);
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithTwentyPercentMissingPoints() throws IOException {
long[] data = getMissingPointData(
getBetweenDateWithOneSecond("1980-01-01T01:00:00", "1980-01-28T01:00:00"), 5);
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithLowMissingPoints1() throws IOException {
long[] data = getMissingPointData(
getBetweenDateWithOneSecond("1980-01-01T01:00:00", "1980-01-28T01:00:00"), 1700);
shouldReadAndWrite(data, ROW_NUM);
}
@Test
public void testRegularWithLowMissingPoints2() throws IOException {
long[] data = getMissingPointData(
getBetweenDateWithOneSecond("1980-01-01T01:00:00", "1980-01-28T01:00:00"), 40000);
shouldReadAndWrite(data, ROW_NUM);
}
private long[] getMissingPointData(List<String> originalData, int missingPointInterval) {
List<String> dates = originalData;
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
int kong = 0; // count of points removed to simulate missing data
for (int i = 0; i < dates.size(); i++) {
if (i % missingPointInterval == 0) {
kong++;
}
}
ROW_NUM = dates.size() - kong;
long[] data = new long[ROW_NUM];
int j = 0;
for (int i = 0; i < dates.size(); i++) {
if (i % missingPointInterval == 0) {
continue;
}
try {
Date date = dateFormat.parse(dates.get(i));
data[j++] = date.getTime();
} catch (ParseException e) {
e.printStackTrace();
}
}
return data;
}
private List<String> getBetweenDateWithOneSecond(String start, String end){
TimeZone.setDefault(TimeZone.getTimeZone("GMT+8"));
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
List<String> list = new ArrayList<>();
LocalDateTime startDate = LocalDateTime.parse(start);
LocalDateTime endDate = LocalDateTime.parse(end);
long distance = ChronoUnit.SECONDS.between(startDate, endDate);
if (distance < 1) {
return list;
}
Stream.iterate(startDate, d -> {
return d.plusSeconds(1);
}).limit(distance + 1).forEach(f -> {
list.add(f.format(formatter));
});
return list;
}
private List<String> getBetweenDateWithTwoSecond(String start, String end) {
TimeZone.setDefault(TimeZone.getTimeZone("GMT+8"));
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
List<String> list = new ArrayList<>();
LocalDateTime startDate = LocalDateTime.parse(start);
LocalDateTime endDate = LocalDateTime.parse(end);
long distance = ChronoUnit.SECONDS.between(startDate, endDate);
if (distance < 1) {
return list;
}
Stream.iterate(startDate, d -> {
return d.plusSeconds(2);
}).limit((distance / 2) + 1).forEach(f -> {
list.add(f.format(formatter));
});
return list;
}
private void writeData(long[] data, int length) throws IOException {
for (int i = 0; i < length; i++) {
regularDataEncoder.encode(data[i], out);
}
regularDataEncoder.flush(out);
}
private void shouldReadAndWrite(long[] data, int length) throws IOException {
System.out.println("source data size:" + 8 * length + " byte");
out = new ByteArrayOutputStream();
writeData(data, length);
byte[] page = out.toByteArray();
System.out.println("encoding data size:" + page.length + " byte");
buffer = ByteBuffer.wrap(page);
int i = 0;
while (regularDataDecoder.hasNext(buffer)) {
assertEquals(data[i++], regularDataDecoder.readLong(buffer));
}
}
}
......@@ -25,6 +25,8 @@ import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.iotdb.tsfile.common.conf.TSFileConfig;
import org.apache.iotdb.tsfile.exception.write.WriteProcessException;
import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
......@@ -70,25 +72,25 @@ public class TsFileReadWriteTest {
@Test
public void intTest() throws IOException, WriteProcessException {
writeData(TSDataType.INT32, (i) -> new IntDataPoint("sensor_1", (int) i));
writeData(TSDataType.INT32, (i) -> new IntDataPoint("sensor_1", (int) i), TSEncoding.RLE);
readData((i, field, delta) -> assertEquals(i, field.getIntV()));
}
@Test
public void longTest() throws IOException, WriteProcessException {
writeData(TSDataType.INT64, (i) -> new LongDataPoint("sensor_1", i));
writeData(TSDataType.INT64, (i) -> new LongDataPoint("sensor_1", i), TSEncoding.RLE);
readData((i, field, delta) -> assertEquals(i, field.getLongV()));
}
@Test
public void floatTest() throws IOException, WriteProcessException {
writeData(TSDataType.FLOAT, (i) -> new FloatDataPoint("sensor_1", (float) i));
writeData(TSDataType.FLOAT, (i) -> new FloatDataPoint("sensor_1", (float) i), TSEncoding.RLE);
readData((i, field, delta) -> assertEquals(i, field.getFloatV(), delta));
}
@Test
public void doubleTest() throws IOException, WriteProcessException {
writeData(TSDataType.DOUBLE, (i) -> new DoubleDataPoint("sensor_1", (double) i));
writeData(TSDataType.DOUBLE, (i) -> new DoubleDataPoint("sensor_1", (double) i), TSEncoding.RLE);
readData((i, field, delta) -> assertEquals(i, field.getDoubleV(), delta));
}
......@@ -120,12 +122,21 @@ public class TsFileReadWriteTest {
assertTrue(f.delete());
}
private void writeData(TSDataType dataType, DataPointProxy proxy) throws IOException, WriteProcessException {
@Test
public void readMeasurementWithRegularEncodingTest() throws IOException, WriteProcessException {
TSFileConfig.timeEncoder = "REGULAR";
writeData(TSDataType.INT64, (i) -> new LongDataPoint("sensor_1", i), TSEncoding.REGULAR);
readData((i, field, delta) -> assertEquals(i, field.getLongV()));
TSFileConfig.timeEncoder = "TS_2DIFF";
}
private void writeData(TSDataType dataType, DataPointProxy proxy, TSEncoding encodingType)
throws IOException, WriteProcessException {
int floatCount = 1024 * 1024 * 13 + 1023;
// add measurements into file schema
try (TsFileWriter tsFileWriter = new TsFileWriter(f)) {
tsFileWriter
.addMeasurement(new MeasurementSchema("sensor_1", dataType, TSEncoding.RLE));
.addMeasurement(new MeasurementSchema("sensor_1", dataType, encodingType));
for (long i = 1; i < floatCount; i++) {
// construct TSRecord
TSRecord tsRecord = new TSRecord(i, "device_1");
......@@ -163,6 +174,5 @@ public class TsFileReadWriteTest {
}
private interface ReadDataPointProxy {
void assertEqualProxy(long i, Field field, double delta);
}
}