提交 f4a97aeb 编写于 作者: N nishantmonu51

fix rollup for hashed partitions

truncate timestamp while calculating the partitionNumber
上级 ae4f2193
......@@ -41,6 +41,7 @@ import com.metamx.common.logger.Logger;
import io.druid.common.utils.JodaUtils;
import io.druid.data.input.InputRow;
import io.druid.data.input.impl.StringInputRowParser;
import io.druid.granularity.QueryGranularity;
import io.druid.guice.GuiceInjectors;
import io.druid.guice.JsonConfigProvider;
import io.druid.guice.annotations.Self;
......@@ -172,6 +173,7 @@ public class HadoopDruidIndexerConfig
private volatile PathSpec pathSpec;
private volatile Map<DateTime,ShardSpecLookup> shardSpecLookups = Maps.newHashMap();
private volatile Map<ShardSpec, HadoopyShardSpec> hadoopShardSpecLookup = Maps.newHashMap();
private final QueryGranularity rollupGran;
@JsonCreator
public HadoopDruidIndexerConfig(
......@@ -203,6 +205,7 @@ public class HadoopDruidIndexerConfig
hadoopShardSpecLookup.put(hadoopyShardSpec.getActualSpec(), hadoopyShardSpec);
}
}
this.rollupGran = schema.getDataSchema().getGranularitySpec().getQueryGranularity();
}
@JsonProperty
......@@ -326,7 +329,7 @@ public class HadoopDruidIndexerConfig
return Optional.absent();
}
final ShardSpec actualSpec = shardSpecLookups.get(timeBucket.get().getStart()).getShardSpec(inputRow);
final ShardSpec actualSpec = shardSpecLookups.get(timeBucket.get().getStart()).getShardSpec(rollupGran.truncate(inputRow.getTimestampFromEpoch()), inputRow);
final HadoopyShardSpec hadoopyShardSpec = hadoopShardSpecLookup.get(actualSpec);
return Optional.of(
......
......@@ -407,14 +407,14 @@ public class IndexTask extends AbstractFixedIntervalTask
final int myRowFlushBoundary = rowFlushBoundary > 0
? rowFlushBoundary
: toolbox.getConfig().getDefaultRowFlushBoundary();
final QueryGranularity rollupGran = ingestionSchema.getDataSchema().getGranularitySpec().getQueryGranularity();
try {
plumber.startJob();
while (firehose.hasMore()) {
final InputRow inputRow = firehose.nextRow();
if (shouldIndex(shardSpec, interval, inputRow)) {
if (shouldIndex(shardSpec, interval, inputRow, rollupGran)) {
int numRows = plumber.add(inputRow);
if (numRows == -1) {
throw new ISE(
......@@ -469,13 +469,15 @@ public class IndexTask extends AbstractFixedIntervalTask
*
* @return true or false
*/
private boolean shouldIndex(
private static boolean shouldIndex(
final ShardSpec shardSpec,
final Interval interval,
final InputRow inputRow
final InputRow inputRow,
final QueryGranularity rollupGran
)
{
return interval.contains(inputRow.getTimestampFromEpoch()) && shardSpec.isInChunk(inputRow);
return interval.contains(inputRow.getTimestampFromEpoch())
&& shardSpec.isInChunk(rollupGran.truncate(inputRow.getTimestampFromEpoch()), inputRow);
}
public static class IndexIngestionSpec extends IngestionSpec<IndexIOConfig, IndexTuningConfig>
......
......@@ -41,7 +41,7 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<metamx.java-util.version>0.26.9</metamx.java-util.version>
<apache.curator.version>2.6.0</apache.curator.version>
<druid.api.version>0.2.14</druid.api.version>
<druid.api.version>0.2.14.1</druid.api.version>
</properties>
<modules>
......
......@@ -49,14 +49,14 @@ public class HashBasedNumberedShardSpec extends NumberedShardSpec
}
@Override
public boolean isInChunk(InputRow inputRow)
public boolean isInChunk(long timestamp, InputRow inputRow)
{
return (((long) hash(inputRow)) - getPartitionNum()) % getPartitions() == 0;
return (((long) hash(timestamp, inputRow)) - getPartitionNum()) % getPartitions() == 0;
}
protected int hash(InputRow inputRow)
protected int hash(long timestamp, InputRow inputRow)
{
final List<Object> groupKey = Rows.toGroupKey(inputRow.getTimestampFromEpoch(), inputRow);
final List<Object> groupKey = Rows.toGroupKey(timestamp, inputRow);
try {
return hashFunction.hashBytes(jsonMapper.writeValueAsBytes(groupKey)).asInt();
}
......@@ -80,9 +80,9 @@ public class HashBasedNumberedShardSpec extends NumberedShardSpec
return new ShardSpecLookup()
{
@Override
public ShardSpec getShardSpec(InputRow row)
public ShardSpec getShardSpec(long timestamp, InputRow row)
{
int index = Math.abs(hash(row) % getPartitions());
int index = Math.abs(hash(timestamp, row) % getPartitions());
return shardSpecs.get(index);
}
};
......
......@@ -50,7 +50,7 @@ public class LinearShardSpec implements ShardSpec
return new ShardSpecLookup()
{
@Override
public ShardSpec getShardSpec(InputRow row)
public ShardSpec getShardSpec(long timestamp, InputRow row)
{
return shardSpecs.get(0);
}
......@@ -63,7 +63,7 @@ public class LinearShardSpec implements ShardSpec
}
@Override
public boolean isInChunk(InputRow inputRow) {
public boolean isInChunk(long timestamp, InputRow inputRow) {
return true;
}
......
......@@ -60,7 +60,7 @@ public class NumberedShardSpec implements ShardSpec
return new ShardSpecLookup()
{
@Override
public ShardSpec getShardSpec(InputRow row)
public ShardSpec getShardSpec(long timestamp, InputRow row)
{
return shardSpecs.get(0);
}
......@@ -80,7 +80,7 @@ public class NumberedShardSpec implements ShardSpec
}
@Override
public boolean isInChunk(InputRow inputRow)
public boolean isInChunk(long timestamp, InputRow inputRow)
{
return true;
}
......
......@@ -100,10 +100,10 @@ public class SingleDimensionShardSpec implements ShardSpec
return new ShardSpecLookup()
{
@Override
public ShardSpec getShardSpec(InputRow row)
public ShardSpec getShardSpec(long timestamp, InputRow row)
{
for (ShardSpec spec : shardSpecs) {
if (spec.isInChunk(row)) {
if (spec.isInChunk(timestamp, row)) {
return spec;
}
}
......@@ -124,7 +124,7 @@ public class SingleDimensionShardSpec implements ShardSpec
}
@Override
public boolean isInChunk(InputRow inputRow)
public boolean isInChunk(long timestamp, InputRow inputRow)
{
final List<String> values = inputRow.getDimension(dimension);
......
......@@ -127,7 +127,7 @@ public class HashBasedNumberedShardSpecTest
public boolean assertExistsInOneSpec(List<ShardSpec> specs, InputRow row)
{
for (ShardSpec spec : specs) {
if (spec.isInChunk(row)) {
if (spec.isInChunk(row.getTimestampFromEpoch(), row)) {
return true;
}
}
......@@ -145,7 +145,7 @@ public class HashBasedNumberedShardSpecTest
}
@Override
protected int hash(InputRow inputRow)
protected int hash(long timestamp, InputRow inputRow)
{
return inputRow.hashCode();
}
......@@ -208,4 +208,5 @@ public class HashBasedNumberedShardSpecTest
return 0;
}
}
}
......@@ -111,7 +111,7 @@ public class SingleDimensionShardSpecTest
}
)
);
Assert.assertEquals(String.format("spec[%s], row[%s]", spec, inputRow), pair.lhs, spec.isInChunk(inputRow));
Assert.assertEquals(String.format("spec[%s], row[%s]", spec, inputRow), pair.lhs, spec.isInChunk(inputRow.getTimestampFromEpoch(), inputRow));
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册