Skip to content

Commit 945df33

Browse files
committed
Ensure ordinal builder emit ordinal blocks (elastic#127949)
Currently, if a field has high cardinality, we may mistakenly disable emitting ordinal blocks. For example, with 10,000 `tsid` values, we never emit ordinal blocks during reads, even though we could emit blocks for 10 `tsid` values across 1,000 positions. This bug disables optimizations for value aggregation and block hashing. This change tracks the minimum and maximum seen ordinals and uses them as an estimate for the number of ordinals. However, if a page contains `ord=1` and `ord=9999`, ordinal blocks still won't be emitted. Allocating a bitset or an array for `value_count` could track this more accurately but would require additional memory. I need to think about this trade off more before opening another PR to fix this issue completely. This is a quick, contained fix that significantly speeds up time-series aggregation (and other queries too). The execution time of this query is reduced from 3.4s to 1.9s with 11M documents. ``` POST /_query { "profile": true, "query": "TS metrics-hostmetricsreceiver.otel-default | STATS cpu = avg(avg_over_time(`metrics.system.cpu.load_average.1m`)) BY host.name, BUCKET(@timestamp, 5 minute)" } ``` ``` "took": 3475, "is_partial": false, "documents_found": 11368089, "values_loaded": 34248167 ``` ``` "took": 1965, "is_partial": false, "documents_found": 11368089, "values_loaded": 34248167 ```
1 parent d4accf3 commit 945df33

File tree

3 files changed

+58
-8
lines changed

3 files changed

+58
-8
lines changed

docs/changelog/127949.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127949
2+
summary: Ensure ordinal builder emit ordinal blocks
3+
area: ES|QL
4+
type: bug
5+
issues: []

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java

+16-8
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
public class SingletonOrdinalsBuilder implements BlockLoader.SingletonOrdinalsBuilder, Releasable, Block.Builder {
2323
private final BlockFactory blockFactory;
2424
private final SortedDocValues docValues;
25+
private int minOrd = Integer.MAX_VALUE;
26+
private int maxOrd = Integer.MIN_VALUE;
2527
private final int[] ords;
2628
private int count;
2729

@@ -39,8 +41,10 @@ public SingletonOrdinalsBuilder appendNull() {
3941
}
4042

4143
@Override
42-
public SingletonOrdinalsBuilder appendOrd(int value) {
43-
ords[count++] = value;
44+
public SingletonOrdinalsBuilder appendOrd(int ord) {
45+
ords[count++] = ord;
46+
minOrd = Math.min(minOrd, ord);
47+
maxOrd = Math.max(maxOrd, ord);
4448
return this;
4549
}
4650

@@ -55,7 +59,7 @@ public SingletonOrdinalsBuilder endPositionEntry() {
5559
}
5660

5761
BytesRefBlock buildOrdinal() {
58-
int valueCount = docValues.getValueCount();
62+
int valueCount = maxOrd - minOrd + 1;
5963
long breakerSize = ordsSize(valueCount);
6064
blockFactory.adjustBreaker(breakerSize);
6165
BytesRefVector bytesVector = null;
@@ -65,7 +69,7 @@ BytesRefBlock buildOrdinal() {
6569
Arrays.fill(newOrds, -1);
6670
for (int ord : ords) {
6771
if (ord != -1) {
68-
newOrds[ord] = 0;
72+
newOrds[ord - minOrd] = 0;
6973
}
7074
}
7175
// resolve the ordinals and remaps the ordinals
@@ -74,7 +78,7 @@ BytesRefBlock buildOrdinal() {
7478
for (int i = 0; i < newOrds.length; i++) {
7579
if (newOrds[i] != -1) {
7680
newOrds[i] = ++nextOrd;
77-
bytesBuilder.appendBytesRef(docValues.lookupOrd(i));
81+
bytesBuilder.appendBytesRef(docValues.lookupOrd(i + minOrd));
7882
}
7983
}
8084
bytesVector = bytesBuilder.build();
@@ -86,7 +90,7 @@ BytesRefBlock buildOrdinal() {
8690
if (ord == -1) {
8791
ordinalsBuilder.appendNull();
8892
} else {
89-
ordinalsBuilder.appendInt(newOrds[ord]);
93+
ordinalsBuilder.appendInt(newOrds[ord - minOrd]);
9094
}
9195
}
9296
ordinalBlock = ordinalsBuilder.build();
@@ -107,7 +111,6 @@ BytesRefBlock buildRegularBlock() {
107111
blockFactory.adjustBreaker(breakerSize);
108112
try {
109113
int[] sortedOrds = ords.clone();
110-
Arrays.sort(sortedOrds);
111114
int uniqueCount = compactToUnique(sortedOrds);
112115

113116
try (BreakingBytesRefBuilder copies = new BreakingBytesRefBuilder(blockFactory.breaker(), "ords")) {
@@ -167,7 +170,12 @@ public BytesRefBlock build() {
167170
}
168171

169172
boolean shouldBuildOrdinalsBlock() {
170-
return ords.length >= 2 * docValues.getValueCount() && ords.length >= 32;
173+
if (minOrd <= maxOrd) {
174+
int numOrds = maxOrd - minOrd + 1;
175+
return OrdinalBytesRefBlock.isDense(ords.length, numOrds);
176+
} else {
177+
return false;
178+
}
171179
}
172180

173181
@Override

x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java

+37
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88
package org.elasticsearch.compute.data;
99

1010
import org.apache.lucene.document.SortedDocValuesField;
11+
import org.apache.lucene.index.DirectoryReader;
1112
import org.apache.lucene.index.IndexReader;
13+
import org.apache.lucene.index.IndexWriter;
14+
import org.apache.lucene.index.IndexWriterConfig;
1215
import org.apache.lucene.index.LeafReaderContext;
1316
import org.apache.lucene.index.SortedDocValues;
1417
import org.apache.lucene.store.Directory;
@@ -37,6 +40,7 @@
3740
import static org.elasticsearch.test.MapMatcher.assertMap;
3841
import static org.elasticsearch.test.MapMatcher.matchesMap;
3942
import static org.hamcrest.Matchers.equalTo;
43+
import static org.hamcrest.Matchers.hasSize;
4044

4145
public class SingletonOrdinalsBuilderTests extends ESTestCase {
4246
public void testReader() throws IOException {
@@ -154,6 +158,39 @@ public void testAllNull() throws IOException {
154158
}
155159
}
156160

161+
public void testEmitOrdinalForHighCardinality() throws IOException {
162+
BlockFactory factory = breakingDriverContext().blockFactory();
163+
int numOrds = between(50, 100);
164+
try (Directory directory = newDirectory(); IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig())) {
165+
for (int o = 0; o < numOrds; o++) {
166+
int docPerOrds = between(10, 15);
167+
for (int d = 0; d < docPerOrds; d++) {
168+
indexWriter.addDocument(List.of(new SortedDocValuesField("f", new BytesRef("value-" + o))));
169+
}
170+
}
171+
try (IndexReader reader = DirectoryReader.open(indexWriter)) {
172+
assertThat(reader.leaves(), hasSize(1));
173+
for (LeafReaderContext ctx : reader.leaves()) {
174+
int batchSize = between(40, 100);
175+
int ord = random().nextInt(numOrds);
176+
try (
177+
var b1 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize);
178+
var b2 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize)
179+
) {
180+
for (int i = 0; i < batchSize; i++) {
181+
b1.appendOrd(ord);
182+
b2.appendOrd(ord);
183+
}
184+
try (BytesRefBlock block1 = b1.build(); BytesRefBlock block2 = b2.buildRegularBlock()) {
185+
assertThat(block1, equalTo(block2));
186+
assertNotNull(block1.asOrdinals());
187+
}
188+
}
189+
}
190+
}
191+
}
192+
}
193+
157194
static BytesRefBlock buildOrdinalsBuilder(SingletonOrdinalsBuilder builder) {
158195
if (randomBoolean()) {
159196
return builder.buildRegularBlock();

0 commit comments

Comments
 (0)