Ensure ordinal builder emit ordinal blocks (elastic#127949)

dnhatn · dnhatn · commit 945df33c7f41 · 2025-05-09T16:05:10.000Z
Currently, if a field has high cardinality, we may mistakenly disable emitting ordinal blocks. For example, with 10,000 `tsid` values, we never emit ordinal blocks during reads, even though we could emit blocks for 10 `tsid` values across 1,000 positions. This bug disables optimizations for value aggregation and block hashing. This change tracks the minimum and maximum seen ordinals and uses them as an estimate for the number of ordinals. However, if a page contains `ord=1` and `ord=9999`, ordinal blocks still won't be emitted. Allocating a bitset or an array for `value_count` could track this more accurately but would require additional memory. I need to think about this trade off more before opening another PR to fix this issue completely. This is a quick, contained fix that significantly speeds up time-series aggregation (and other queries too). The execution time of this query is reduced from 3.4s to 1.9s with 11M documents. ``` POST /_query { "profile": true, "query": "TS metrics-hostmetricsreceiver.otel-default | STATS cpu = avg(avg_over_time(`metrics.system.cpu.load_average.1m`)) BY host.name, BUCKET(@timestamp, 5 minute)" } ``` ``` "took": 3475, "is_partial": false, "documents_found": 11368089, "values_loaded": 34248167 ``` ``` "took": 1965, "is_partial": false, "documents_found": 11368089, "values_loaded": 34248167 ```
diff --git a/docs/changelog/127949.yaml b/docs/changelog/127949.yaml
@@ -0,0 +1,5 @@
+pr: 127949
+summary: Ensure ordinal builder emit ordinal blocks
+area: ES|QL
+type: bug
+issues: []
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java
@@ -22,6 +22,8 @@
 public class SingletonOrdinalsBuilder implements BlockLoader.SingletonOrdinalsBuilder, Releasable, Block.Builder {
     private final BlockFactory blockFactory;
     private final SortedDocValues docValues;
+    private int minOrd = Integer.MAX_VALUE;
+    private int maxOrd = Integer.MIN_VALUE;
     private final int[] ords;
     private int count;
 
@@ -39,8 +41,10 @@ public SingletonOrdinalsBuilder appendNull() {
     }
 
     @Override
-    public SingletonOrdinalsBuilder appendOrd(int value) {
-        ords[count++] = value;
+    public SingletonOrdinalsBuilder appendOrd(int ord) {
+        ords[count++] = ord;
+        minOrd = Math.min(minOrd, ord);
+        maxOrd = Math.max(maxOrd, ord);
         return this;
     }
 
@@ -55,7 +59,7 @@ public SingletonOrdinalsBuilder endPositionEntry() {
     }
 
     BytesRefBlock buildOrdinal() {
-        int valueCount = docValues.getValueCount();
+        int valueCount = maxOrd - minOrd + 1;
         long breakerSize = ordsSize(valueCount);
         blockFactory.adjustBreaker(breakerSize);
         BytesRefVector bytesVector = null;
@@ -65,7 +69,7 @@ BytesRefBlock buildOrdinal() {
             Arrays.fill(newOrds, -1);
             for (int ord : ords) {
                 if (ord != -1) {
-                    newOrds[ord] = 0;
+                    newOrds[ord - minOrd] = 0;
                 }
             }
             // resolve the ordinals and remaps the ordinals
@@ -74,7 +78,7 @@ BytesRefBlock buildOrdinal() {
                 for (int i = 0; i < newOrds.length; i++) {
                     if (newOrds[i] != -1) {
                         newOrds[i] = ++nextOrd;
-                        bytesBuilder.appendBytesRef(docValues.lookupOrd(i));
+                        bytesBuilder.appendBytesRef(docValues.lookupOrd(i + minOrd));
                     }
                 }
                 bytesVector = bytesBuilder.build();
@@ -86,7 +90,7 @@ BytesRefBlock buildOrdinal() {
                     if (ord == -1) {
                         ordinalsBuilder.appendNull();
                     } else {
-                        ordinalsBuilder.appendInt(newOrds[ord]);
+                        ordinalsBuilder.appendInt(newOrds[ord - minOrd]);
                     }
                 }
                 ordinalBlock = ordinalsBuilder.build();
@@ -107,7 +111,6 @@ BytesRefBlock buildRegularBlock() {
             blockFactory.adjustBreaker(breakerSize);
             try {
                 int[] sortedOrds = ords.clone();
-                Arrays.sort(sortedOrds);
                 int uniqueCount = compactToUnique(sortedOrds);
 
                 try (BreakingBytesRefBuilder copies = new BreakingBytesRefBuilder(blockFactory.breaker(), "ords")) {
@@ -167,7 +170,12 @@ public BytesRefBlock build() {
     }
 
     boolean shouldBuildOrdinalsBlock() {
-        return ords.length >= 2 * docValues.getValueCount() && ords.length >= 32;
+        if (minOrd <= maxOrd) {
+            int numOrds = maxOrd - minOrd + 1;
+            return OrdinalBytesRefBlock.isDense(ords.length, numOrds);
+        } else {
+            return false;
+        }
     }
 
     @Override
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java
@@ -8,7 +8,10 @@
 package org.elasticsearch.compute.data;
 
 import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.store.Directory;
@@ -37,6 +40,7 @@
 import static org.elasticsearch.test.MapMatcher.assertMap;
 import static org.elasticsearch.test.MapMatcher.matchesMap;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasSize;
 
 public class SingletonOrdinalsBuilderTests extends ESTestCase {
     public void testReader() throws IOException {
@@ -154,6 +158,39 @@ public void testAllNull() throws IOException {
         }
     }
 
+    public void testEmitOrdinalForHighCardinality() throws IOException {
+        BlockFactory factory = breakingDriverContext().blockFactory();
+        int numOrds = between(50, 100);
+        try (Directory directory = newDirectory(); IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig())) {
+            for (int o = 0; o < numOrds; o++) {
+                int docPerOrds = between(10, 15);
+                for (int d = 0; d < docPerOrds; d++) {
+                    indexWriter.addDocument(List.of(new SortedDocValuesField("f", new BytesRef("value-" + o))));
+                }
+            }
+            try (IndexReader reader = DirectoryReader.open(indexWriter)) {
+                assertThat(reader.leaves(), hasSize(1));
+                for (LeafReaderContext ctx : reader.leaves()) {
+                    int batchSize = between(40, 100);
+                    int ord = random().nextInt(numOrds);
+                    try (
+                        var b1 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize);
+                        var b2 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize)
+                    ) {
+                        for (int i = 0; i < batchSize; i++) {
+                            b1.appendOrd(ord);
+                            b2.appendOrd(ord);
+                        }
+                        try (BytesRefBlock block1 = b1.build(); BytesRefBlock block2 = b2.buildRegularBlock()) {
+                            assertThat(block1, equalTo(block2));
+                            assertNotNull(block1.asOrdinals());
+                        }
+                    }
+                }
+            }
+        }
+    }
+
     static BytesRefBlock buildOrdinalsBuilder(SingletonOrdinalsBuilder builder) {
         if (randomBoolean()) {
             return builder.buildRegularBlock();