Prune _source to only contain stored fields
Gerrit queries fetch only stored fields from _source. So, pruning
contents of _source to only contain those fields will help reduce
index sizes. Although doing this introduces some limitations as
described in [1], they don't apply to Gerrit use cases, as we can
always recreate the indices from NoteDb.
On a Gerrit instance with ~4M changes, this change brings down the
size of the changes index from ~40G to ~25G.
[1] https://www.elastic.co/guide/en/elasticsearch/reference/7.17/mapping-source-field.html#include-exclude
Change-Id: Iffab01a1c0121f9f99ae0f3d10bc40842deb0598
diff --git a/src/main/java/com/google/gerrit/elasticsearch/AbstractElasticIndex.java b/src/main/java/com/google/gerrit/elasticsearch/AbstractElasticIndex.java
index 908d9a7..d4f2c5d 100644
--- a/src/main/java/com/google/gerrit/elasticsearch/AbstractElasticIndex.java
+++ b/src/main/java/com/google/gerrit/elasticsearch/AbstractElasticIndex.java
@@ -30,7 +30,7 @@
import com.google.common.io.BaseEncoding;
import com.google.common.io.CharStreams;
import com.google.gerrit.common.Nullable;
-import com.google.gerrit.elasticsearch.ElasticMapping.MappingProperties;
+import com.google.gerrit.elasticsearch.ElasticMapping.Mapping;
import com.google.gerrit.elasticsearch.builders.QueryBuilder;
import com.google.gerrit.elasticsearch.builders.SearchSourceBuilder;
import com.google.gerrit.elasticsearch.bulk.DeleteRequest;
@@ -226,14 +226,14 @@
protected abstract String getId(V v);
- protected String getMappingsForSingleType(MappingProperties properties) {
- return getMappingsFor(properties);
+ protected String getMappingsForSingleType(Mapping mapping) {
+ return getMappingsFor(mapping);
}
- protected String getMappingsFor(MappingProperties properties) {
+ protected String getMappingsFor(Mapping mapping) {
JsonObject mappings = new JsonObject();
- mappings.add(MAPPINGS, gson.toJsonTree(properties));
+ mappings.add(MAPPINGS, gson.toJsonTree(mapping));
return gson.toJson(mappings);
}
diff --git a/src/main/java/com/google/gerrit/elasticsearch/ElasticAccountIndex.java b/src/main/java/com/google/gerrit/elasticsearch/ElasticAccountIndex.java
index 1da80ae..1e94148 100644
--- a/src/main/java/com/google/gerrit/elasticsearch/ElasticAccountIndex.java
+++ b/src/main/java/com/google/gerrit/elasticsearch/ElasticAccountIndex.java
@@ -15,7 +15,7 @@
package com.google.gerrit.elasticsearch;
import com.google.common.collect.ImmutableSet;
-import com.google.gerrit.elasticsearch.ElasticMapping.MappingProperties;
+import com.google.gerrit.elasticsearch.ElasticMapping.Mapping;
import com.google.gerrit.elasticsearch.bulk.BulkRequest;
import com.google.gerrit.elasticsearch.bulk.IndexRequest;
import com.google.gerrit.elasticsearch.bulk.UpdateRequest;
@@ -46,7 +46,7 @@
public class ElasticAccountIndex extends AbstractElasticIndex<Account.Id, AccountState>
implements AccountIndex {
static class AccountMapping {
- final MappingProperties accounts;
+ final Mapping accounts;
AccountMapping(Schema<AccountState> schema, ElasticQueryAdapter adapter) {
this.accounts = ElasticMapping.createMapping(schema, adapter);
diff --git a/src/main/java/com/google/gerrit/elasticsearch/ElasticChangeIndex.java b/src/main/java/com/google/gerrit/elasticsearch/ElasticChangeIndex.java
index a4674bf..8504e16 100644
--- a/src/main/java/com/google/gerrit/elasticsearch/ElasticChangeIndex.java
+++ b/src/main/java/com/google/gerrit/elasticsearch/ElasticChangeIndex.java
@@ -17,7 +17,7 @@
import static java.util.Objects.requireNonNull;
import com.google.common.collect.ImmutableSet;
-import com.google.gerrit.elasticsearch.ElasticMapping.MappingProperties;
+import com.google.gerrit.elasticsearch.ElasticMapping.Mapping;
import com.google.gerrit.elasticsearch.bulk.BulkRequest;
import com.google.gerrit.elasticsearch.bulk.IndexRequest;
import com.google.gerrit.elasticsearch.bulk.UpdateRequest;
@@ -53,12 +53,12 @@
class ElasticChangeIndex extends AbstractElasticIndex<Change.Id, ChangeData>
implements ChangeIndex {
static class ChangeMapping {
- final MappingProperties changes;
- final MappingProperties openChanges;
- final MappingProperties closedChanges;
+ final Mapping changes;
+ final Mapping openChanges;
+ final Mapping closedChanges;
ChangeMapping(Schema<ChangeData> schema, ElasticQueryAdapter adapter) {
- MappingProperties mapping = ElasticMapping.createMapping(schema, adapter);
+ Mapping mapping = ElasticMapping.createMapping(schema, adapter);
this.changes = mapping;
this.openChanges = mapping;
this.closedChanges = mapping;
diff --git a/src/main/java/com/google/gerrit/elasticsearch/ElasticGroupIndex.java b/src/main/java/com/google/gerrit/elasticsearch/ElasticGroupIndex.java
index 626203a..e0b337e 100644
--- a/src/main/java/com/google/gerrit/elasticsearch/ElasticGroupIndex.java
+++ b/src/main/java/com/google/gerrit/elasticsearch/ElasticGroupIndex.java
@@ -15,7 +15,7 @@
package com.google.gerrit.elasticsearch;
import com.google.common.collect.ImmutableSet;
-import com.google.gerrit.elasticsearch.ElasticMapping.MappingProperties;
+import com.google.gerrit.elasticsearch.ElasticMapping.Mapping;
import com.google.gerrit.elasticsearch.bulk.BulkRequest;
import com.google.gerrit.elasticsearch.bulk.IndexRequest;
import com.google.gerrit.elasticsearch.bulk.UpdateRequest;
@@ -46,7 +46,7 @@
public class ElasticGroupIndex extends AbstractElasticIndex<AccountGroup.UUID, InternalGroup>
implements GroupIndex {
static class GroupMapping {
- final MappingProperties groups;
+ final Mapping groups;
GroupMapping(Schema<InternalGroup> schema, ElasticQueryAdapter adapter) {
this.groups = ElasticMapping.createMapping(schema, adapter);
diff --git a/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java b/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java
index 6bae325..1668450 100644
--- a/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java
+++ b/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java
@@ -18,6 +18,7 @@
import com.google.gerrit.index.FieldDef;
import com.google.gerrit.index.FieldType;
import com.google.gerrit.index.Schema;
+import com.google.gson.annotations.SerializedName;
import java.util.Map;
class ElasticMapping {
@@ -25,7 +26,7 @@
protected static final String TIMESTAMP_FIELD_TYPE = "date";
protected static final String TIMESTAMP_FIELD_FORMAT = "date_optional_time";
- static MappingProperties createMapping(Schema<?> schema, ElasticQueryAdapter adapter) {
+ static Mapping createMapping(Schema<?> schema, ElasticQueryAdapter adapter) {
ElasticMapping.Builder mapping = new ElasticMapping.Builder(adapter);
for (FieldDef<?, ?> field : schema.getFields().values()) {
String name = field.getName();
@@ -46,6 +47,11 @@
throw new IllegalStateException("Unsupported field type: " + fieldType.getName());
}
}
+ mapping.addSourceIncludes(
+ schema.getFields().values().stream()
+ .filter(f -> f.isStored())
+ .map(f -> f.getName())
+ .toArray(String[]::new));
return mapping.build();
}
@@ -53,15 +59,18 @@
private final ElasticQueryAdapter adapter;
private final ImmutableMap.Builder<String, FieldProperties> fields =
new ImmutableMap.Builder<>();
+ private final ImmutableMap.Builder<String, String[]> sourceIncludes =
+ new ImmutableMap.Builder<>();
Builder(ElasticQueryAdapter adapter) {
this.adapter = adapter;
}
- MappingProperties build() {
- MappingProperties properties = new MappingProperties();
- properties.properties = fields.build();
- return properties;
+ Mapping build() {
+ Mapping mapping = new Mapping();
+ mapping.properties = fields.build();
+ mapping.source = sourceIncludes.build();
+ return mapping;
}
Builder addExactField(String name) {
@@ -99,13 +108,21 @@
return this;
}
+ Builder addSourceIncludes(String[] includes) {
+ sourceIncludes.put("includes", includes);
+ return this;
+ }
+
Builder add(String name, String type) {
fields.put(name, new FieldProperties(type));
return this;
}
}
- static class MappingProperties {
+ static class Mapping {
+ @SerializedName("_source")
+ Map<String, String[]> source;
+
Map<String, FieldProperties> properties;
}
diff --git a/src/main/java/com/google/gerrit/elasticsearch/ElasticProjectIndex.java b/src/main/java/com/google/gerrit/elasticsearch/ElasticProjectIndex.java
index 99b202d..4703f1f 100644
--- a/src/main/java/com/google/gerrit/elasticsearch/ElasticProjectIndex.java
+++ b/src/main/java/com/google/gerrit/elasticsearch/ElasticProjectIndex.java
@@ -15,7 +15,7 @@
package com.google.gerrit.elasticsearch;
import com.google.common.collect.ImmutableSet;
-import com.google.gerrit.elasticsearch.ElasticMapping.MappingProperties;
+import com.google.gerrit.elasticsearch.ElasticMapping.Mapping;
import com.google.gerrit.elasticsearch.bulk.BulkRequest;
import com.google.gerrit.elasticsearch.bulk.IndexRequest;
import com.google.gerrit.elasticsearch.bulk.UpdateRequest;
@@ -48,7 +48,7 @@
public class ElasticProjectIndex extends AbstractElasticIndex<Project.NameKey, ProjectData>
implements ProjectIndex {
static class ProjectMapping {
- MappingProperties projects;
+ Mapping projects;
ProjectMapping(Schema<ProjectData> schema, ElasticQueryAdapter adapter) {
this.projects = ElasticMapping.createMapping(schema, adapter);