Add an analyzer with tokenizer:keyword to prefix fields
Default Elasticsearch analyzer drops square brackets when
performing a query. A keyword tokenizer[1] outputs the
exact same text for queries without dropping any characters.
Also, a keyword tokenizer creates a single term for the given
text which makes 'match_phrase_prefix'[2] searches work as
intended by Gerrit.
For example, consider change C1 with hashtag '[area] subsystem'
and change C2 with 'area subsystem'. A Gerrit query [3] returns
C1 with Lucene and C1,C2 with Elasticsearch (without this change).
This helps match Elasticsearch's behaviour of 'prefixhashtag'
and 'prefixtopic' operators with that of Lucene.
[1] https://www.elastic.co/guide/en/elasticsearch/reference/7.17/analysis-keyword-tokenizer.html
[2] https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-match-query-phrase-prefix.html
[3] prefixhashtag:"[area]"
Change-Id: Icf62611af9e8323f98d4cb21d619bf5bc3d73177
diff --git a/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java b/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java
index 1668450..9ea9fcb 100644
--- a/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java
+++ b/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java
@@ -40,8 +40,10 @@
|| fieldType == FieldType.LONG) {
mapping.addNumber(name);
} else if (fieldType == FieldType.FULL_TEXT) {
- mapping.addStringWithAnalyzer(name);
- } else if (fieldType == FieldType.PREFIX || fieldType == FieldType.STORED_ONLY) {
+ mapping.addStringWithAnalyzer(name, "custom_with_char_filter");
+ } else if (fieldType == FieldType.PREFIX) {
+ mapping.addStringWithAnalyzer(name, "keyword_tokenizer");
+ } else if (fieldType == FieldType.STORED_ONLY) {
mapping.addString(name);
} else {
throw new IllegalStateException("Unsupported field type: " + fieldType.getName());
@@ -101,9 +103,9 @@
return this;
}
- Builder addStringWithAnalyzer(String name) {
+ Builder addStringWithAnalyzer(String name, String analyzer) {
FieldProperties key = new FieldProperties(adapter.stringFieldType());
- key.analyzer = "custom_with_char_filter";
+ key.analyzer = analyzer;
fields.put(name, key);
return this;
}
diff --git a/src/main/java/com/google/gerrit/elasticsearch/ElasticSetting.java b/src/main/java/com/google/gerrit/elasticsearch/ElasticSetting.java
index 0059574..c054da6 100644
--- a/src/main/java/com/google/gerrit/elasticsearch/ElasticSetting.java
+++ b/src/main/java/com/google/gerrit/elasticsearch/ElasticSetting.java
@@ -16,6 +16,7 @@
import com.google.common.collect.ImmutableMap;
import com.google.gson.annotations.SerializedName;
+import java.util.HashMap;
import java.util.Map;
class ElasticSetting {
@@ -59,6 +60,12 @@
FieldProperties analyzer = new FieldProperties();
analyzer.customWithCharFilter = customAnalyzer;
+ analyzer.keywordTokenizer =
+ new HashMap<>() {
+ {
+ put("tokenizer", "keyword");
+ }
+ };
fields.put("analyzer", analyzer);
return this;
}
@@ -92,6 +99,7 @@
String[] mappings;
FieldProperties customMapping;
FieldProperties customWithCharFilter;
+ Map<String, String> keywordTokenizer;
FieldProperties() {}
diff --git a/src/test/java/com/google/gerrit/elasticsearch/ElasticV7QueryChangesTest.java b/src/test/java/com/google/gerrit/elasticsearch/ElasticV7QueryChangesTest.java
index 6d0602b..9160b9e 100644
--- a/src/test/java/com/google/gerrit/elasticsearch/ElasticV7QueryChangesTest.java
+++ b/src/test/java/com/google/gerrit/elasticsearch/ElasticV7QueryChangesTest.java
@@ -15,18 +15,14 @@
package com.google.gerrit.elasticsearch;
import static com.google.common.truth.Truth.assertThat;
-import static com.google.common.truth.TruthJUnit.assume;
import static com.google.gerrit.testing.GerritJUnit.assertThrows;
import com.google.gerrit.entities.Change;
import com.google.gerrit.exceptions.StorageException;
-import com.google.gerrit.server.change.ChangeInserter;
-import com.google.gerrit.server.index.change.ChangeField;
import com.google.gerrit.server.query.change.AbstractQueryChangesTest;
import com.google.gerrit.testing.ConfigSuite;
import com.google.gerrit.testing.GerritTestName;
import com.google.gerrit.testing.InMemoryRepositoryManager;
-import com.google.gerrit.testing.InMemoryRepositoryManager.Repo;
import com.google.inject.Injector;
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient;
import org.apache.http.impl.nio.client.HttpAsyncClients;
@@ -83,52 +79,6 @@
ElasticTestUtils.createAllIndexes(injector);
}
- @Test
- @Override
- // TODO(davido): overrides byTopic() method to adjust to ES behaviour for
- // "prefixtopic" predicate. This should be fixed in a follow-up change.
- public void byTopic() throws Exception {
-
- TestRepository<Repo> repo = createProject("repo");
- ChangeInserter ins1 = newChangeWithTopic(repo, "feature1");
- Change change1 = insert(repo, ins1);
-
- ChangeInserter ins2 = newChangeWithTopic(repo, "feature2");
- Change change2 = insert(repo, ins2);
-
- ChangeInserter ins3 = newChangeWithTopic(repo, "Cherrypick-feature2");
- Change change3 = insert(repo, ins3);
-
- ChangeInserter ins4 = newChangeWithTopic(repo, "feature2-fixup");
- Change change4 = insert(repo, ins4);
-
- ChangeInserter ins5 = newChangeWithTopic(repo, "https://gerrit.local");
- Change change5 = insert(repo, ins5);
-
- ChangeInserter ins6 = newChangeWithTopic(repo, "git_gerrit_training");
- Change change6 = insert(repo, ins6);
-
- Change change_no_topic = insert(repo, newChange(repo));
-
- assertQuery("intopic:foo");
- assertQuery("intopic:feature1", change1);
- assertQuery("intopic:feature2", change4, change3, change2);
- assertQuery("topic:feature2", change2);
- assertQuery("intopic:feature2", change4, change3, change2);
- assertQuery("intopic:fixup", change4);
- assertQuery("intopic:gerrit", change6, change5);
- assertQuery("topic:\"\"", change_no_topic);
- assertQuery("intopic:\"\"", change_no_topic);
-
- assume().that(getSchema().hasField(ChangeField.PREFIX_TOPIC)).isTrue();
- // change3 is considered by ES in prefixtopic:feature query, see
- // https://www.elastic.co/guide/en/elasticsearch/reference/8.2/query-dsl-match-query-phrase-prefix.html
- // assertQuery("prefixtopic:feature", change4, change2, change1);
- assertQuery("prefixtopic:feature", change4, change3, change2, change1);
- assertQuery("prefixtopic:Cher", change3);
- assertQuery("prefixtopic:feature22");
- }
-
@Override
protected Injector createInjector() {
return ElasticTestUtils.createInjector(config, testName, container);