Add an analyzer with tokenizer:keyword to prefix fields Default Elasticsearch analyzer drops square brackets when performing a query. A keyword tokenizer[1] outputs the exact same text for queries without dropping any characters. Also, a keyword tokenizer creates a single term for the given text which makes 'match_phrase_prefix'[2] searches work as intended by Gerrit. For example, consider change C1 with hashtag '[area] subsystem' and change C2 with 'area subsystem'. A Gerrit query [3] returns C1 with Lucene and C1,C2 with Elasticsearch (without this change). This helps match Elasticsearch's behaviour of 'prefixhashtag' and 'prefixtopic' operators with that of Lucene. [1] https://www.elastic.co/guide/en/elasticsearch/reference/7.17/analysis-keyword-tokenizer.html [2] https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-match-query-phrase-prefix.html [3] prefixhashtag:"[area]" Change-Id: Icf62611af9e8323f98d4cb21d619bf5bc3d73177
diff --git a/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java b/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java index 1668450..9ea9fcb 100644 --- a/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java +++ b/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java
@@ -40,8 +40,10 @@ || fieldType == FieldType.LONG) { mapping.addNumber(name); } else if (fieldType == FieldType.FULL_TEXT) { - mapping.addStringWithAnalyzer(name); - } else if (fieldType == FieldType.PREFIX || fieldType == FieldType.STORED_ONLY) { + mapping.addStringWithAnalyzer(name, "custom_with_char_filter"); + } else if (fieldType == FieldType.PREFIX) { + mapping.addStringWithAnalyzer(name, "keyword_tokenizer"); + } else if (fieldType == FieldType.STORED_ONLY) { mapping.addString(name); } else { throw new IllegalStateException("Unsupported field type: " + fieldType.getName()); @@ -101,9 +103,9 @@ return this; } - Builder addStringWithAnalyzer(String name) { + Builder addStringWithAnalyzer(String name, String analyzer) { FieldProperties key = new FieldProperties(adapter.stringFieldType()); - key.analyzer = "custom_with_char_filter"; + key.analyzer = analyzer; fields.put(name, key); return this; }
diff --git a/src/main/java/com/google/gerrit/elasticsearch/ElasticSetting.java b/src/main/java/com/google/gerrit/elasticsearch/ElasticSetting.java index 0059574..c054da6 100644 --- a/src/main/java/com/google/gerrit/elasticsearch/ElasticSetting.java +++ b/src/main/java/com/google/gerrit/elasticsearch/ElasticSetting.java
@@ -16,6 +16,7 @@ import com.google.common.collect.ImmutableMap; import com.google.gson.annotations.SerializedName; +import java.util.HashMap; import java.util.Map; class ElasticSetting { @@ -59,6 +60,12 @@ FieldProperties analyzer = new FieldProperties(); analyzer.customWithCharFilter = customAnalyzer; + analyzer.keywordTokenizer = + new HashMap<>() { + { + put("tokenizer", "keyword"); + } + }; fields.put("analyzer", analyzer); return this; } @@ -92,6 +99,7 @@ String[] mappings; FieldProperties customMapping; FieldProperties customWithCharFilter; + Map<String, String> keywordTokenizer; FieldProperties() {}
diff --git a/src/test/java/com/google/gerrit/elasticsearch/ElasticV7QueryChangesTest.java b/src/test/java/com/google/gerrit/elasticsearch/ElasticV7QueryChangesTest.java index 6d0602b..9160b9e 100644 --- a/src/test/java/com/google/gerrit/elasticsearch/ElasticV7QueryChangesTest.java +++ b/src/test/java/com/google/gerrit/elasticsearch/ElasticV7QueryChangesTest.java
@@ -15,18 +15,14 @@ package com.google.gerrit.elasticsearch; import static com.google.common.truth.Truth.assertThat; -import static com.google.common.truth.TruthJUnit.assume; import static com.google.gerrit.testing.GerritJUnit.assertThrows; import com.google.gerrit.entities.Change; import com.google.gerrit.exceptions.StorageException; -import com.google.gerrit.server.change.ChangeInserter; -import com.google.gerrit.server.index.change.ChangeField; import com.google.gerrit.server.query.change.AbstractQueryChangesTest; import com.google.gerrit.testing.ConfigSuite; import com.google.gerrit.testing.GerritTestName; import com.google.gerrit.testing.InMemoryRepositoryManager; -import com.google.gerrit.testing.InMemoryRepositoryManager.Repo; import com.google.inject.Injector; import org.apache.http.impl.nio.client.CloseableHttpAsyncClient; import org.apache.http.impl.nio.client.HttpAsyncClients; @@ -83,52 +79,6 @@ ElasticTestUtils.createAllIndexes(injector); } - @Test - @Override - // TODO(davido): overrides byTopic() method to adjust to ES behaviour for - // "prefixtopic" predicate. This should be fixed in a follow-up change. - public void byTopic() throws Exception { - - TestRepository<Repo> repo = createProject("repo"); - ChangeInserter ins1 = newChangeWithTopic(repo, "feature1"); - Change change1 = insert(repo, ins1); - - ChangeInserter ins2 = newChangeWithTopic(repo, "feature2"); - Change change2 = insert(repo, ins2); - - ChangeInserter ins3 = newChangeWithTopic(repo, "Cherrypick-feature2"); - Change change3 = insert(repo, ins3); - - ChangeInserter ins4 = newChangeWithTopic(repo, "feature2-fixup"); - Change change4 = insert(repo, ins4); - - ChangeInserter ins5 = newChangeWithTopic(repo, "https://gerrit.local"); - Change change5 = insert(repo, ins5); - - ChangeInserter ins6 = newChangeWithTopic(repo, "git_gerrit_training"); - Change change6 = insert(repo, ins6); - - Change change_no_topic = insert(repo, newChange(repo)); - - assertQuery("intopic:foo"); - assertQuery("intopic:feature1", change1); - assertQuery("intopic:feature2", change4, change3, change2); - assertQuery("topic:feature2", change2); - assertQuery("intopic:feature2", change4, change3, change2); - assertQuery("intopic:fixup", change4); - assertQuery("intopic:gerrit", change6, change5); - assertQuery("topic:\"\"", change_no_topic); - assertQuery("intopic:\"\"", change_no_topic); - - assume().that(getSchema().hasField(ChangeField.PREFIX_TOPIC)).isTrue(); - // change3 is considered by ES in prefixtopic:feature query, see - // https://www.elastic.co/guide/en/elasticsearch/reference/8.2/query-dsl-match-query-phrase-prefix.html - // assertQuery("prefixtopic:feature", change4, change2, change1); - assertQuery("prefixtopic:feature", change4, change3, change2, change1); - assertQuery("prefixtopic:Cher", change3); - assertQuery("prefixtopic:feature22"); - } - @Override protected Injector createInjector() { return ElasticTestUtils.createInjector(config, testName, container);