Elasticsearch: Add char analyzer to ensure consistency of query results

With Elasticsearch, a query on a full-text field whose values contain
the characters "." or "_" did not return results in which the keyword
appears only as a substring. This differs from Lucene, where these two
characters are mapped to the space character (" ") so that the query
also matches keywords separated by them.

This change adds character mappings for Elasticsearch to ensure that
full-text queries return the same results as when using Lucene. At
index creation time, this change creates a new Elasticsearch setting
in which an analyzer with character mappings is configured. This
analyzer is then added to the Elasticsearch mappings so that it is used
by the full-text field queries.

Because Elasticsearch mappings and settings can only be configured at
index creation time, take the following steps to apply this change:

1. Delete the indices (changes, accounts, groups).
2. Initialize an Elasticsearch site with this change.
3. Reindex the documents (changes, accounts, groups).
4. Start the site.

This change applies to all the currently supported Elasticsearch
versions.

Bug: Issue 9146
Bug: Issue 9147
Change-Id: I6da7a98d35d912b5bee7cc510d02db4433f25538
diff --git a/gerrit-elasticsearch/src/main/java/com/google/gerrit/elasticsearch/AbstractElasticIndex.java b/gerrit-elasticsearch/src/main/java/com/google/gerrit/elasticsearch/AbstractElasticIndex.java
index 8667fed..4ab1409 100644
--- a/gerrit-elasticsearch/src/main/java/com/google/gerrit/elasticsearch/AbstractElasticIndex.java
+++ b/gerrit-elasticsearch/src/main/java/com/google/gerrit/elasticsearch/AbstractElasticIndex.java
@@ -19,6 +19,7 @@
 import static org.apache.commons.codec.binary.Base64.decodeBase64;
 
 import com.google.common.collect.FluentIterable;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.io.CharStreams;
 import com.google.gerrit.elasticsearch.ElasticMapping.MappingProperties;
 import com.google.gerrit.elasticsearch.builders.SearchSourceBuilder;
@@ -54,6 +55,7 @@
   protected static final String MAPPINGS = "mappings";
   protected static final String ORDER = "order";
   protected static final String SEARCH = "_search";
+  protected static final String SETTINGS = "settings";
 
   protected static <T> List<T> decodeProtos(
       JsonObject doc, String fieldName, ProtobufCodec<T> codec) {
@@ -156,7 +158,8 @@
     }
 
     // Recreate the index.
-    response = performRequest("PUT", getMappings(), indexName, Collections.emptyMap());
+    String indexCreationFields = concatJsonString(getSettings(), getMappings());
+    response = performRequest("PUT", indexCreationFields, indexName, Collections.emptyMap());
     statusCode = response.getStatusLine().getStatusCode();
     if (statusCode != HttpStatus.SC_OK) {
       String error = String.format("Failed to create index %s: %s", indexName, statusCode);
@@ -168,6 +171,10 @@
 
   protected abstract String getMappings();
 
+  private String getSettings() { // JSON object with a single "settings" entry.
+    return gson.toJson(ImmutableMap.of(SETTINGS, ElasticSetting.createSetting()));
+  }
+
   protected abstract String getId(V v);
 
   protected String getMappingsForSingleType(String candidateType, MappingProperties properties) {
@@ -225,6 +232,10 @@
     return performRequest("POST", payload, uri, params);
   }
 
+  private String concatJsonString(String target, String addition) { // "{a}","{b}" -> "{a,b}"
+    return target.substring(0, target.length() - 1) + "," + addition.substring(1);
+  } // NOTE(review): purely textual; assumes both arguments are non-empty "{...}" objects.
+
   private Response performRequest(
       String method, Object payload, String uri, Map<String, String> params) throws IOException {
     String payloadStr = payload instanceof String ? (String) payload : payload.toString();
diff --git a/gerrit-elasticsearch/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java b/gerrit-elasticsearch/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java
index e9f3cb3..9fcbaab 100644
--- a/gerrit-elasticsearch/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java
+++ b/gerrit-elasticsearch/src/main/java/com/google/gerrit/elasticsearch/ElasticMapping.java
@@ -34,9 +34,9 @@
           || fieldType == FieldType.INTEGER_RANGE
           || fieldType == FieldType.LONG) {
         mapping.addNumber(name);
-      } else if (fieldType == FieldType.PREFIX
-          || fieldType == FieldType.FULL_TEXT
-          || fieldType == FieldType.STORED_ONLY) {
+      } else if (fieldType == FieldType.FULL_TEXT) {
+        mapping.addStringWithAnalyzer(name);
+      } else if (fieldType == FieldType.PREFIX || fieldType == FieldType.STORED_ONLY) {
         mapping.addString(name);
       } else {
         throw new IllegalStateException("Unsupported field type: " + fieldType.getName());
@@ -88,6 +88,13 @@
       return this;
     }
 
+    Builder addStringWithAnalyzer(String name) { // String field with a named analyzer.
+      FieldProperties key = new FieldProperties(adapter.stringFieldType());
+      key.analyzer = "custom_with_char_filter"; // presumably defined by ElasticSetting
+      fields.put(name, key);
+      return this;
+    }
+
     Builder add(String name, String type) {
       fields.put(name, new FieldProperties(type));
       return this;
@@ -102,6 +109,7 @@
     String type;
     String index;
     String format;
+    String analyzer;
     Map<String, FieldProperties> fields;
 
     FieldProperties(String type) {
diff --git a/gerrit-elasticsearch/src/main/java/com/google/gerrit/elasticsearch/ElasticSetting.java b/gerrit-elasticsearch/src/main/java/com/google/gerrit/elasticsearch/ElasticSetting.java
new file mode 100644
index 0000000..6fd234d
--- /dev/null
+++ b/gerrit-elasticsearch/src/main/java/com/google/gerrit/elasticsearch/ElasticSetting.java
@@ -0,0 +1,92 @@
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gerrit.elasticsearch;
+
+import com.google.common.collect.ImmutableMap;
+import java.util.Map;
+
+class ElasticSetting { // Builds the index "analysis" settings: one char filter and one analyzer.
+  /** The custom char mappings of "." to " " and "_" to " ", written as Unicode escape strings. */
+  private static final ImmutableMap<String, String> CUSTOM_CHAR_MAPPING =
+      ImmutableMap.of("\\u002E", "\\u0020", "\\u005F", "\\u0020");
+
+  static SettingProperties createSetting() { // Char filter first, then the analyzer.
+    ElasticSetting.Builder settings = new ElasticSetting.Builder();
+    settings.addCharFilter();
+    settings.addAnalyzer();
+    return settings.build();
+  }
+
+  static class Builder { // Collects the entries of the "analysis" map.
+    private final ImmutableMap.Builder<String, FieldProperties> fields =
+        new ImmutableMap.Builder<>();
+
+    SettingProperties build() { // Freezes the collected entries into "analysis".
+      SettingProperties properties = new SettingProperties();
+      properties.analysis = fields.build();
+      return properties;
+    }
+
+    void addCharFilter() { // Adds a "mapping"-type filter under "char_filter".
+      FieldProperties charMapping = new FieldProperties("mapping");
+      charMapping.mappings = getCustomCharMappings(CUSTOM_CHAR_MAPPING);
+
+      FieldProperties charFilter = new FieldProperties();
+      charFilter.customMapping = charMapping;
+      fields.put("char_filter", charFilter);
+    }
+
+    void addAnalyzer() { // Adds a "custom"-type analyzer under "analyzer".
+      FieldProperties customAnalyzer = new FieldProperties("custom");
+      customAnalyzer.tokenizer = "standard";
+      customAnalyzer.charFilter = new String[] {"custom_mapping"}; // presumably the filter above
+      customAnalyzer.filter = new String[] {"lowercase"};
+
+      FieldProperties analyzer = new FieldProperties();
+      analyzer.customWithCharFilter = customAnalyzer;
+      fields.put("analyzer", analyzer);
+    }
+
+    private static String[] getCustomCharMappings(ImmutableMap<String, String> map) {
+      int mappingIndex = 0;
+      int numOfMappings = map.size();
+      String[] mapping = new String[numOfMappings];
+      for (Map.Entry<String, String> e : map.entrySet()) {
+        mapping[mappingIndex++] = e.getKey() + "=>" + e.getValue(); // "key=>value" rule
+      }
+      return mapping;
+    }
+  }
+
+  static class SettingProperties { // Top-level holder: only the "analysis" map.
+    Map<String, FieldProperties> analysis;
+  }
+
+  static class FieldProperties { // One shape reused for filters, analyzers and their wrappers.
+    String tokenizer;
+    String type;
+    String[] charFilter;
+    String[] filter;
+    String[] mappings; // "key=>value" rules, filled by Builder.addCharFilter().
+    FieldProperties customMapping; // Set by Builder.addCharFilter().
+    FieldProperties customWithCharFilter; // Set by Builder.addAnalyzer().
+
+    FieldProperties() {}
+
+    FieldProperties(String type) {
+      this.type = type;
+    }
+  }
+}