Merge "Tweak Lucene analyzer's definition of a whole word"
diff --git a/gerrit-lucene/src/main/java/com/google/gerrit/lucene/CustomMappingAnalyzer.java b/gerrit-lucene/src/main/java/com/google/gerrit/lucene/CustomMappingAnalyzer.java
new file mode 100644
index 0000000..3d7faeb
--- /dev/null
+++ b/gerrit-lucene/src/main/java/com/google/gerrit/lucene/CustomMappingAnalyzer.java
@@ -0,0 +1,65 @@
+// Copyright (C) 2014 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gerrit.lucene;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.charfilter.MappingCharFilter;
+import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
+
+import java.io.Reader;
+import java.util.Map;
+
+/**
+ * Wraps an analyzer so custom char mappings are applied to its input first.
+ * The mappings are compiled once, when the wrapper is constructed.
+ *
+ * <p>Example usage:
+ * <pre class="prettyprint">
+ * {@code
+ * Map<String,String> customMapping = new HashMap<>();
+ * customMapping.put("_", " ");
+ * customMapping.put(".", " ");
+ *
+ * CustomMappingAnalyzer analyzer =
+ *     new CustomMappingAnalyzer(new StandardAnalyzer(version), customMapping);
+ * }
+ * </pre>
+ */
+public class CustomMappingAnalyzer extends AnalyzerWrapper {
+  private final Analyzer delegate;
+  private final NormalizeCharMap charMap;
+
+  public CustomMappingAnalyzer(Analyzer delegate,
+      Map<String, String> customMappings) {
+    super(delegate.getReuseStrategy());
+    this.delegate = delegate;
+    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
+    for (Map.Entry<String, String> e : customMappings.entrySet()) {
+      builder.add(e.getKey(), e.getValue());
+    }
+    this.charMap = builder.build(); // built once, reused by every reader
+  }
+
+  @Override
+  protected Analyzer getWrappedAnalyzer(String fieldName) {
+    return delegate;
+  }
+
+  @Override
+  protected Reader wrapReader(String fieldName, Reader reader) {
+    return new MappingCharFilter(charMap, reader);
+  }
+}
diff --git a/gerrit-lucene/src/main/java/com/google/gerrit/lucene/LuceneChangeIndex.java b/gerrit-lucene/src/main/java/com/google/gerrit/lucene/LuceneChangeIndex.java
index 9ae5802..365f7e9 100644
--- a/gerrit-lucene/src/main/java/com/google/gerrit/lucene/LuceneChangeIndex.java
+++ b/gerrit-lucene/src/main/java/com/google/gerrit/lucene/LuceneChangeIndex.java
@@ -58,7 +58,6 @@
import com.google.inject.assistedinject.Assisted;
import com.google.inject.assistedinject.AssistedInject;
-import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
@@ -122,6 +121,8 @@
private static final String ID_FIELD = ChangeField.LEGACY_ID.getName();
private static final ImmutableSet<String> FIELDS = ImmutableSet.of(
ADDED_FIELD, APPROVAL_FIELD, CHANGE_FIELD, DELETED_FIELD, ID_FIELD);
+ private static final Map<String, String> CUSTOM_CHAR_MAPPING = ImmutableMap.of(
+ "_", " ", ".", " ");
private static final Map<Schema<ChangeData>, Version> LUCENE_VERSIONS;
static {
@@ -173,8 +174,10 @@
private long commitWithinMs;
private GerritIndexWriterConfig(Version version, Config cfg, String name) {
- luceneConfig = new IndexWriterConfig(version,
- new StandardAnalyzer(version, CharArraySet.EMPTY_SET));
+ CustomMappingAnalyzer analyzer =
+ new CustomMappingAnalyzer(new StandardAnalyzer(version,
+ CharArraySet.EMPTY_SET), CUSTOM_CHAR_MAPPING);
+ luceneConfig = new IndexWriterConfig(version, analyzer);
luceneConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
double m = 1 << 20;
luceneConfig.setRAMBufferSizeMB(cfg.getLong(
@@ -237,9 +240,9 @@
Version luceneVersion = checkNotNull(
LUCENE_VERSIONS.get(schema),
"unknown Lucene version for index schema: %s", schema);
-
- Analyzer analyzer =
- new StandardAnalyzer(luceneVersion, CharArraySet.EMPTY_SET);
+ CustomMappingAnalyzer analyzer =
+ new CustomMappingAnalyzer(new StandardAnalyzer(luceneVersion,
+ CharArraySet.EMPTY_SET), CUSTOM_CHAR_MAPPING);
queryBuilder = new QueryBuilder(schema, analyzer);
GerritIndexWriterConfig openConfig =
diff --git a/gerrit-server/src/test/java/com/google/gerrit/server/query/change/LuceneQueryChangesTest.java b/gerrit-server/src/test/java/com/google/gerrit/server/query/change/LuceneQueryChangesTest.java
index 370dd9d..1c75487 100644
--- a/gerrit-server/src/test/java/com/google/gerrit/server/query/change/LuceneQueryChangesTest.java
+++ b/gerrit-server/src/test/java/com/google/gerrit/server/query/change/LuceneQueryChangesTest.java
@@ -14,12 +14,39 @@
package com.google.gerrit.server.query.change;
+import static org.junit.Assert.assertTrue;
+
+import com.google.gerrit.reviewdb.client.Change;
import com.google.gerrit.testutil.InMemoryModule;
import com.google.inject.Guice;
import com.google.inject.Injector;
+import org.eclipse.jgit.internal.storage.dfs.InMemoryRepository;
+import org.eclipse.jgit.junit.TestRepository;
+import org.eclipse.jgit.revwalk.RevCommit;
+import org.junit.Test;
+
public class LuceneQueryChangesTest extends AbstractQueryChangesTest {
protected Injector createInjector() {
return Guice.createInjector(new InMemoryModule());
}
+
+  @Test
+  public void fullTextWithSpecialChars() throws Exception {
+    TestRepository<InMemoryRepository> tr = createProject("repo");
+    RevCommit fooCommit = tr.parseBody(
+        tr.commit().message("foo_bar_foo").create());
+    Change fooChange = newChange(tr, fooCommit, null, null, null).insert();
+    RevCommit oneCommit = tr.parseBody(
+        tr.commit().message("one.two.three").create());
+    Change oneChange = newChange(tr, oneCommit, null, null, null).insert();
+    // A partial word finds nothing; words joined by '_' or '.' are found.
+    assertTrue(query("message:foo_ba").isEmpty());
+    assertResultEquals(fooChange, queryOne("message:bar"));
+    assertResultEquals(fooChange, queryOne("message:foo_bar"));
+    assertResultEquals(fooChange, queryOne("message:foo bar"));
+    assertResultEquals(oneChange, queryOne("message:two"));
+    assertResultEquals(oneChange, queryOne("message:one.two"));
+    assertResultEquals(oneChange, queryOne("message:one two"));
+  }
}