Use Lucene's QueryBuilder for building full-text queries
FuzzyQuery was a mistake; it uses edit distance to find terms in the
index close to the provided search term. This produces bizarre results
for queries like "message:1234".
Instead, use Lucene's QueryBuilder with an analyzer to convert a
full-text search word/phrase into a phrase query.
Add some tests for full-text matching behavior on numbers; these
should hopefully not depend too much on Lucene-specific behavior.
Incidentally, a copy-paste error in the byMessageExact test kept
this poor behavior from showing up in tests sooner.
Change-Id: I384f74f1455d0433433a27f880204ac8ecbf93da
diff --git a/gerrit-solr/BUCK b/gerrit-solr/BUCK
index c072830..ec3c728 100644
--- a/gerrit-solr/BUCK
+++ b/gerrit-solr/BUCK
@@ -12,6 +12,7 @@
'//lib/guice:guice',
'//lib/jgit:jgit',
'//lib/log:api',
+ '//lib/lucene:analyzers-common',
'//lib/lucene:core',
'//lib/solr:solrj',
],
diff --git a/gerrit-solr/src/main/java/com/google/gerrit/solr/SolrChangeIndex.java b/gerrit-solr/src/main/java/com/google/gerrit/solr/SolrChangeIndex.java
index 73dc07e..7101a4d 100644
--- a/gerrit-solr/src/main/java/com/google/gerrit/solr/SolrChangeIndex.java
+++ b/gerrit-solr/src/main/java/com/google/gerrit/solr/SolrChangeIndex.java
@@ -45,7 +45,10 @@
import com.google.gwtorm.server.ResultSet;
import com.google.inject.Provider;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.search.Query;
+import org.apache.lucene.util.Version;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
@@ -79,6 +82,7 @@
private final CloudSolrServer openIndex;
private final CloudSolrServer closedIndex;
private final Schema<ChangeData> schema;
+ private final QueryBuilder queryBuilder;
SolrChangeIndex(
@GerritServerConfig Config cfg,
@@ -101,6 +105,14 @@
throw new IllegalStateException("index.solr.url must be supplied");
}
+ // Version is only used to determine the list of stop words used by the
+ // analyzer, so use the latest version rather than trying to match the Solr
+ // server version.
+ @SuppressWarnings("deprecation")
+ Version v = Version.LUCENE_CURRENT;
+ queryBuilder = new QueryBuilder(
+ schema, new StandardAnalyzer(v, CharArraySet.EMPTY_SET));
+
base = Strings.nullToEmpty(base);
openIndex = new CloudSolrServer(url);
openIndex.setDefaultCollection(base + CHANGES_OPEN);
@@ -208,7 +220,7 @@
if (!Sets.intersection(statuses, CLOSED_STATUSES).isEmpty()) {
indexes.add(closedIndex);
}
- return new QuerySource(indexes, QueryBuilder.toQuery(schema, p), limit,
+ return new QuerySource(indexes, queryBuilder.toQuery(p), limit,
ChangeQueryBuilder.hasNonTrivialSortKeyAfter(schema, p));
}