// blob: 7c1644872c721f30817e41e9775faaf9d16029d6 [file] [log] [blame]
/*
* Copyright 2014 gitblit.com.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.gitblit.tickets;
import java.io.File;
import java.io.IOException;
import java.text.MessageFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.gitblit.Keys;
import com.gitblit.manager.IRuntimeManager;
import com.gitblit.models.RepositoryModel;
import com.gitblit.models.TicketModel;
import com.gitblit.models.TicketModel.Attachment;
import com.gitblit.models.TicketModel.Patchset;
import com.gitblit.models.TicketModel.Status;
import com.gitblit.utils.LuceneIndexStore;
import com.gitblit.utils.StringUtils;
/**
* Indexes tickets in a Lucene database.
*
* @author James Moger
*
*/
public class TicketIndexer {
/**
 * Fields in the Lucene index.
 * <p>
 * Each constant is named after the document field it represents and carries
 * the {@link Type} that is used when the field is turned into a
 * {@link SortField}. The helper methods build fragments of Lucene
 * query-parser syntax for the field.
 */
public static enum Lucene {

    rid(Type.STRING),
    did(Type.STRING),
    project(Type.STRING),
    repository(Type.STRING),
    number(Type.LONG),
    title(Type.STRING),
    body(Type.STRING),
    topic(Type.STRING),
    created(Type.LONG),
    createdby(Type.STRING),
    updated(Type.LONG),
    updatedby(Type.STRING),
    responsible(Type.STRING),
    milestone(Type.STRING),
    status(Type.STRING),
    type(Type.STRING),
    labels(Type.STRING),
    participants(Type.STRING),
    watchedby(Type.STRING),
    mentions(Type.STRING),
    // Attachments are indexed as a ';'-flattened string of file names with
    // string doc values, so the sort type has to be STRING; an INT sort on
    // this field cannot be satisfied by the indexed data.
    attachments(Type.STRING),
    content(Type.STRING),
    patchset(Type.STRING),
    comments(Type.INT),
    mergesha(Type.STRING),
    mergeto(Type.STRING),
    patchsets(Type.INT),
    votes(Type.INT),
    //NOTE: Indexing on the underlying value to allow flexibility on naming
    priority(Type.INT),
    severity(Type.INT);

    /** Version number handed to the index store when it is created. */
    final static int INDEX_VERSION = 2;

    /** Sort type used by {@link #asSortField(boolean)}. */
    final Type fieldType;

    Lucene(Type fieldType) {
        this.fieldType = fieldType;
    }

    /**
     * @return the field name followed by a colon, e.g. {@code status:}
     */
    public String colon() {
        return name() + ":";
    }

    /**
     * Builds a {@code field:value} clause. A leading {@code '!'} on the
     * value negates the clause ({@code !field:value}).
     *
     * @param value the value to match, optionally prefixed with '!'
     * @return the clause, or an empty string if there is nothing to match
     */
    public String matches(String value) {
        if (StringUtils.isEmpty(value)) {
            return "";
        }
        boolean not = value.charAt(0) == '!';
        if (not) {
            String negated = value.substring(1);
            if (negated.isEmpty()) {
                // a lone "!" carries no value to negate
                return "";
            }
            return "!" + name() + ":" + escape(negated);
        }
        return name() + ":" + escape(value);
    }

    /**
     * Builds a {@code NOT field:value} clause.
     *
     * @param value the value that must not match
     * @return the clause, or an empty string if the value is empty
     */
    public String doesNotMatch(String value) {
        if (StringUtils.isEmpty(value)) {
            return "";
        }
        return "NOT " + name() + ":" + escape(value);
    }

    /**
     * Builds an open range clause that matches any document which has a
     * value for this field.
     *
     * @return {@code field:[* TO *]}
     */
    public String isNotNull() {
        // The range expression must not pass through escape(): quoting it
        // would turn the range into a phrase query for the literal text.
        return name() + ":[* TO *]";
    }

    /**
     * @param descending true to reverse the sort order
     * @return a sort field for this index field using its declared type
     */
    public SortField asSortField(boolean descending) {
        return new SortField(name(), fieldType, descending);
    }

    /**
     * Wraps the value in double quotes if it contains anything other than
     * letters or digits. A value that already starts with a double quote is
     * returned unchanged.
     */
    private String escape(String value) {
        if (value.isEmpty()) {
            return value;
        }
        if (value.charAt(0) != '"') {
            for (char c : value.toCharArray()) {
                if (!Character.isLetterOrDigit(c)) {
                    return "\"" + value + "\"";
                }
            }
        }
        return value;
    }

    /**
     * Looks up a field by name, ignoring case.
     *
     * @param value the field name
     * @return the matching field, or {@link #created} if there is no match
     */
    public static Lucene fromString(String value) {
        for (Lucene field : values()) {
            if (field.name().equalsIgnoreCase(value)) {
                return field;
            }
        }
        return created;
    }
}
// Logger named after the runtime class of this indexer.
private final Logger log = LoggerFactory.getLogger(getClass());
// Handle on the index location; used to create, probe and delete the index.
private final LuceneIndexStore indexStore;
// Opened lazily by getWriter() and reset to null by closeWriter().
private IndexWriter writer;
// Opened lazily by getSearcher() and reset to null by closeSearcher(),
// which the index/delete methods call after committing.
private IndexSearcher searcher;
public TicketIndexer(IRuntimeManager runtimeManager) {
File luceneDir = runtimeManager.getFileOrFolder(Keys.tickets.indexFolder, "${baseFolder}/tickets/lucene");
this.indexStore = new LuceneIndexStore(luceneDir, Lucene.INDEX_VERSION);
}
/**
 * Releases the searcher and then the writer held by this indexer.
 * Both are reopened on demand by the next operation that needs them.
 */
public void close() {
    // searcher first: its reader is opened from the writer
    this.closeSearcher();
    this.closeWriter();
}
/**
 * Removes the whole ticket index, i.e. the tickets of every repository.
 * The searcher and writer are closed before the index store is deleted.
 */
public void deleteAll() {
    this.close();
    this.indexStore.delete();
}
/**
 * Removes every indexed ticket of the given repository.
 * <p>
 * The tickets are selected by the repository id ({@code rid}) field. The
 * deletion is committed and the cached searcher is discarded. Any failure
 * is logged and reported as {@code false}.
 *
 * @param repository the repository whose tickets are removed
 * @return true if the document count dropped, false if nothing was
 *         deleted or an error occurred
 */
public boolean deleteAll(RepositoryModel repository) {
    try {
        IndexWriter indexWriter = getWriter();
        QueryParser ridParser = new QueryParser(Lucene.rid.name(), new StandardAnalyzer());
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        builder.add(ridParser.parse(repository.getRID()), Occur.MUST);
        BooleanQuery ridQuery = builder.build();

        int before = indexWriter.numDocs();
        indexWriter.deleteDocuments(ridQuery);
        indexWriter.commit();
        closeSearcher();
        int removed = before - indexWriter.numDocs();

        if (removed != 0) {
            log.debug(MessageFormat.format("deleted {0} records in {1}", removed, repository));
            return true;
        }
        log.debug(MessageFormat.format("no records found to delete in {0}", repository));
        return false;
    } catch (Exception e) {
        log.error("error", e);
        return false;
    }
}
/**
 * Tells whether the tickets need to be indexed from scratch.
 * <p>
 * This is the case when the index store reports that it has no index,
 * i.e. none that is compatible with Lucene.INDEX_VERSION and the Lucene
 * codec version.
 *
 * @return true if no tickets index is found, false otherwise.
 *
 * @since 1.9.0
 */
boolean shouldReindex() {
    boolean indexPresent = this.indexStore.hasIndex();
    return !indexPresent;
}
/**
 * Bulk-adds tickets to the Lucene index.
 * <p>
 * One document is appended per ticket and everything is committed in a
 * single step. Documents already indexed for the same tickets are not
 * removed by this method. Failures are logged, not thrown.
 *
 * @param tickets the tickets to add
 */
public void index(List<TicketModel> tickets) {
    try {
        IndexWriter indexWriter = getWriter();
        for (TicketModel each : tickets) {
            indexWriter.addDocument(ticketToDoc(each));
        }
        indexWriter.commit();
        // drop the cached searcher so the next query sees the new documents
        closeSearcher();
    } catch (Exception e) {
        log.error("error", e);
    }
}
/**
 * Adds a ticket to the Lucene index, replacing any document that was
 * indexed for it before. Failures are logged, not thrown.
 *
 * @param ticket the ticket to add or update
 */
public void index(TicketModel ticket) {
    try {
        IndexWriter indexWriter = getWriter();
        // remove the previous version of the ticket, if there is one
        delete(ticket.repository, ticket.number, indexWriter);
        indexWriter.addDocument(ticketToDoc(ticket));
        indexWriter.commit();
        closeSearcher();
    } catch (Exception e) {
        log.error("error", e);
    }
}
/**
 * Removes a ticket from the Lucene index. Failures are logged and
 * reported as {@code false}.
 *
 * @param ticket the ticket to remove
 * @return true, if deleted, false if no record was deleted
 */
public boolean delete(TicketModel ticket) {
    boolean deleted = false;
    try {
        IndexWriter indexWriter = getWriter();
        deleted = delete(ticket.repository, ticket.number, indexWriter);
    } catch (Exception e) {
        log.error("Failed to delete ticket " + ticket.number, e);
    }
    return deleted;
}
/**
 * Removes a ticket from the Lucene index using the supplied writer.
 * <p>
 * The ticket is selected by its document id ({@code did}), the SHA-1 of
 * the repository name concatenated with the ticket number. The deletion is
 * committed and the cached searcher is discarded.
 *
 * @param repository name of the repository the ticket belongs to
 * @param ticketId number of the ticket
 * @param writer the writer to delete with
 * @throws Exception if the query cannot be parsed or the index update fails
 * @return true, if deleted, false if no record was deleted
 */
private boolean delete(String repository, long ticketId, IndexWriter writer) throws Exception {
    QueryParser didParser = new QueryParser(Lucene.did.name(), new StandardAnalyzer());
    String did = StringUtils.getSHA1(repository + ticketId);
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(didParser.parse(did), Occur.MUST);
    BooleanQuery didQuery = builder.build();

    int before = writer.numDocs();
    writer.deleteDocuments(didQuery);
    writer.commit();
    closeSearcher();
    int removed = before - writer.numDocs();

    if (removed != 0) {
        log.debug(MessageFormat.format("deleted {0} records in {1}", removed, repository));
        return true;
    }
    log.debug(MessageFormat.format("no records found to delete in {0}", repository));
    return false;
}
/**
 * Tells whether at least one ticket of the repository is in the index.
 *
 * @param repository the repository to look up
 * @return true if there are indexed tickets
 */
public boolean hasTickets(RepositoryModel repository) {
    String ridClause = Lucene.rid.matches(repository.getRID());
    List<QueryResult> found = queryFor(ridClause, 1, 0, null, true);
    return !found.isEmpty();
}
/**
 * Search for tickets matching the query. The returned tickets are
 * shadows of the real ticket, but suitable for a results list.
 * <p>
 * The text is parsed once per searched field (title, body and content)
 * with leading wildcards allowed; a ticket matches if any of the three
 * matches. If a repository is given, the search is restricted to that
 * repository inside the query itself, so that paging is applied to the
 * tickets of that repository only.
 * <p>
 * Errors, including query syntax errors, are logged and produce an empty
 * or partial result rather than an exception.
 *
 * @param repository
 *            the repository to restrict the search to, or null to search
 *            all repositories
 * @param text
 *            the query text; an empty text yields an empty result
 * @param page
 *            the 1-based page number
 * @param pageSize
 *            the number of results per page; a value &lt;= 0 returns up
 *            to the maximum of 5000 results
 * @return search results
 */
public List<QueryResult> searchFor(RepositoryModel repository, String text, int page, int pageSize) {
    if (StringUtils.isEmpty(text)) {
        return Collections.emptyList();
    }
    Set<QueryResult> results = new LinkedHashSet<QueryResult>();
    StandardAnalyzer analyzer = new StandardAnalyzer();
    try {
        // search the title, description and content
        BooleanQuery.Builder textBldr = new BooleanQuery.Builder();
        for (Lucene field : new Lucene[] { Lucene.title, Lucene.body, Lucene.content }) {
            QueryParser qp = new QueryParser(field.name(), analyzer);
            qp.setAllowLeadingWildcard(true);
            textBldr.add(qp.parse(text), Occur.SHOULD);
        }
        Query query = textBldr.build();

        if (repository != null) {
            // Restrict by repository id in the query instead of filtering
            // the hits of a page afterwards: post-filtering produced short
            // or empty pages whenever other repositories matched as well.
            // The text query is nested as MUST so that at least one of its
            // SHOULD clauses is still required; FILTER does not affect the
            // score.
            QueryParser ridParser = new QueryParser(Lucene.rid.name(), analyzer);
            query = new BooleanQuery.Builder()
                    .add(query, Occur.MUST)
                    .add(ridParser.parse(repository.getRID()), Occur.FILTER)
                    .build();
        }

        IndexSearcher searcher = getSearcher();
        Query rewrittenQuery = searcher.rewrite(query);
        log.debug(rewrittenQuery.toString());

        int maxSize = 5000;
        TopScoreDocCollector collector = TopScoreDocCollector.create(maxSize);
        searcher.search(rewrittenQuery, collector);

        // same paging rule as queryFor: no page size means "everything"
        int size = (pageSize <= 0) ? maxSize : pageSize;
        int offset = Math.max(0, (page - 1) * size);
        ScoreDoc[] hits = collector.topDocs(offset, size).scoreDocs;
        for (ScoreDoc hit : hits) {
            Document doc = searcher.doc(hit.doc);
            results.add(docToQueryResult(doc));
        }
    } catch (Exception e) {
        log.error(MessageFormat.format("Exception while searching for {0}", text), e);
    }
    return new ArrayList<QueryResult>(results);
}
/**
 * Runs a query in Lucene query-parser syntax and returns one page of the
 * sorted matches. The returned tickets are shadows of the real ticket,
 * but suitable for a results list.
 * <p>
 * Terms without an explicit field are matched against the content field.
 * At most 5000 matches are considered. Each result carries its Lucene
 * document id and the total number of hits. Errors are logged and lead to
 * an empty or partial result.
 *
 * @param queryText
 *            the query; an empty query yields an empty result
 * @param page
 *            the 1-based page number
 * @param pageSize
 *            the number of results per page; a value &lt;= 0 selects the
 *            maximum of 5000
 * @param sortBy
 *            name of the field to sort by; null or an unknown name sorts
 *            by the created field
 * @param desc
 *            true to sort in descending order
 * @return the results of the requested page
 */
public List<QueryResult> queryFor(String queryText, int page, int pageSize, String sortBy, boolean desc) {
    if (StringUtils.isEmpty(queryText)) {
        return Collections.emptyList();
    }

    Set<QueryResult> matches = new LinkedHashSet<QueryResult>();
    StandardAnalyzer analyzer = new StandardAnalyzer();
    try {
        Query parsed = new QueryParser(Lucene.content.name(), analyzer).parse(queryText);

        IndexSearcher indexSearcher = getSearcher();
        Query rewritten = indexSearcher.rewrite(parsed);
        log.debug(rewritten.toString());

        Lucene sortField = (sortBy == null) ? Lucene.created : Lucene.fromString(sortBy);
        Sort sort = new Sort(sortField.asSortField(desc));

        final int maxSize = 5000;
        TopFieldDocs topDocs = indexSearcher.search(rewritten, maxSize, sort, false, false);

        int size = (pageSize > 0) ? pageSize : maxSize;
        int offset = Math.max(0, (page - 1) * size);
        for (ScoreDoc hit : subset(topDocs.scoreDocs, offset, size)) {
            QueryResult result = docToQueryResult(indexSearcher.doc(hit.doc));
            result.docId = hit.doc;
            result.totalResults = topDocs.totalHits;
            matches.add(result);
        }
    } catch (Exception e) {
        log.error(MessageFormat.format("Exception while searching for {0}", queryText), e);
    }
    return new ArrayList<QueryResult>(matches);
}
/**
 * Cuts one page out of the hits.
 *
 * @param docs all hits
 * @param offset index of the first hit of the page
 * @param size maximum number of hits on the page
 * @return a new array with up to {@code size} hits starting at
 *         {@code offset}; empty if the offset lies beyond the last hit
 */
private ScoreDoc [] subset(ScoreDoc [] docs, int offset, int size) {
    final int count;
    if (docs.length >= (offset + size)) {
        // a full page is available
        count = size;
    } else if (docs.length >= offset) {
        // last, partially filled page
        count = docs.length - offset;
    } else {
        return new ScoreDoc[0];
    }
    ScoreDoc [] pageOfDocs = new ScoreDoc[count];
    System.arraycopy(docs, offset, pageOfDocs, 0, count);
    return pageOfDocs;
}
/**
 * Returns the shared index writer, opening it on first use.
 * <p>
 * The index store is created if necessary and the writer is opened in
 * create-or-append mode. The method is synchronized, like
 * {@code closeWriter()}, so that two threads cannot both find the field
 * unset and each try to open a writer on the same directory.
 *
 * @return the index writer
 * @throws IOException if the index cannot be created or opened
 */
private synchronized IndexWriter getWriter() throws IOException {
    if (writer == null) {
        indexStore.create();
        Directory directory = FSDirectory.open(indexStore.getPath());
        boolean opened = false;
        try {
            StandardAnalyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig config = new IndexWriterConfig(analyzer);
            config.setOpenMode(OpenMode.CREATE_OR_APPEND);
            writer = new IndexWriter(directory, config);
            opened = true;
        } finally {
            if (!opened) {
                // do not leak the directory if the writer could not be opened
                directory.close();
            }
        }
    }
    return writer;
}
/**
 * Closes the index writer, if one is open, and forgets it. A failure to
 * close is logged; the writer reference is cleared in any case.
 */
private synchronized void closeWriter() {
    try {
        if (writer == null) {
            return; // the finally clause still runs
        }
        writer.close();
    } catch (Exception e) {
        log.error("failed to close writer!", e);
    } finally {
        writer = null;
    }
}
/**
 * Returns the shared index searcher, opening it on first use.
 * <p>
 * The searcher reads through a reader that is opened from the index
 * writer. The method is synchronized, like {@code closeSearcher()}, so
 * that two threads cannot both find the field unset and each open a
 * reader, one of which would never be closed.
 *
 * @return the index searcher
 * @throws IOException if the writer or the reader cannot be opened
 */
private synchronized IndexSearcher getSearcher() throws IOException {
    if (searcher == null) {
        searcher = new IndexSearcher(DirectoryReader.open(getWriter(), true));
    }
    return searcher;
}
/**
 * Closes the reader behind the index searcher, if one is open, and
 * forgets the searcher. A failure to close is logged; the searcher
 * reference is cleared in any case.
 */
private synchronized void closeSearcher() {
    try {
        if (searcher == null) {
            return; // the finally clause still runs
        }
        searcher.getIndexReader().close();
    } catch (Exception e) {
        log.error("failed to close searcher!", e);
    } finally {
        searcher = null;
    }
}
/**
 * Converts a ticket into the Lucene document that is stored in the index.
 * <p>
 * Collections (labels, participants, watchers, mentions, attachment
 * names) are lower-cased and flattened into one ';'-separated value. Of
 * the patchsets only the count and the most recent one are indexed. The
 * full indexable text of the ticket goes into the content field, which is
 * searchable but not stored.
 *
 * @param ticket the ticket to convert
 * @return a Lucene document
 */
private Document ticketToDoc(TicketModel ticket) {
    final Document doc = new Document();

    // hashed repository and document ids for Lucene querying
    toDocField(doc, Lucene.rid, StringUtils.getSHA1(ticket.repository));
    toDocField(doc, Lucene.did, StringUtils.getSHA1(ticket.repository + ticket.number));

    // plain ticket attributes
    toDocField(doc, Lucene.project, ticket.project);
    toDocField(doc, Lucene.repository, ticket.repository);
    toDocField(doc, Lucene.number, ticket.number);
    toDocField(doc, Lucene.title, ticket.title);
    toDocField(doc, Lucene.body, ticket.body);
    toDocField(doc, Lucene.created, ticket.created);
    toDocField(doc, Lucene.createdby, ticket.createdBy);
    toDocField(doc, Lucene.updated, ticket.updated);
    toDocField(doc, Lucene.updatedby, ticket.updatedBy);
    toDocField(doc, Lucene.responsible, ticket.responsible);
    toDocField(doc, Lucene.milestone, ticket.milestone);
    toDocField(doc, Lucene.topic, ticket.topic);
    toDocField(doc, Lucene.status, ticket.status.name());
    toDocField(doc, Lucene.comments, ticket.getComments().size());
    String typeName = (ticket.type != null) ? ticket.type.name() : null;
    toDocField(doc, Lucene.type, typeName);
    toDocField(doc, Lucene.mergesha, ticket.mergeSha);
    toDocField(doc, Lucene.mergeto, ticket.mergeTo);

    // flattened, lower-cased collections
    String labels = StringUtils.flattenStrings(ticket.getLabels(), ";").toLowerCase();
    toDocField(doc, Lucene.labels, labels);
    String participants = StringUtils.flattenStrings(ticket.getParticipants(), ";").toLowerCase();
    toDocField(doc, Lucene.participants, participants);
    String watchers = StringUtils.flattenStrings(ticket.getWatchers(), ";").toLowerCase();
    toDocField(doc, Lucene.watchedby, watchers);
    String mentions = StringUtils.flattenStrings(ticket.getMentions(), ";").toLowerCase();
    toDocField(doc, Lucene.mentions, mentions);

    toDocField(doc, Lucene.votes, ticket.getVoters().size());
    toDocField(doc, Lucene.priority, ticket.priority.getValue());
    toDocField(doc, Lucene.severity, ticket.severity.getValue());

    List<String> attachmentNames = new ArrayList<String>();
    for (Attachment each : ticket.getAttachments()) {
        attachmentNames.add(each.name.toLowerCase());
    }
    toDocField(doc, Lucene.attachments, StringUtils.flattenStrings(attachmentNames, ";"));

    List<Patchset> patchsets = ticket.getPatchsets();
    if (!patchsets.isEmpty()) {
        toDocField(doc, Lucene.patchsets, patchsets.size());

        // pack the most recent patchset as number:rev:tip:base:commits
        Patchset latest = patchsets.get(patchsets.size() - 1);
        StringBuilder packed = new StringBuilder();
        packed.append(latest.number).append(":");
        packed.append(latest.rev).append(":");
        packed.append(latest.tip).append(":");
        packed.append(latest.base).append(":");
        packed.append(latest.commits);
        doc.add(new org.apache.lucene.document.Field(Lucene.patchset.name(), packed.toString(), TextField.TYPE_STORED));
    }

    doc.add(new TextField(Lucene.content.name(), ticket.toIndexableString(), Store.NO));
    return doc;
}
/**
 * Adds a date to the document as its epoch-millisecond value: once as a
 * stored long field and once as numeric doc values. A null date adds
 * nothing.
 */
private void toDocField(Document doc, Lucene lucene, Date value) {
    if (value != null) {
        final long millis = value.getTime();
        final String fieldName = lucene.name();
        doc.add(new LongField(fieldName, millis, Store.YES));
        doc.add(new NumericDocValuesField(fieldName, millis));
    }
}
/**
 * Adds a long value to the document: once as a stored long field and once
 * as numeric doc values.
 */
private void toDocField(Document doc, Lucene lucene, long value) {
    final String fieldName = lucene.name();
    doc.add(new LongField(fieldName, value, Store.YES));
    doc.add(new NumericDocValuesField(fieldName, value));
}
/**
 * Adds an int value to the document: once as a stored int field and once
 * as numeric doc values.
 */
private void toDocField(Document doc, Lucene lucene, int value) {
    final String fieldName = lucene.name();
    doc.add(new IntField(fieldName, value, Store.YES));
    doc.add(new NumericDocValuesField(fieldName, value));
}
/**
 * Adds a string value to the document: once as a stored text field and
 * once as sorted doc values. An empty or null value adds nothing.
 * <p>
 * The stored text field always receives the complete value. The doc
 * values copy is cut off at 32766 bytes, the largest value Lucene accepts
 * for sorted doc values. Without the cut a long value, such as a large
 * ticket body, makes Lucene reject the whole document.
 */
private void toDocField(Document doc, Lucene lucene, String value) {
    if (StringUtils.isEmpty(value)) {
        return;
    }
    doc.add(new org.apache.lucene.document.Field(lucene.name(), value, TextField.TYPE_STORED));

    final int maxSortKeyBytes = 32766;
    BytesRef sortKey = new BytesRef(value);
    if (sortKey.length > maxSortKeyBytes) {
        // The key is compared byte-wise, so cutting inside a multi-byte
        // UTF-8 sequence is harmless.
        sortKey.length = maxSortKeyBytes;
    }
    doc.add(new SortedDocValuesField(lucene.name(), sortKey));
}
/**
 * Rebuilds a query result from the stored fields of a Lucene document.
 * The result is not a high-fidelity representation of the real ticket,
 * but it is suitable for display in a table of search results.
 * <p>
 * If the document has a packed patchset value
 * ({@code number:rev:tip:base:commits}), it is unpacked into the result.
 *
 * @param doc the document to convert
 * @return a query result
 * @throws ParseException
 */
private QueryResult docToQueryResult(Document doc) throws ParseException {
    final QueryResult result = new QueryResult();

    result.project = unpackString(doc, Lucene.project);
    result.repository = unpackString(doc, Lucene.repository);
    result.number = unpackLong(doc, Lucene.number);
    result.createdBy = unpackString(doc, Lucene.createdby);
    result.createdAt = unpackDate(doc, Lucene.created);
    result.updatedBy = unpackString(doc, Lucene.updatedby);
    result.updatedAt = unpackDate(doc, Lucene.updated);
    result.title = unpackString(doc, Lucene.title);
    result.body = unpackString(doc, Lucene.body);

    String statusName = unpackString(doc, Lucene.status);
    result.status = Status.fromObject(statusName, Status.New);

    result.responsible = unpackString(doc, Lucene.responsible);
    result.milestone = unpackString(doc, Lucene.milestone);
    result.topic = unpackString(doc, Lucene.topic);

    String typeName = unpackString(doc, Lucene.type);
    result.type = TicketModel.Type.fromObject(typeName, TicketModel.Type.defaultType);

    result.mergeSha = unpackString(doc, Lucene.mergesha);
    result.mergeTo = unpackString(doc, Lucene.mergeto);
    result.commentsCount = unpackInt(doc, Lucene.comments);
    result.votesCount = unpackInt(doc, Lucene.votes);
    result.attachments = unpackStrings(doc, Lucene.attachments);
    result.labels = unpackStrings(doc, Lucene.labels);
    result.participants = unpackStrings(doc, Lucene.participants);
    result.watchedby = unpackStrings(doc, Lucene.watchedby);
    result.mentions = unpackStrings(doc, Lucene.mentions);
    result.priority = TicketModel.Priority.fromObject(unpackInt(doc, Lucene.priority), TicketModel.Priority.defaultPriority);
    result.severity = TicketModel.Severity.fromObject(unpackInt(doc, Lucene.severity), TicketModel.Severity.defaultSeverity);

    String packed = doc.get(Lucene.patchset.name());
    if (!StringUtils.isEmpty(packed)) {
        // unpack most recent patchset
        String [] parts = packed.split(":", 5);
        Patchset latest = new Patchset();
        latest.number = Integer.parseInt(parts[0]);
        latest.rev = Integer.parseInt(parts[1]);
        latest.tip = parts[2];
        latest.base = parts[3];
        latest.commits = Integer.parseInt(parts[4]);
        result.patchset = latest;
    }
    return result;
}
/**
 * Reads the stored value of the field from the document.
 *
 * @return the value as returned by {@code Document.get} for the field name
 */
private String unpackString(Document doc, Lucene lucene) {
    final String fieldName = lucene.name();
    return doc.get(fieldName);
}
/**
 * Reads a ';'-separated stored value and splits it into its elements.
 *
 * @return the elements, or null if the field has no (non-empty) value
 */
private List<String> unpackStrings(Document doc, Lucene lucene) {
    String flattened = doc.get(lucene.name());
    if (StringUtils.isEmpty(flattened)) {
        return null;
    }
    return StringUtils.getStringsFromValue(flattened, ";");
}
/**
 * Reads a stored epoch-millisecond value and converts it into a date.
 *
 * @return the date, or null if the field has no (non-empty) value
 */
private Date unpackDate(Document doc, Lucene lucene) {
    String millis = doc.get(lucene.name());
    if (StringUtils.isEmpty(millis)) {
        return null;
    }
    return new Date(Long.parseLong(millis));
}
/**
 * Reads a stored value and parses it as a long.
 *
 * @return the parsed value, or 0 if the field has no (non-empty) value
 */
private long unpackLong(Document doc, Lucene lucene) {
    String stored = doc.get(lucene.name());
    return StringUtils.isEmpty(stored) ? 0 : Long.parseLong(stored);
}
/**
 * Reads a stored value and parses it as an int.
 *
 * @return the parsed value, or 0 if the field has no (non-empty) value
 */
private int unpackInt(Document doc, Lucene lucene) {
    String stored = doc.get(lucene.name());
    return StringUtils.isEmpty(stored) ? 0 : Integer.parseInt(stored);
}
}