/* | |
* Copyright 2012 gitblit.com. | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
package com.gitblit.service; | |
import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF; | |
import java.io.ByteArrayOutputStream; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.text.MessageFormat; | |
import java.text.ParseException; | |
import java.util.ArrayList; | |
import java.util.Collections; | |
import java.util.Comparator; | |
import java.util.HashMap; | |
import java.util.LinkedHashSet; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.Set; | |
import java.util.TreeMap; | |
import java.util.TreeSet; | |
import java.util.concurrent.ConcurrentHashMap; | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
import org.apache.lucene.document.DateTools; | |
import org.apache.lucene.document.DateTools.Resolution; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field; | |
import org.apache.lucene.document.StringField; | |
import org.apache.lucene.document.TextField; | |
import org.apache.lucene.index.DirectoryReader; | |
import org.apache.lucene.index.IndexReader; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.index.IndexWriterConfig.OpenMode; | |
import org.apache.lucene.index.MultiReader; | |
import org.apache.lucene.index.Term; | |
import org.apache.lucene.queryparser.classic.QueryParser; | |
import org.apache.lucene.search.BooleanClause.Occur; | |
import org.apache.lucene.search.BooleanQuery; | |
import org.apache.lucene.search.IndexSearcher; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.ScoreDoc; | |
import org.apache.lucene.search.TopScoreDocCollector; | |
import org.apache.lucene.search.highlight.Fragmenter; | |
import org.apache.lucene.search.highlight.Highlighter; | |
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; | |
import org.apache.lucene.search.highlight.QueryScorer; | |
import org.apache.lucene.search.highlight.SimpleHTMLFormatter; | |
import org.apache.lucene.search.highlight.SimpleSpanFragmenter; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.FSDirectory; | |
import org.apache.lucene.util.Version; | |
import org.apache.tika.metadata.Metadata; | |
import org.apache.tika.parser.AutoDetectParser; | |
import org.apache.tika.parser.ParseContext; | |
import org.apache.tika.parser.pdf.PDFParser; | |
import org.apache.tika.sax.BodyContentHandler; | |
import org.eclipse.jgit.diff.DiffEntry.ChangeType; | |
import org.eclipse.jgit.lib.Constants; | |
import org.eclipse.jgit.lib.FileMode; | |
import org.eclipse.jgit.lib.ObjectId; | |
import org.eclipse.jgit.lib.ObjectLoader; | |
import org.eclipse.jgit.lib.ObjectReader; | |
import org.eclipse.jgit.lib.Repository; | |
import org.eclipse.jgit.lib.RepositoryCache.FileKey; | |
import org.eclipse.jgit.revwalk.RevCommit; | |
import org.eclipse.jgit.revwalk.RevTree; | |
import org.eclipse.jgit.revwalk.RevWalk; | |
import org.eclipse.jgit.storage.file.FileBasedConfig; | |
import org.eclipse.jgit.treewalk.EmptyTreeIterator; | |
import org.eclipse.jgit.treewalk.TreeWalk; | |
import org.eclipse.jgit.util.FS; | |
import org.slf4j.Logger; | |
import org.slf4j.LoggerFactory; | |
import com.gitblit.Constants.SearchObjectType; | |
import com.gitblit.GitBlit; | |
import com.gitblit.IStoredSettings; | |
import com.gitblit.Keys; | |
import com.gitblit.manager.FilestoreManager; | |
import com.gitblit.manager.IFilestoreManager; | |
import com.gitblit.manager.IRepositoryManager; | |
import com.gitblit.models.PathModel.PathChangeModel; | |
import com.gitblit.models.RefModel; | |
import com.gitblit.models.RepositoryModel; | |
import com.gitblit.models.SearchResult; | |
import com.gitblit.utils.ArrayUtils; | |
import com.gitblit.utils.JGitUtils; | |
import com.gitblit.utils.StringUtils; | |
/** | |
* The Lucene service handles indexing and searching repositories. | |
* | |
* @author James Moger | |
* | |
*/ | |
public class LuceneService implements Runnable { | |
// Bump to force a full reindex on upgrade; compared against the on-disk
// value in shouldReindex().
private static final int INDEX_VERSION = 6;
// Lucene document field names shared by the indexing and search code.
private static final String FIELD_OBJECT_TYPE = "type";
private static final String FIELD_PATH = "path";
private static final String FIELD_COMMIT = "commit";
private static final String FIELD_BRANCH = "branch";
private static final String FIELD_SUMMARY = "summary";
private static final String FIELD_CONTENT = "content";
private static final String FIELD_AUTHOR = "author";
private static final String FIELD_COMMITTER = "committer";
private static final String FIELD_DATE = "date";
private static final String FIELD_TAG = "tag";
// Per-repository config file (git-config format) tracking index state.
private static final String CONF_FILE = "lucene.conf";
// Index folder name inside the repository's .git directory.
private static final String LUCENE_DIR = "lucene";
// Config sections/keys used in lucene.conf.
private static final String CONF_INDEX = "index";
private static final String CONF_VERSION = "version";
private static final String CONF_ALIAS = "aliases";
private static final String CONF_BRANCH = "branches";
private static final Version LUCENE_VERSION = Version.LUCENE_4_10_0;
private final Logger logger = LoggerFactory.getLogger(LuceneService.class);
private final IStoredSettings storedSettings;
private final IRepositoryManager repositoryManager;
private final IFilestoreManager filestoreManager;
private final File repositoriesFolder;
// Cached searchers/writers keyed by repository name; concurrent maps because
// close(String) and run() may touch them from different threads.
private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
// Default space-delimited blacklist of file extensions whose content is
// not indexed (metadata still is); overridable via settings.
private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
// Current blacklist, reloaded from settings on each run().
private Set<String> excludedExtensions;
/**
 * Creates the Lucene service.
 *
 * @param settings
 *            the Gitblit settings; may be null, in which case the built-in
 *            extension blacklist default is used
 * @param repositoryManager
 *            provides repository access and garbage-collection state
 * @param filestoreManager
 *            resolves filestore (LFS) object storage paths
 */
public LuceneService(
		IStoredSettings settings,
		IRepositoryManager repositoryManager,
		IFilestoreManager filestoreManager) {
	this.storedSettings = settings;
	this.repositoryManager = repositoryManager;
	this.filestoreManager = filestoreManager;
	this.repositoriesFolder = repositoryManager.getRepositoriesFolder();
	// seed the excluded-extension set from settings when available
	String exts = luceneIgnoreExtensions;
	if (settings != null) {
		exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
	}
	excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
}
/** | |
* Run is executed by the Gitblit executor service. Because this is called | |
* by an executor service, calls will queue - i.e. there can never be | |
* concurrent execution of repository index updates. | |
*/ | |
@Override | |
public void run() { | |
if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) { | |
// Lucene indexing is disabled | |
return; | |
} | |
// reload the excluded extensions | |
String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions); | |
excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts)); | |
if (repositoryManager.isCollectingGarbage()) { | |
// busy collecting garbage, try again later | |
return; | |
} | |
for (String repositoryName: repositoryManager.getRepositoryList()) { | |
RepositoryModel model = repositoryManager.getRepositoryModel(repositoryName); | |
if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) { | |
Repository repository = repositoryManager.getRepository(model.name); | |
if (repository == null) { | |
if (repositoryManager.isCollectingGarbage(model.name)) { | |
logger.info(MessageFormat.format("Skipping Lucene index of {0}, busy garbage collecting", repositoryName)); | |
} | |
continue; | |
} | |
index(model, repository); | |
repository.close(); | |
System.gc(); | |
} | |
} | |
} | |
/** | |
* Synchronously indexes a repository. This may build a complete index of a | |
* repository or it may update an existing index. | |
* | |
* @param displayName | |
* the name of the repository | |
* @param repository | |
* the repository object | |
*/ | |
private void index(RepositoryModel model, Repository repository) { | |
try { | |
if (shouldReindex(repository)) { | |
// (re)build the entire index | |
IndexResult result = reindex(model, repository); | |
if (result.success) { | |
if (result.commitCount > 0) { | |
String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs"; | |
logger.info(MessageFormat.format(msg, model.name, result.commitCount, | |
result.blobCount, result.branchCount, result.duration())); | |
} | |
} else { | |
String msg = "Could not build {0} Lucene index!"; | |
logger.error(MessageFormat.format(msg, model.name)); | |
} | |
} else { | |
// update the index with latest commits | |
IndexResult result = updateIndex(model, repository); | |
if (result.success) { | |
if (result.commitCount > 0) { | |
String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs"; | |
logger.info(MessageFormat.format(msg, model.name, result.commitCount, | |
result.blobCount, result.branchCount, result.duration())); | |
} | |
} else { | |
String msg = "Could not update {0} Lucene index!"; | |
logger.error(MessageFormat.format(msg, model.name)); | |
} | |
} | |
} catch (Throwable t) { | |
logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t); | |
} | |
} | |
/** | |
* Close the writer/searcher objects for a repository. | |
* | |
* @param repositoryName | |
*/ | |
public synchronized void close(String repositoryName) { | |
try { | |
IndexSearcher searcher = searchers.remove(repositoryName); | |
if (searcher != null) { | |
searcher.getIndexReader().close(); | |
} | |
} catch (Exception e) { | |
logger.error("Failed to close index searcher for " + repositoryName, e); | |
} | |
try { | |
IndexWriter writer = writers.remove(repositoryName); | |
if (writer != null) { | |
writer.close(); | |
} | |
} catch (Exception e) { | |
logger.error("Failed to close index writer for " + repositoryName, e); | |
} | |
} | |
/** | |
* Close all Lucene indexers. | |
* | |
*/ | |
public synchronized void close() { | |
// close all writers | |
for (String writer : writers.keySet()) { | |
try { | |
writers.get(writer).close(true); | |
} catch (Throwable t) { | |
logger.error("Failed to close Lucene writer for " + writer, t); | |
} | |
} | |
writers.clear(); | |
// close all searchers | |
for (String searcher : searchers.keySet()) { | |
try { | |
searchers.get(searcher).getIndexReader().close(); | |
} catch (Throwable t) { | |
logger.error("Failed to close Lucene searcher for " + searcher, t); | |
} | |
} | |
searchers.clear(); | |
} | |
/** | |
* Deletes the Lucene index for the specified repository. | |
* | |
* @param repositoryName | |
* @return true, if successful | |
*/ | |
public boolean deleteIndex(String repositoryName) { | |
try { | |
// close any open writer/searcher | |
close(repositoryName); | |
// delete the index folder | |
File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED); | |
File luceneIndex = new File(repositoryFolder, LUCENE_DIR); | |
if (luceneIndex.exists()) { | |
org.eclipse.jgit.util.FileUtils.delete(luceneIndex, | |
org.eclipse.jgit.util.FileUtils.RECURSIVE); | |
} | |
// delete the config file | |
File luceneConfig = new File(repositoryFolder, CONF_FILE); | |
if (luceneConfig.exists()) { | |
luceneConfig.delete(); | |
} | |
return true; | |
} catch (IOException e) { | |
throw new RuntimeException(e); | |
} | |
} | |
/** | |
* Returns the author for the commit, if this information is available. | |
* | |
* @param commit | |
* @return an author or unknown | |
*/ | |
private String getAuthor(RevCommit commit) { | |
String name = "unknown"; | |
try { | |
name = commit.getAuthorIdent().getName(); | |
if (StringUtils.isEmpty(name)) { | |
name = commit.getAuthorIdent().getEmailAddress(); | |
} | |
} catch (NullPointerException n) { | |
} | |
return name; | |
} | |
/** | |
* Returns the committer for the commit, if this information is available. | |
* | |
* @param commit | |
* @return an committer or unknown | |
*/ | |
private String getCommitter(RevCommit commit) { | |
String name = "unknown"; | |
try { | |
name = commit.getCommitterIdent().getName(); | |
if (StringUtils.isEmpty(name)) { | |
name = commit.getCommitterIdent().getEmailAddress(); | |
} | |
} catch (NullPointerException n) { | |
} | |
return name; | |
} | |
/** | |
* Get the tree associated with the given commit. | |
* | |
* @param walk | |
* @param commit | |
* @return tree | |
* @throws IOException | |
*/ | |
private RevTree getTree(final RevWalk walk, final RevCommit commit) | |
throws IOException { | |
final RevTree tree = commit.getTree(); | |
if (tree != null) { | |
return tree; | |
} | |
walk.parseHeaders(commit); | |
return commit.getTree(); | |
} | |
/** | |
* Construct a keyname from the branch. | |
* | |
* @param branchName | |
* @return a keyname appropriate for the Git config file format | |
*/ | |
private String getBranchKey(String branchName) { | |
return StringUtils.getSHA1(branchName); | |
} | |
/** | |
* Returns the Lucene configuration for the specified repository. | |
* | |
* @param repository | |
* @return a config object | |
*/ | |
private FileBasedConfig getConfig(Repository repository) { | |
File file = new File(repository.getDirectory(), CONF_FILE); | |
FileBasedConfig config = new FileBasedConfig(file, FS.detect()); | |
return config; | |
} | |
/** | |
* Reads the Lucene config file for the repository to check the index | |
* version. If the index version is different, then rebuild the repository | |
* index. | |
* | |
* @param repository | |
* @return true of the on-disk index format is different than INDEX_VERSION | |
*/ | |
private boolean shouldReindex(Repository repository) { | |
try { | |
FileBasedConfig config = getConfig(repository); | |
config.load(); | |
int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0); | |
// reindex if versions do not match | |
return indexVersion != INDEX_VERSION; | |
} catch (Throwable t) { | |
} | |
return true; | |
} | |
/**
 * This completely indexes the repository and will destroy any existing
 * index.
 *
 * Walks every indexed branch: snapshots the paths of the tip tree, then
 * walks history newest-first attributing each path to the commit that last
 * touched it (so each blob is indexed once, with its last-change metadata),
 * and finally indexes all commit objects reachable from the tip.
 *
 * @param model
 *            the repository model (name and indexed-branch settings)
 * @param repository
 *            the repository object
 * @return IndexResult
 */
public IndexResult reindex(RepositoryModel model, Repository repository) {
	IndexResult result = new IndexResult();
	// destroy any existing index/config first; abort on failure
	if (!deleteIndex(model.name)) {
		return result;
	}
	try {
		String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
		FileBasedConfig config = getConfig(repository);
		Set<String> indexedCommits = new TreeSet<String>();
		IndexWriter writer = getIndexWriter(model.name);
		// build a quick lookup of tags, keyed by the referenced commit id
		Map<String, List<String>> tags = new HashMap<String, List<String>>();
		for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
			if (!tag.isAnnotatedTag()) {
				// skip non-annotated tags
				continue;
			}
			if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
				tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
			}
			tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
		}
		ObjectReader reader = repository.newObjectReader();
		// get the local branches
		List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
		// sort them by most recently updated
		Collections.sort(branches, new Comparator<RefModel>() {
			@Override
			public int compare(RefModel ref1, RefModel ref2) {
				return ref2.getDate().compareTo(ref1.getDate());
			}
		});
		// reorder default branch to first position
		RefModel defaultBranch = null;
		ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
		for (RefModel branch : branches) {
			if (branch.getObjectId().equals(defaultBranchId)) {
				defaultBranch = branch;
				break;
			}
		}
		// NOTE(review): if no branch matched defaultBranchId this inserts
		// null at index 0 and the branch loop below would NPE on it --
		// confirm getDefaultBranch always resolves to a listed branch
		branches.remove(defaultBranch);
		branches.add(0, defaultBranch);
		// walk through each branch
		for (RefModel branch : branches) {
			boolean indexBranch = false;
			if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
					&& branch.equals(defaultBranch)) {
				// indexing "default" branch
				indexBranch = true;
			} else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
				// skip internal meta branches
				indexBranch = false;
			} else {
				// normal explicit branch check
				indexBranch = model.indexedBranches.contains(branch.getName());
			}
			// if this branch is not specifically indexed then skip
			if (!indexBranch) {
				continue;
			}
			String branchName = branch.getName();
			// NOTE(review): the RevWalk/TreeWalk instances created below are
			// never released -- verify whether this matters for the JGit
			// version in use
			RevWalk revWalk = new RevWalk(reader);
			RevCommit tip = revWalk.parseCommit(branch.getObjectId());
			String tipId = tip.getId().getName();
			// record branch alias and tip so incremental updates know where
			// this index left off
			String keyName = getBranchKey(branchName);
			config.setString(CONF_ALIAS, null, keyName, branchName);
			config.setString(CONF_BRANCH, null, keyName, tipId);
			// index the blob contents of the tree
			TreeWalk treeWalk = new TreeWalk(repository);
			treeWalk.addTree(tip.getTree());
			treeWalk.setRecursive(true);
			// snapshot of all paths present at the tip; entries are removed
			// as the commit that last touched them is found
			Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
			while (treeWalk.next()) {
				// ensure path is not in a submodule
				if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
					paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
				}
			}
			// reusable buffers for reading blob content
			ByteArrayOutputStream os = new ByteArrayOutputStream();
			byte[] tmp = new byte[32767];
			RevWalk commitWalk = new RevWalk(reader);
			commitWalk.markStart(tip);
			RevCommit commit;
			// walk history newest-first until every tip path has been
			// attributed to a commit
			while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
				TreeWalk diffWalk = new TreeWalk(reader);
				int parentCount = commit.getParentCount();
				switch (parentCount) {
				case 0:
					// root commit: diff against an empty tree
					diffWalk.addTree(new EmptyTreeIterator());
					break;
				case 1:
					diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
					break;
				default:
					// skip merge commits
					continue;
				}
				diffWalk.addTree(getTree(commitWalk, commit));
				diffWalk.setFilter(ANY_DIFF);
				diffWalk.setRecursive(true);
				while ((paths.size() > 0) && diffWalk.next()) {
					String path = diffWalk.getPathString();
					if (!paths.containsKey(path)) {
						continue;
					}
					//TODO: Figure out filestore oid the path - bit more involved than updating the index
					// remove path from set
					ObjectId blobId = paths.remove(path);
					result.blobCount++;
					// index the blob metadata
					String blobAuthor = getAuthor(commit);
					String blobCommitter = getCommitter(commit);
					String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
							Resolution.MINUTE);
					Document doc = new Document();
					doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
					doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
					doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
					doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
					doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
					doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
					doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));
					// determine extension to compare to the extension
					// blacklist
					String ext = null;
					String name = path.toLowerCase();
					if (name.indexOf('.') > -1) {
						ext = name.substring(name.lastIndexOf('.') + 1);
					}
					// index the blob content
					if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
						ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
						InputStream in = ldr.openStream();
						int n;
						while ((n = in.read(tmp)) > 0) {
							os.write(tmp, 0, n);
						}
						in.close();
						byte[] content = os.toByteArray();
						String str = StringUtils.decodeString(content, encodings);
						doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
						os.reset();
					}
					// add the blob to the index
					writer.addDocument(doc);
				}
			}
			os.close();
			// index the tip commit object
			if (indexedCommits.add(tipId)) {
				Document doc = createDocument(tip, tags.get(tipId));
				doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
				writer.addDocument(doc);
				result.commitCount += 1;
				result.branchCount += 1;
			}
			// traverse the log and index the previous commit objects;
			// indexedCommits prevents re-indexing commits shared by
			// previously processed branches
			RevWalk historyWalk = new RevWalk(reader);
			historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
			RevCommit rev;
			while ((rev = historyWalk.next()) != null) {
				String hash = rev.getId().getName();
				if (indexedCommits.add(hash)) {
					Document doc = createDocument(rev, tags.get(hash));
					doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
					writer.addDocument(doc);
					result.commitCount += 1;
				}
			}
		}
		// finished
		reader.close();
		// commit all changes and reset the searcher
		config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
		config.save();
		writer.commit();
		resetIndexSearcher(model.name);
		result.success();
	} catch (Exception e) {
		logger.error("Exception while reindexing " + model.name, e);
	}
	return result;
}
/** | |
* Incrementally update the index with the specified commit for the | |
* repository. | |
* | |
* @param repositoryName | |
* @param repository | |
* @param branch | |
* the fully qualified branch name (e.g. refs/heads/master) | |
* @param commit | |
* @return true, if successful | |
*/ | |
private IndexResult index(String repositoryName, Repository repository, | |
String branch, RevCommit commit) { | |
IndexResult result = new IndexResult(); | |
try { | |
String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]); | |
List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit); | |
String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, | |
Resolution.MINUTE); | |
IndexWriter writer = getIndexWriter(repositoryName); | |
for (PathChangeModel path : changedPaths) { | |
if (path.isSubmodule()) { | |
continue; | |
} | |
// delete the indexed blob | |
deleteBlob(repositoryName, branch, path.name); | |
// re-index the blob | |
if (!ChangeType.DELETE.equals(path.changeType)) { | |
result.blobCount++; | |
Document doc = new Document(); | |
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED)); | |
doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED)); | |
doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED)); | |
doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED)); | |
doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED)); | |
doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED)); | |
doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED)); | |
// determine extension to compare to the extension | |
// blacklist | |
String ext = null; | |
String name = path.name.toLowerCase(); | |
if (name.indexOf('.') > -1) { | |
ext = name.substring(name.lastIndexOf('.') + 1); | |
} | |
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) { | |
String str = ""; | |
// read the blob content | |
if (path.isFilestoreItem()) { | |
//Get file from filestore | |
BodyContentHandler handler = new BodyContentHandler(); | |
Metadata metadata = new Metadata(); | |
PDFParser parser = new PDFParser(); | |
ParseContext parseContext = new ParseContext(); | |
File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid()); | |
FileInputStream inputstream = new FileInputStream(lfsFile); | |
parser.parse(inputstream, handler, metadata, parseContext); | |
str = handler.toString(); | |
} else { | |
str = JGitUtils.getStringContent(repository, commit.getTree(), | |
path.path, encodings); | |
} | |
if (str != null) { | |
doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED)); | |
writer.addDocument(doc); | |
} | |
} | |
} | |
} | |
writer.commit(); | |
// get any annotated commit tags | |
List<String> commitTags = new ArrayList<String>(); | |
for (RefModel ref : JGitUtils.getTags(repository, false, -1)) { | |
if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) { | |
commitTags.add(ref.displayName); | |
} | |
} | |
// create and write the Lucene document | |
Document doc = createDocument(commit, commitTags); | |
doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED)); | |
result.commitCount++; | |
result.success = index(repositoryName, doc); | |
} catch (Exception e) { | |
logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e); | |
} | |
return result; | |
} | |
/**
 * Delete a blob from the specified branch of the repository index.
 *
 * @param repositoryName
 * @param branch
 * @param path
 * @throws Exception
 * @return true, if deleted, false if no record was deleted
 */
public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
	// two-stage MessageFormat: the first pass inserts the field names and
	// (via the quoted braces) emits literal {0}/{1}/{2} placeholders; the
	// second pass fills in the type/branch/path values, quoting branch and
	// path for the query parser
	String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
	String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
	BooleanQuery query = new BooleanQuery();
	StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
	QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
	query.add(qp.parse(q), Occur.MUST);
	IndexWriter writer = getIndexWriter(repositoryName);
	// compare document counts before/after to detect whether anything
	// was actually deleted
	int numDocsBefore = writer.numDocs();
	writer.deleteDocuments(query);
	writer.commit();
	int numDocsAfter = writer.numDocs();
	if (numDocsBefore == numDocsAfter) {
		logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
		return false;
	} else {
		logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
		return true;
	}
}
/** | |
* Updates a repository index incrementally from the last indexed commits. | |
* | |
* @param model | |
* @param repository | |
* @return IndexResult | |
*/ | |
private IndexResult updateIndex(RepositoryModel model, Repository repository) { | |
IndexResult result = new IndexResult(); | |
try { | |
FileBasedConfig config = getConfig(repository); | |
config.load(); | |
// build a quick lookup of annotated tags | |
Map<String, List<String>> tags = new HashMap<String, List<String>>(); | |
for (RefModel tag : JGitUtils.getTags(repository, false, -1)) { | |
if (!tag.isAnnotatedTag()) { | |
// skip non-annotated tags | |
continue; | |
} | |
if (!tags.containsKey(tag.getObjectId().getName())) { | |
tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>()); | |
} | |
tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName); | |
} | |
// detect branch deletion | |
// first assume all branches are deleted and then remove each | |
// existing branch from deletedBranches during indexing | |
Set<String> deletedBranches = new TreeSet<String>(); | |
for (String alias : config.getNames(CONF_ALIAS)) { | |
String branch = config.getString(CONF_ALIAS, null, alias); | |
deletedBranches.add(branch); | |
} | |
// get the local branches | |
List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1); | |
// sort them by most recently updated | |
Collections.sort(branches, new Comparator<RefModel>() { | |
@Override | |
public int compare(RefModel ref1, RefModel ref2) { | |
return ref2.getDate().compareTo(ref1.getDate()); | |
} | |
}); | |
// reorder default branch to first position | |
RefModel defaultBranch = null; | |
ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository); | |
for (RefModel branch : branches) { | |
if (branch.getObjectId().equals(defaultBranchId)) { | |
defaultBranch = branch; | |
break; | |
} | |
} | |
branches.remove(defaultBranch); | |
branches.add(0, defaultBranch); | |
// walk through each branches | |
for (RefModel branch : branches) { | |
String branchName = branch.getName(); | |
boolean indexBranch = false; | |
if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH) | |
&& branch.equals(defaultBranch)) { | |
// indexing "default" branch | |
indexBranch = true; | |
} else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) { | |
// ignore internal meta branches | |
indexBranch = false; | |
} else { | |
// normal explicit branch check | |
indexBranch = model.indexedBranches.contains(branch.getName()); | |
} | |
// if this branch is not specifically indexed then skip | |
if (!indexBranch) { | |
continue; | |
} | |
// remove this branch from the deletedBranches set | |
deletedBranches.remove(branchName); | |
// determine last commit | |
String keyName = getBranchKey(branchName); | |
String lastCommit = config.getString(CONF_BRANCH, null, keyName); | |
List<RevCommit> revs; | |
if (StringUtils.isEmpty(lastCommit)) { | |
// new branch/unindexed branch, get all commits on branch | |
revs = JGitUtils.getRevLog(repository, branchName, 0, -1); | |
} else { | |
// pre-existing branch, get changes since last commit | |
revs = JGitUtils.getRevLog(repository, lastCommit, branchName); | |
} | |
if (revs.size() > 0) { | |
result.branchCount += 1; | |
} | |
// reverse the list of commits so we start with the first commit | |
Collections.reverse(revs); | |
for (RevCommit commit : revs) { | |
// index a commit | |
result.add(index(model.name, repository, branchName, commit)); | |
} | |
// update the config | |
config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION); | |
config.setString(CONF_ALIAS, null, keyName, branchName); | |
config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName()); | |
config.save(); | |
} | |
// the deletedBranches set will normally be empty by this point | |
// unless a branch really was deleted and no longer exists | |
if (deletedBranches.size() > 0) { | |
for (String branch : deletedBranches) { | |
IndexWriter writer = getIndexWriter(model.name); | |
writer.deleteDocuments(new Term(FIELD_BRANCH, branch)); | |
writer.commit(); | |
} | |
} | |
result.success = true; | |
} catch (Throwable t) { | |
logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t); | |
} | |
return result; | |
} | |
/** | |
* Creates a Lucene document for a commit | |
* | |
* @param commit | |
* @param tags | |
* @return a Lucene document | |
*/ | |
private Document createDocument(RevCommit commit, List<String> tags) { | |
Document doc = new Document(); | |
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), StringField.TYPE_STORED)); | |
doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED)); | |
doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L, | |
Resolution.MINUTE), StringField.TYPE_STORED)); | |
doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED)); | |
doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED)); | |
doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), TextField.TYPE_STORED)); | |
doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), TextField.TYPE_STORED)); | |
if (!ArrayUtils.isEmpty(tags)) { | |
doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), TextField.TYPE_STORED)); | |
} | |
return doc; | |
} | |
/** | |
* Incrementally index an object for the repository. | |
* | |
* @param repositoryName | |
* @param doc | |
* @return true, if successful | |
*/ | |
private boolean index(String repositoryName, Document doc) { | |
try { | |
IndexWriter writer = getIndexWriter(repositoryName); | |
writer.addDocument(doc); | |
writer.commit(); | |
resetIndexSearcher(repositoryName); | |
return true; | |
} catch (Exception e) { | |
logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e); | |
} | |
return false; | |
} | |
private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException { | |
SearchResult result = new SearchResult(); | |
result.hitId = hitId; | |
result.totalHits = totalHits; | |
result.score = score; | |
result.date = DateTools.stringToDate(doc.get(FIELD_DATE)); | |
result.summary = doc.get(FIELD_SUMMARY); | |
result.author = doc.get(FIELD_AUTHOR); | |
result.committer = doc.get(FIELD_COMMITTER); | |
result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE)); | |
result.branch = doc.get(FIELD_BRANCH); | |
result.commitId = doc.get(FIELD_COMMIT); | |
result.path = doc.get(FIELD_PATH); | |
if (doc.get(FIELD_TAG) != null) { | |
result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG)); | |
} | |
return result; | |
} | |
private synchronized void resetIndexSearcher(String repository) throws IOException { | |
IndexSearcher searcher = searchers.remove(repository); | |
if (searcher != null) { | |
searcher.getIndexReader().close(); | |
} | |
} | |
/** | |
* Gets an index searcher for the repository. | |
* | |
* @param repository | |
* @return | |
* @throws IOException | |
*/ | |
private IndexSearcher getIndexSearcher(String repository) throws IOException { | |
IndexSearcher searcher = searchers.get(repository); | |
if (searcher == null) { | |
IndexWriter writer = getIndexWriter(repository); | |
searcher = new IndexSearcher(DirectoryReader.open(writer, true)); | |
searchers.put(repository, searcher); | |
} | |
return searcher; | |
} | |
/** | |
* Gets an index writer for the repository. The index will be created if it | |
* does not already exist or if forceCreate is specified. | |
* | |
* @param repository | |
* @return an IndexWriter | |
* @throws IOException | |
*/ | |
private IndexWriter getIndexWriter(String repository) throws IOException { | |
IndexWriter indexWriter = writers.get(repository); | |
File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED); | |
File indexFolder = new File(repositoryFolder, LUCENE_DIR); | |
Directory directory = FSDirectory.open(indexFolder); | |
if (indexWriter == null) { | |
if (!indexFolder.exists()) { | |
indexFolder.mkdirs(); | |
} | |
StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION); | |
IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer); | |
config.setOpenMode(OpenMode.CREATE_OR_APPEND); | |
indexWriter = new IndexWriter(directory, config); | |
writers.put(repository, indexWriter); | |
} | |
return indexWriter; | |
} | |
/** | |
* Searches the specified repositories for the given text or query | |
* | |
* @param text | |
* if the text is null or empty, null is returned | |
* @param page | |
* the page number to retrieve. page is 1-indexed. | |
* @param pageSize | |
* the number of elements to return for this page | |
* @param repositories | |
* a list of repositories to search. if no repositories are | |
* specified null is returned. | |
* @return a list of SearchResults in order from highest to the lowest score | |
* | |
*/ | |
public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) { | |
if (ArrayUtils.isEmpty(repositories)) { | |
return null; | |
} | |
return search(text, page, pageSize, repositories.toArray(new String[0])); | |
} | |
/**
 * Searches the specified repositories for the given text or query
 *
 * @param text
 *            if the text is null or empty, null is returned
 * @param page
 *            the page number to retrieve. page is 1-indexed.
 * @param pageSize
 *            the number of elements to return for this page
 * @param repositories
 *            a list of repositories to search. if no repositories are
 *            specified null is returned.
 * @return a list of SearchResults in order from highest to the lowest score
 *
 */
public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
	if (StringUtils.isEmpty(text)) {
		return null;
	}
	if (ArrayUtils.isEmpty(repositories)) {
		return null;
	}
	// LinkedHashSet preserves score order while de-duplicating results
	Set<SearchResult> results = new LinkedHashSet<SearchResult>();
	StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
	try {
		// default search checks summary and content; either field matching
		// (Occur.SHOULD) is enough for a hit
		BooleanQuery query = new BooleanQuery();
		QueryParser qp;
		qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
		qp.setAllowLeadingWildcard(true);
		query.add(qp.parse(text), Occur.SHOULD);
		qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
		qp.setAllowLeadingWildcard(true);
		query.add(qp.parse(text), Occur.SHOULD);
		IndexSearcher searcher;
		if (repositories.length == 1) {
			// single repository search
			searcher = getIndexSearcher(repositories[0]);
		} else {
			// multiple repository search: combine the cached per-repository
			// readers into one composite reader, keeping repository order so
			// doc ids can be mapped back to a repository below
			List<IndexReader> readers = new ArrayList<IndexReader>();
			for (String repository : repositories) {
				IndexSearcher repositoryIndex = getIndexSearcher(repository);
				readers.add(repositoryIndex.getIndexReader());
			}
			IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
			MultiSourceReader reader = new MultiSourceReader(rdrs);
			searcher = new IndexSearcher(reader);
		}
		Query rewrittenQuery = searcher.rewrite(query);
		logger.debug(rewrittenQuery.toString());
		// collect up to 5000 hits; paging below slices into this window
		TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
		searcher.search(rewrittenQuery, collector);
		// page is 1-indexed; convert to a 0-based document offset
		int offset = Math.max(0, (page - 1) * pageSize);
		ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
		int totalHits = collector.getTotalHits();
		for (int i = 0; i < hits.length; i++) {
			int docId = hits[i].doc;
			Document doc = searcher.doc(docId);
			// hitId is 1-indexed within the overall result ordering
			SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
			if (repositories.length == 1) {
				// single repository search
				result.repository = repositories[0];
			} else {
				// multi-repository search: map the composite doc id back to
				// the repository whose sub-reader produced it
				MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
				int index = reader.getSourceIndex(docId);
				result.repository = repositories[index];
			}
			String content = doc.get(FIELD_CONTENT);
			result.fragment = getHighlightedFragment(analyzer, query, content, result);
			results.add(result);
		}
	} catch (Exception e) {
		// best-effort: log and return whatever results were gathered
		logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
	}
	return new ArrayList<SearchResult>(results);
}
/**
 * Builds an HTML fragment of the content with the query's matched terms
 * highlighted. Falls back to a clipped, escaped excerpt of the content when
 * the highlighter produces no fragments for a commit message, or an empty
 * string for a blob.
 *
 * @param analyzer the analyzer used to tokenize the content
 * @param query the query whose terms should be highlighted
 * @param content the raw field content; null is treated as empty
 * @param result the search result being decorated; its type and path
 *            influence fragment length and blob line-number markup
 * @return an HTML string of one or more highlighted fragments
 * @throws IOException
 * @throws InvalidTokenOffsetsException
 */
private String getHighlightedFragment(Analyzer analyzer, Query query,
		String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
	if (content == null) {
		content = "";
	}
	int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4);
	// commits get longer fragments than blobs
	int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
	QueryScorer scorer = new QueryScorer(query, "content");
	Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);
	// use an artificial delimiter for the token; these markers are replaced
	// with <span> tags after HTML-escaping so the markup survives escaping
	String termTag = "!!--[";
	String termTagEnd = "]--!!";
	SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
	Highlighter highlighter = new Highlighter(formatter, scorer);
	highlighter.setTextFragmenter(fragmenter);
	String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
	if (ArrayUtils.isEmpty(fragments)) {
		if (SearchObjectType.blob == result.type) {
			return "";
		}
		// clip commit message
		String fragment = content;
		if (fragment.length() > fragmentLength) {
			fragment = fragment.substring(0, fragmentLength) + "...";
		}
		return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>";
	}
	// make sure we have unique fragments
	Set<String> uniqueFragments = new LinkedHashSet<String>();
	for (String fragment : fragments) {
		uniqueFragments.add(fragment);
	}
	fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
	StringBuilder sb = new StringBuilder();
	for (int i = 0, len = fragments.length; i < len; i++) {
		String fragment = fragments[i];
		String tag = "<pre class=\"text\">";
		// resurrect the raw fragment from removing the artificial delimiters
		String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
		// determine position of the raw fragment in the content
		// NOTE(review): assumes the highlighter's fragment text appears
		// verbatim in content; if not, pos is -1 and the substring calls
		// below would throw — TODO confirm this cannot happen in practice
		int pos = content.indexOf(raw);
		// restore complete first line of fragment
		int c = pos;
		while (c > 0) {
			c--;
			if (content.charAt(c) == '\n') {
				break;
			}
		}
		if (c > 0) {
			// inject leading chunk of first fragment line
			fragment = content.substring(c + 1, pos) + fragment;
		}
		if (SearchObjectType.blob == result.type) {
			// count lines as offset into the content for this fragment
			int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
			// create fragment tag with line number and language
			String lang = "";
			String ext = StringUtils.getFileExtension(result.path).toLowerCase();
			if (!StringUtils.isEmpty(ext)) {
				// maintain leading space!
				lang = " lang-" + ext;
			}
			tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
		}
		sb.append(tag);
		// replace the artificial delimiter with html tags
		String html = StringUtils.escapeForHtml(fragment, false);
		html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
		sb.append(html);
		sb.append("</pre>");
		if (i < len - 1) {
			// ellipsis between non-adjacent fragments
			sb.append("<span class=\"ellipses\">...</span><br/>");
		}
	}
	return sb.toString();
}
/** | |
* Simple class to track the results of an index update. | |
*/ | |
private class IndexResult { | |
long startTime = System.currentTimeMillis(); | |
long endTime = startTime; | |
boolean success; | |
int branchCount; | |
int commitCount; | |
int blobCount; | |
void add(IndexResult result) { | |
this.branchCount += result.branchCount; | |
this.commitCount += result.commitCount; | |
this.blobCount += result.blobCount; | |
} | |
void success() { | |
success = true; | |
endTime = System.currentTimeMillis(); | |
} | |
float duration() { | |
return (endTime - startTime)/1000f; | |
} | |
} | |
/** | |
* Custom subclass of MultiReader to identify the source index for a given | |
* doc id. This would not be necessary of there was a public method to | |
* obtain this information. | |
* | |
*/ | |
private class MultiSourceReader extends MultiReader { | |
MultiSourceReader(IndexReader [] readers) { | |
super(readers, false); | |
} | |
int getSourceIndex(int docId) { | |
int index = -1; | |
try { | |
index = super.readerIndex(docId); | |
} catch (Exception e) { | |
logger.error("Error getting source index", e); | |
} | |
return index; | |
} | |
} | |
} |