blob: 0ac1af2a18013581ec872f4a4b7a1af535b6751c [file] [log] [blame]
// Copyright (C) 2009 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package com.google.gerrit.server.patch;
import com.google.gerrit.server.cache.EntryCreator;
import com.google.gerrit.server.config.ConfigUtil;
import com.google.gerrit.server.config.GerritServerConfig;
import com.google.inject.Inject;
import org.eclipse.jgit.diff.Edit;
import org.eclipse.jgit.diff.MyersDiff;
import org.eclipse.jgit.diff.ReplaceEdit;
import org.eclipse.jgit.lib.Config;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
class IntraLineLoader extends EntryCreator<IntraLineDiffKey, IntraLineDiff> {
private static final Logger log = LoggerFactory
.getLogger(IntraLineLoader.class);
private static final Pattern BLANK_LINE_RE = Pattern
.compile("^[ \\t]*(|[{}]|/\\*\\*?|\\*)[ \\t]*$");
private static final Pattern CONTROL_BLOCK_START_RE = Pattern
.compile("[{:][ \\t]*$");
private final BlockingQueue<Worker> workerPool;
private final long timeoutMillis;
@Inject
IntraLineLoader(final @GerritServerConfig Config cfg) {
final int workers =
cfg.getInt("cache", PatchListCacheImpl.INTRA_NAME, "maxIdleWorkers",
Runtime.getRuntime().availableProcessors() * 3 / 2);
workerPool = new ArrayBlockingQueue<Worker>(workers, true /* fair */);
timeoutMillis =
ConfigUtil.getTimeUnit(cfg, "cache", PatchListCacheImpl.INTRA_NAME,
"timeout", TimeUnit.MILLISECONDS.convert(5, TimeUnit.SECONDS),
TimeUnit.MILLISECONDS);
}
@Override
public IntraLineDiff createEntry(IntraLineDiffKey key) throws Exception {
Worker w = workerPool.poll();
if (w == null) {
w = new Worker();
}
Worker.Result r = w.computeWithTimeout(key, timeoutMillis);
if (r == Worker.Result.TIMEOUT) {
// Don't keep this thread. We have to murder it unsafely, which
// means its unable to be reused in the future. Return back a
// null result, indicating the cache cannot load this key.
//
return new IntraLineDiff(IntraLineDiff.Status.TIMEOUT);
}
if (!workerPool.offer(w)) {
// If the idle worker pool is full, terminate this thread.
//
w.end();
}
if (r.error != null) {
// If there was an error computing the result, carry it
// up to the caller so the cache knows this key is invalid.
//
throw r.error;
}
return r.diff;
}
private static class Worker {
private static final AtomicInteger count = new AtomicInteger(1);
private final ArrayBlockingQueue<Input> input;
private final ArrayBlockingQueue<Result> result;
private final Thread thread;
Worker() {
input = new ArrayBlockingQueue<Input>(1);
result = new ArrayBlockingQueue<Result>(1);
thread = new Thread(new Runnable() {
public void run() {
workerLoop();
}
});
thread.setName("IntraLineDiff-" + count.getAndIncrement());
thread.setDaemon(true);
thread.start();
}
Result computeWithTimeout(IntraLineDiffKey key, long timeoutMillis)
throws Exception {
if (!input.offer(new Input(key))) {
log.error("Cannot enqueue task to thread " + thread.getName());
return null;
}
Result r = result.poll(timeoutMillis, TimeUnit.MILLISECONDS);
if (r != null) {
return r;
} else {
log.warn(timeoutMillis + " ms timeout reached for IntraLineDiff"
+ " in project " + key.getProject().get() //
+ " on commit " + key.getCommit().name() //
+ " for path " + key.getPath() //
+ " comparing " + key.getBlobA().name() //
+ ".." + key.getBlobB().name() //
+ ". Killing " + thread.getName());
try {
thread.stop();
} catch (Throwable error) {
// Ignore any reason the thread won't stop.
log.error("Cannot stop runaway thread " + thread.getName(), error);
}
return Result.TIMEOUT;
}
}
void end() {
if (!input.offer(Input.END_THREAD)) {
log.error("Cannot gracefully stop thread " + thread.getName());
}
}
private void workerLoop() {
try {
for (;;) {
Input in;
try {
in = input.take();
} catch (InterruptedException e) {
log.error("Unexpected interrupt on " + thread.getName());
continue;
}
if (in == Input.END_THREAD) {
return;
}
Result r;
try {
r = new Result(IntraLineLoader.compute(in.key));
} catch (Exception error) {
r = new Result(error);
}
if (!result.offer(r)) {
log.error("Cannot return result from " + thread.getName());
}
}
} catch (ThreadDeath iHaveBeenShot) {
// Handle thread death by gracefully returning to the caller,
// allowing the thread to be destroyed.
}
}
private static class Input {
static final Input END_THREAD = new Input(null);
final IntraLineDiffKey key;
Input(IntraLineDiffKey key) {
this.key = key;
}
}
static class Result {
static final Result TIMEOUT = new Result((IntraLineDiff) null);
final IntraLineDiff diff;
final Exception error;
Result(IntraLineDiff diff) {
this.diff = diff;
this.error = null;
}
Result(Exception error) {
this.diff = null;
this.error = error;
}
}
}
private static IntraLineDiff compute(IntraLineDiffKey key) throws Exception {
List<Edit> edits = new ArrayList<Edit>(key.getEdits());
Text aContent = key.getTextA();
Text bContent = key.getTextB();
combineLineEdits(edits, aContent, bContent);
for (int i = 0; i < edits.size(); i++) {
Edit e = edits.get(i);
if (e.getType() == Edit.Type.REPLACE) {
CharText a = new CharText(aContent, e.getBeginA(), e.getEndA());
CharText b = new CharText(bContent, e.getBeginB(), e.getEndB());
CharTextComparator cmp = new CharTextComparator();
List<Edit> wordEdits = MyersDiff.INSTANCE.diff(cmp, a, b);
// Combine edits that are really close together. If they are
// just a few characters apart we tend to get better results
// by joining them together and taking the whole span.
//
for (int j = 0; j < wordEdits.size() - 1;) {
Edit c = wordEdits.get(j);
Edit n = wordEdits.get(j + 1);
if (n.getBeginA() - c.getEndA() <= 5
|| n.getBeginB() - c.getEndB() <= 5) {
int ab = c.getBeginA();
int ae = n.getEndA();
int bb = c.getBeginB();
int be = n.getEndB();
if (canCoalesce(a, c.getEndA(), n.getBeginA())
&& canCoalesce(b, c.getEndB(), n.getBeginB())) {
wordEdits.set(j, new Edit(ab, ae, bb, be));
wordEdits.remove(j + 1);
continue;
}
}
j++;
}
// Apply some simple rules to fix up some of the edits. Our
// logic above, along with our per-character difference tends
// to produce some crazy stuff.
//
for (int j = 0; j < wordEdits.size(); j++) {
Edit c = wordEdits.get(j);
int ab = c.getBeginA();
int ae = c.getEndA();
int bb = c.getBeginB();
int be = c.getEndB();
// Sometimes the diff generator produces an INSERT or DELETE
// right up against a REPLACE, but we only find this after
// we've also played some shifting games on the prior edit.
// If that happened to us, coalesce them together so we can
// correct this mess for the user. If we don't we wind up
// with silly stuff like "es" -> "es = Addresses".
//
if (1 < j) {
Edit p = wordEdits.get(j - 1);
if (p.getEndA() == ab || p.getEndB() == bb) {
if (p.getEndA() == ab && p.getBeginA() < p.getEndA()) {
ab = p.getBeginA();
}
if (p.getEndB() == bb && p.getBeginB() < p.getEndB()) {
bb = p.getBeginB();
}
wordEdits.remove(--j);
}
}
// We sometimes collapsed an edit together in a strange way,
// such that the edges of each text is identical. Fix by
// by dropping out that incorrectly replaced region.
//
while (ab < ae && bb < be && cmp.equals(a, ab, b, bb)) {
ab++;
bb++;
}
while (ab < ae && bb < be && cmp.equals(a, ae - 1, b, be - 1)) {
ae--;
be--;
}
// The leading part of an edit and its trailing part in the same
// text might be identical. Slide down that edit and use the tail
// rather than the leading bit. If however the edit is only on a
// whitespace block try to shift it to the left margin, assuming
// that it is an indentation change.
//
boolean aShift = true;
if (ab < ae && isOnlyWhitespace(a, ab, ae)) {
int lf = findLF(wordEdits, j, a, ab);
if (lf < ab && a.charAt(lf) == '\n') {
int nb = lf + 1;
int p = 0;
while (p < ae - ab) {
if (cmp.equals(a, ab + p, a, ab + p))
p++;
else
break;
}
if (p == ae - ab) {
ab = nb;
ae = nb + p;
aShift = false;
}
}
}
if (aShift) {
while (0 < ab && ab < ae && a.charAt(ab - 1) != '\n'
&& cmp.equals(a, ab - 1, a, ae - 1)) {
ab--;
ae--;
}
if (!a.isLineStart(ab) || !a.contains(ab, ae, '\n')) {
while (ab < ae && ae < a.size() && cmp.equals(a, ab, a, ae)) {
ab++;
ae++;
if (a.charAt(ae - 1) == '\n') {
break;
}
}
}
}
boolean bShift = true;
if (bb < be && isOnlyWhitespace(b, bb, be)) {
int lf = findLF(wordEdits, j, b, bb);
if (lf < bb && b.charAt(lf) == '\n') {
int nb = lf + 1;
int p = 0;
while (p < be - bb) {
if (cmp.equals(b, bb + p, b, bb + p))
p++;
else
break;
}
if (p == be - bb) {
bb = nb;
be = nb + p;
bShift = false;
}
}
}
if (bShift) {
while (0 < bb && bb < be && b.charAt(bb - 1) != '\n'
&& cmp.equals(b, bb - 1, b, be - 1)) {
bb--;
be--;
}
if (!b.isLineStart(bb) || !b.contains(bb, be, '\n')) {
while (bb < be && be < b.size() && cmp.equals(b, bb, b, be)) {
bb++;
be++;
if (b.charAt(be - 1) == '\n') {
break;
}
}
}
}
// If most of a line was modified except the LF was common, make
// the LF part of the modification region. This is easier to read.
//
if (ab < ae //
&& (ab == 0 || a.charAt(ab - 1) == '\n') //
&& ae < a.size() && a.charAt(ae) == '\n') {
ae++;
}
if (bb < be //
&& (bb == 0 || b.charAt(bb - 1) == '\n') //
&& be < b.size() && b.charAt(be) == '\n') {
be++;
}
wordEdits.set(j, new Edit(ab, ae, bb, be));
}
edits.set(i, new ReplaceEdit(e, wordEdits));
}
}
return new IntraLineDiff(edits);
}
private static void combineLineEdits(List<Edit> edits, Text a, Text b) {
for (int j = 0; j < edits.size() - 1;) {
Edit c = edits.get(j);
Edit n = edits.get(j + 1);
// Combine edits that are really close together. Right now our rule
// is, coalesce two line edits which are only one line apart if that
// common context line is either a "pointless line", or is identical
// on both sides and starts a new block of code. These are mostly
// block reindents to add or remove control flow operators.
//
final int ad = n.getBeginA() - c.getEndA();
final int bd = n.getBeginB() - c.getEndB();
if ((1 <= ad && isBlankLineGap(a, c.getEndA(), n.getBeginA()))
|| (1 <= bd && isBlankLineGap(b, c.getEndB(), n.getBeginB()))
|| (ad == 1 && bd == 1 && isControlBlockStart(a, c.getEndA()))) {
int ab = c.getBeginA();
int ae = n.getEndA();
int bb = c.getBeginB();
int be = n.getEndB();
edits.set(j, new Edit(ab, ae, bb, be));
edits.remove(j + 1);
continue;
}
j++;
}
}
private static boolean isBlankLineGap(Text a, int b, int e) {
for (; b < e; b++) {
if (!BLANK_LINE_RE.matcher(a.getString(b)).matches()) {
return false;
}
}
return true;
}
private static boolean isControlBlockStart(Text a, int idx) {
return CONTROL_BLOCK_START_RE.matcher(a.getString(idx)).find();
}
private static boolean canCoalesce(CharText a, int b, int e) {
while (b < e) {
if (a.charAt(b++) == '\n') {
return false;
}
}
return true;
}
private static int findLF(List<Edit> edits, int j, CharText t, int b) {
int lf = b;
int limit = 0 < j ? edits.get(j - 1).getEndB() : 0;
while (limit < lf && t.charAt(lf) != '\n') {
lf--;
}
return lf;
}
private static boolean isOnlyWhitespace(CharText t, final int b, final int e) {
for (int c = b; c < e; c++) {
if (!Character.isWhitespace(t.charAt(c))) {
return false;
}
}
return b < e;
}
}