| // Copyright (C) 2009 The Android Open Source Project |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| |
| package com.google.gerrit.server.patch; |
| |
| import com.google.common.base.Throwables; |
| import com.google.common.cache.CacheLoader; |
| import com.google.gerrit.server.config.ConfigUtil; |
| import com.google.gerrit.server.config.GerritServerConfig; |
| import com.google.inject.Inject; |
| |
| import org.eclipse.jgit.diff.Edit; |
| import org.eclipse.jgit.diff.MyersDiff; |
| import org.eclipse.jgit.diff.ReplaceEdit; |
| import org.eclipse.jgit.lib.Config; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.concurrent.Callable; |
| import java.util.concurrent.ExecutionException; |
| import java.util.concurrent.ExecutorService; |
| import java.util.concurrent.Future; |
| import java.util.concurrent.TimeUnit; |
| import java.util.concurrent.TimeoutException; |
| import java.util.regex.Pattern; |
| |
| class IntraLineLoader extends CacheLoader<IntraLineDiffKey, IntraLineDiff> { |
| static final Logger log = LoggerFactory.getLogger(IntraLineLoader.class); |
| |
| private static final Pattern BLANK_LINE_RE = Pattern |
| .compile("^[ \\t]*(|[{}]|/\\*\\*?|\\*)[ \\t]*$"); |
| |
| private static final Pattern CONTROL_BLOCK_START_RE = Pattern |
| .compile("[{:][ \\t]*$"); |
| |
| private final ExecutorService diffExecutor; |
| private final long timeoutMillis; |
| |
| @Inject |
| IntraLineLoader(@DiffExecutor ExecutorService diffExecutor, |
| @GerritServerConfig Config cfg) { |
| this.diffExecutor = diffExecutor; |
| timeoutMillis = |
| ConfigUtil.getTimeUnit(cfg, "cache", PatchListCacheImpl.INTRA_NAME, |
| "timeout", TimeUnit.MILLISECONDS.convert(5, TimeUnit.SECONDS), |
| TimeUnit.MILLISECONDS); |
| } |
| |
| @Override |
| public IntraLineDiff load(final IntraLineDiffKey key) throws Exception { |
| Future<IntraLineDiff> result = diffExecutor.submit(new Callable<IntraLineDiff>() { |
| @Override |
| public IntraLineDiff call() throws Exception { |
| return IntraLineLoader.compute(key); |
| } |
| }); |
| try { |
| return result.get(timeoutMillis, TimeUnit.MILLISECONDS); |
| } catch (InterruptedException | TimeoutException e) { |
| log.warn(timeoutMillis + " ms timeout reached for IntraLineDiff" |
| + " in project " + key.getProject().get() |
| + " on commit " + key.getCommit().name() |
| + " for path " + key.getPath() |
| + " comparing " + key.getBlobA().name() |
| + ".." + key.getBlobB().name()); |
| result.cancel(true); |
| return new IntraLineDiff(IntraLineDiff.Status.TIMEOUT); |
| } catch (ExecutionException e) { |
| // If there was an error computing the result, carry it |
| // up to the caller so the cache knows this key is invalid. |
| Throwables.propagateIfInstanceOf(e.getCause(), Exception.class); |
| throw new Exception(e.getMessage(), e.getCause()); |
| } |
| } |
| |
| static IntraLineDiff compute(IntraLineDiffKey key) throws Exception { |
| List<Edit> edits = new ArrayList<>(key.getEdits()); |
| Text aContent = key.getTextA(); |
| Text bContent = key.getTextB(); |
| combineLineEdits(edits, aContent, bContent); |
| |
| for (int i = 0; i < edits.size(); i++) { |
| Edit e = edits.get(i); |
| |
| if (e.getType() == Edit.Type.REPLACE) { |
| CharText a = new CharText(aContent, e.getBeginA(), e.getEndA()); |
| CharText b = new CharText(bContent, e.getBeginB(), e.getEndB()); |
| CharTextComparator cmp = new CharTextComparator(); |
| |
| List<Edit> wordEdits = MyersDiff.INSTANCE.diff(cmp, a, b); |
| |
| // Combine edits that are really close together. If they are |
| // just a few characters apart we tend to get better results |
| // by joining them together and taking the whole span. |
| // |
| for (int j = 0; j < wordEdits.size() - 1;) { |
| Edit c = wordEdits.get(j); |
| Edit n = wordEdits.get(j + 1); |
| |
| if (n.getBeginA() - c.getEndA() <= 5 |
| || n.getBeginB() - c.getEndB() <= 5) { |
| int ab = c.getBeginA(); |
| int ae = n.getEndA(); |
| |
| int bb = c.getBeginB(); |
| int be = n.getEndB(); |
| |
| if (canCoalesce(a, c.getEndA(), n.getBeginA()) |
| && canCoalesce(b, c.getEndB(), n.getBeginB())) { |
| wordEdits.set(j, new Edit(ab, ae, bb, be)); |
| wordEdits.remove(j + 1); |
| continue; |
| } |
| } |
| |
| j++; |
| } |
| |
| // Apply some simple rules to fix up some of the edits. Our |
| // logic above, along with our per-character difference tends |
| // to produce some crazy stuff. |
| // |
| for (int j = 0; j < wordEdits.size(); j++) { |
| Edit c = wordEdits.get(j); |
| int ab = c.getBeginA(); |
| int ae = c.getEndA(); |
| |
| int bb = c.getBeginB(); |
| int be = c.getEndB(); |
| |
| // Sometimes the diff generator produces an INSERT or DELETE |
| // right up against a REPLACE, but we only find this after |
| // we've also played some shifting games on the prior edit. |
| // If that happened to us, coalesce them together so we can |
| // correct this mess for the user. If we don't we wind up |
| // with silly stuff like "es" -> "es = Addresses". |
| // |
| if (1 < j) { |
| Edit p = wordEdits.get(j - 1); |
| if (p.getEndA() == ab || p.getEndB() == bb) { |
| if (p.getEndA() == ab && p.getBeginA() < p.getEndA()) { |
| ab = p.getBeginA(); |
| } |
| if (p.getEndB() == bb && p.getBeginB() < p.getEndB()) { |
| bb = p.getBeginB(); |
| } |
| wordEdits.remove(--j); |
| } |
| } |
| |
| // We sometimes collapsed an edit together in a strange way, |
| // such that the edges of each text is identical. Fix by |
| // by dropping out that incorrectly replaced region. |
| // |
| while (ab < ae && bb < be && cmp.equals(a, ab, b, bb)) { |
| ab++; |
| bb++; |
| } |
| while (ab < ae && bb < be && cmp.equals(a, ae - 1, b, be - 1)) { |
| ae--; |
| be--; |
| } |
| |
| // The leading part of an edit and its trailing part in the same |
| // text might be identical. Slide down that edit and use the tail |
| // rather than the leading bit. If however the edit is only on a |
| // whitespace block try to shift it to the left margin, assuming |
| // that it is an indentation change. |
| // |
| boolean aShift = true; |
| if (ab < ae && isOnlyWhitespace(a, ab, ae)) { |
| int lf = findLF(wordEdits, j, a, ab); |
| if (lf < ab && a.charAt(lf) == '\n') { |
| int nb = lf + 1; |
| int p = 0; |
| while (p < ae - ab) { |
| if (cmp.equals(a, ab + p, a, ab + p)) { |
| p++; |
| } else { |
| break; |
| } |
| } |
| if (p == ae - ab) { |
| ab = nb; |
| ae = nb + p; |
| aShift = false; |
| } |
| } |
| } |
| if (aShift) { |
| while (0 < ab && ab < ae && a.charAt(ab - 1) != '\n' |
| && cmp.equals(a, ab - 1, a, ae - 1)) { |
| ab--; |
| ae--; |
| } |
| if (!a.isLineStart(ab) || !a.contains(ab, ae, '\n')) { |
| while (ab < ae && ae < a.size() && cmp.equals(a, ab, a, ae)) { |
| ab++; |
| ae++; |
| if (a.charAt(ae - 1) == '\n') { |
| break; |
| } |
| } |
| } |
| } |
| |
| boolean bShift = true; |
| if (bb < be && isOnlyWhitespace(b, bb, be)) { |
| int lf = findLF(wordEdits, j, b, bb); |
| if (lf < bb && b.charAt(lf) == '\n') { |
| int nb = lf + 1; |
| int p = 0; |
| while (p < be - bb) { |
| if (cmp.equals(b, bb + p, b, bb + p)) { |
| p++; |
| } else { |
| break; |
| } |
| } |
| if (p == be - bb) { |
| bb = nb; |
| be = nb + p; |
| bShift = false; |
| } |
| } |
| } |
| if (bShift) { |
| while (0 < bb && bb < be && b.charAt(bb - 1) != '\n' |
| && cmp.equals(b, bb - 1, b, be - 1)) { |
| bb--; |
| be--; |
| } |
| if (!b.isLineStart(bb) || !b.contains(bb, be, '\n')) { |
| while (bb < be && be < b.size() && cmp.equals(b, bb, b, be)) { |
| bb++; |
| be++; |
| if (b.charAt(be - 1) == '\n') { |
| break; |
| } |
| } |
| } |
| } |
| |
| // If most of a line was modified except the LF was common, make |
| // the LF part of the modification region. This is easier to read. |
| // |
| if (ab < ae // |
| && (ab == 0 || a.charAt(ab - 1) == '\n') // |
| && ae < a.size() && a.charAt(ae - 1) != '\n' |
| && a.charAt(ae) == '\n') { |
| ae++; |
| } |
| if (bb < be // |
| && (bb == 0 || b.charAt(bb - 1) == '\n') // |
| && be < b.size() && b.charAt(be - 1) != '\n' |
| && b.charAt(be) == '\n') { |
| be++; |
| } |
| |
| wordEdits.set(j, new Edit(ab, ae, bb, be)); |
| } |
| |
| edits.set(i, new ReplaceEdit(e, wordEdits)); |
| } |
| } |
| |
| return new IntraLineDiff(edits); |
| } |
| |
| private static void combineLineEdits(List<Edit> edits, Text a, Text b) { |
| for (int j = 0; j < edits.size() - 1;) { |
| Edit c = edits.get(j); |
| Edit n = edits.get(j + 1); |
| |
| // Combine edits that are really close together. Right now our rule |
| // is, coalesce two line edits which are only one line apart if that |
| // common context line is either a "pointless line", or is identical |
| // on both sides and starts a new block of code. These are mostly |
| // block reindents to add or remove control flow operators. |
| // |
| final int ad = n.getBeginA() - c.getEndA(); |
| final int bd = n.getBeginB() - c.getEndB(); |
| if ((1 <= ad && isBlankLineGap(a, c.getEndA(), n.getBeginA())) |
| || (1 <= bd && isBlankLineGap(b, c.getEndB(), n.getBeginB())) |
| || (ad == 1 && bd == 1 && isControlBlockStart(a, c.getEndA()))) { |
| int ab = c.getBeginA(); |
| int ae = n.getEndA(); |
| |
| int bb = c.getBeginB(); |
| int be = n.getEndB(); |
| |
| edits.set(j, new Edit(ab, ae, bb, be)); |
| edits.remove(j + 1); |
| continue; |
| } |
| |
| j++; |
| } |
| } |
| |
| private static boolean isBlankLineGap(Text a, int b, int e) { |
| for (; b < e; b++) { |
| if (!BLANK_LINE_RE.matcher(a.getString(b)).matches()) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| private static boolean isControlBlockStart(Text a, int idx) { |
| return CONTROL_BLOCK_START_RE.matcher(a.getString(idx)).find(); |
| } |
| |
| private static boolean canCoalesce(CharText a, int b, int e) { |
| while (b < e) { |
| if (a.charAt(b++) == '\n') { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| private static int findLF(List<Edit> edits, int j, CharText t, int b) { |
| int lf = b; |
| int limit = 0 < j ? edits.get(j - 1).getEndB() : 0; |
| while (limit < lf && t.charAt(lf) != '\n') { |
| lf--; |
| } |
| return lf; |
| } |
| |
| private static boolean isOnlyWhitespace(CharText t, final int b, final int e) { |
| for (int c = b; c < e; c++) { |
| if (!Character.isWhitespace(t.charAt(c))) { |
| return false; |
| } |
| } |
| return b < e; |
| } |
| } |