| // Copyright (C) 2009 The Android Open Source Project |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| |
| package com.google.gerrit.server.patch; |
| |
| import com.google.common.base.Throwables; |
| import com.google.gerrit.server.config.ConfigUtil; |
| import com.google.gerrit.server.config.GerritServerConfig; |
| import com.google.inject.assistedinject.Assisted; |
| import com.google.inject.assistedinject.AssistedInject; |
| import java.util.List; |
| import java.util.concurrent.Callable; |
| import java.util.concurrent.ExecutionException; |
| import java.util.concurrent.ExecutorService; |
| import java.util.concurrent.Future; |
| import java.util.concurrent.TimeUnit; |
| import java.util.concurrent.TimeoutException; |
| import java.util.regex.Pattern; |
| import org.eclipse.jgit.diff.Edit; |
| import org.eclipse.jgit.diff.MyersDiff; |
| import org.eclipse.jgit.diff.ReplaceEdit; |
| import org.eclipse.jgit.lib.Config; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| class IntraLineLoader implements Callable<IntraLineDiff> { |
| static final Logger log = LoggerFactory.getLogger(IntraLineLoader.class); |
| |
| interface Factory { |
| IntraLineLoader create(IntraLineDiffKey key, IntraLineDiffArgs args); |
| } |
| |
| private static final Pattern BLANK_LINE_RE = |
| Pattern.compile("^[ \\t]*(|[{}]|/\\*\\*?|\\*)[ \\t]*$"); |
| |
| private static final Pattern CONTROL_BLOCK_START_RE = Pattern.compile("[{:][ \\t]*$"); |
| |
| private final ExecutorService diffExecutor; |
| private final long timeoutMillis; |
| private final IntraLineDiffKey key; |
| private final IntraLineDiffArgs args; |
| |
| @AssistedInject |
| IntraLineLoader( |
| @DiffExecutor ExecutorService diffExecutor, |
| @GerritServerConfig Config cfg, |
| @Assisted IntraLineDiffKey key, |
| @Assisted IntraLineDiffArgs args) { |
| this.diffExecutor = diffExecutor; |
| timeoutMillis = |
| ConfigUtil.getTimeUnit( |
| cfg, |
| "cache", |
| PatchListCacheImpl.INTRA_NAME, |
| "timeout", |
| TimeUnit.MILLISECONDS.convert(5, TimeUnit.SECONDS), |
| TimeUnit.MILLISECONDS); |
| this.key = key; |
| this.args = args; |
| } |
| |
| @Override |
| public IntraLineDiff call() throws Exception { |
| Future<IntraLineDiff> result = |
| diffExecutor.submit( |
| new Callable<IntraLineDiff>() { |
| @Override |
| public IntraLineDiff call() throws Exception { |
| return IntraLineLoader.compute(args.aText(), args.bText(), args.edits()); |
| } |
| }); |
| try { |
| return result.get(timeoutMillis, TimeUnit.MILLISECONDS); |
| } catch (InterruptedException | TimeoutException e) { |
| log.warn( |
| timeoutMillis |
| + " ms timeout reached for IntraLineDiff" |
| + " in project " |
| + args.project() |
| + " on commit " |
| + args.commit().name() |
| + " for path " |
| + args.path() |
| + " comparing " |
| + key.getBlobA().name() |
| + ".." |
| + key.getBlobB().name()); |
| result.cancel(true); |
| return new IntraLineDiff(IntraLineDiff.Status.TIMEOUT); |
| } catch (ExecutionException e) { |
| // If there was an error computing the result, carry it |
| // up to the caller so the cache knows this key is invalid. |
| Throwables.throwIfInstanceOf(e.getCause(), Exception.class); |
| throw new Exception(e.getMessage(), e.getCause()); |
| } |
| } |
| |
| static IntraLineDiff compute(Text aText, Text bText, List<Edit> edits) throws Exception { |
| combineLineEdits(edits, aText, bText); |
| |
| for (int i = 0; i < edits.size(); i++) { |
| Edit e = edits.get(i); |
| |
| if (e.getType() == Edit.Type.REPLACE) { |
| CharText a = new CharText(aText, e.getBeginA(), e.getEndA()); |
| CharText b = new CharText(bText, e.getBeginB(), e.getEndB()); |
| CharTextComparator cmp = new CharTextComparator(); |
| |
| List<Edit> wordEdits = MyersDiff.INSTANCE.diff(cmp, a, b); |
| |
| // Combine edits that are really close together. If they are |
| // just a few characters apart we tend to get better results |
| // by joining them together and taking the whole span. |
| // |
| for (int j = 0; j < wordEdits.size() - 1; ) { |
| Edit c = wordEdits.get(j); |
| Edit n = wordEdits.get(j + 1); |
| |
| if (n.getBeginA() - c.getEndA() <= 5 || n.getBeginB() - c.getEndB() <= 5) { |
| int ab = c.getBeginA(); |
| int ae = n.getEndA(); |
| |
| int bb = c.getBeginB(); |
| int be = n.getEndB(); |
| |
| if (canCoalesce(a, c.getEndA(), n.getBeginA()) |
| && canCoalesce(b, c.getEndB(), n.getBeginB())) { |
| wordEdits.set(j, new Edit(ab, ae, bb, be)); |
| wordEdits.remove(j + 1); |
| continue; |
| } |
| } |
| |
| j++; |
| } |
| |
| // Apply some simple rules to fix up some of the edits. Our |
| // logic above, along with our per-character difference tends |
| // to produce some crazy stuff. |
| // |
| for (int j = 0; j < wordEdits.size(); j++) { |
| Edit c = wordEdits.get(j); |
| int ab = c.getBeginA(); |
| int ae = c.getEndA(); |
| |
| int bb = c.getBeginB(); |
| int be = c.getEndB(); |
| |
| // Sometimes the diff generator produces an INSERT or DELETE |
| // right up against a REPLACE, but we only find this after |
| // we've also played some shifting games on the prior edit. |
| // If that happened to us, coalesce them together so we can |
| // correct this mess for the user. If we don't we wind up |
| // with silly stuff like "es" -> "es = Addresses". |
| // |
| if (1 < j) { |
| Edit p = wordEdits.get(j - 1); |
| if (p.getEndA() == ab || p.getEndB() == bb) { |
| if (p.getEndA() == ab && p.getBeginA() < p.getEndA()) { |
| ab = p.getBeginA(); |
| } |
| if (p.getEndB() == bb && p.getBeginB() < p.getEndB()) { |
| bb = p.getBeginB(); |
| } |
| wordEdits.remove(--j); |
| } |
| } |
| |
| // We sometimes collapsed an edit together in a strange way, |
| // such that the edges of each text is identical. Fix by |
| // by dropping out that incorrectly replaced region. |
| // |
| while (ab < ae && bb < be && cmp.equals(a, ab, b, bb)) { |
| ab++; |
| bb++; |
| } |
| while (ab < ae && bb < be && cmp.equals(a, ae - 1, b, be - 1)) { |
| ae--; |
| be--; |
| } |
| |
| // The leading part of an edit and its trailing part in the same |
| // text might be identical. Slide down that edit and use the tail |
| // rather than the leading bit. |
| // |
| while (0 < ab |
| && ab < ae |
| && a.charAt(ab - 1) != '\n' |
| && cmp.equals(a, ab - 1, a, ae - 1)) { |
| ab--; |
| ae--; |
| } |
| if (!a.isLineStart(ab) || !a.contains(ab, ae, '\n')) { |
| while (ab < ae && ae < a.size() && cmp.equals(a, ab, a, ae)) { |
| ab++; |
| ae++; |
| if (a.charAt(ae - 1) == '\n') { |
| break; |
| } |
| } |
| } |
| |
| while (0 < bb |
| && bb < be |
| && b.charAt(bb - 1) != '\n' |
| && cmp.equals(b, bb - 1, b, be - 1)) { |
| bb--; |
| be--; |
| } |
| if (!b.isLineStart(bb) || !b.contains(bb, be, '\n')) { |
| while (bb < be && be < b.size() && cmp.equals(b, bb, b, be)) { |
| bb++; |
| be++; |
| if (b.charAt(be - 1) == '\n') { |
| break; |
| } |
| } |
| } |
| |
| // If most of a line was modified except the LF was common, make |
| // the LF part of the modification region. This is easier to read. |
| // |
| if (ab < ae // |
| && (ab == 0 || a.charAt(ab - 1) == '\n') // |
| && ae < a.size() |
| && a.charAt(ae - 1) != '\n' |
| && a.charAt(ae) == '\n') { |
| ae++; |
| } |
| if (bb < be // |
| && (bb == 0 || b.charAt(bb - 1) == '\n') // |
| && be < b.size() |
| && b.charAt(be - 1) != '\n' |
| && b.charAt(be) == '\n') { |
| be++; |
| } |
| |
| wordEdits.set(j, new Edit(ab, ae, bb, be)); |
| } |
| |
| edits.set(i, new ReplaceEdit(e, wordEdits)); |
| } |
| } |
| |
| return new IntraLineDiff(edits); |
| } |
| |
| private static void combineLineEdits(List<Edit> edits, Text a, Text b) { |
| for (int j = 0; j < edits.size() - 1; ) { |
| Edit c = edits.get(j); |
| Edit n = edits.get(j + 1); |
| |
| // Combine edits that are really close together. Right now our rule |
| // is, coalesce two line edits which are only one line apart if that |
| // common context line is either a "pointless line", or is identical |
| // on both sides and starts a new block of code. These are mostly |
| // block reindents to add or remove control flow operators. |
| // |
| final int ad = n.getBeginA() - c.getEndA(); |
| final int bd = n.getBeginB() - c.getEndB(); |
| if ((1 <= ad && isBlankLineGap(a, c.getEndA(), n.getBeginA())) |
| || (1 <= bd && isBlankLineGap(b, c.getEndB(), n.getBeginB())) |
| || (ad == 1 && bd == 1 && isControlBlockStart(a, c.getEndA()))) { |
| int ab = c.getBeginA(); |
| int ae = n.getEndA(); |
| |
| int bb = c.getBeginB(); |
| int be = n.getEndB(); |
| |
| edits.set(j, new Edit(ab, ae, bb, be)); |
| edits.remove(j + 1); |
| continue; |
| } |
| |
| j++; |
| } |
| } |
| |
| private static boolean isBlankLineGap(Text a, int b, int e) { |
| for (; b < e; b++) { |
| if (!BLANK_LINE_RE.matcher(a.getString(b)).matches()) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| private static boolean isControlBlockStart(Text a, int idx) { |
| return CONTROL_BLOCK_START_RE.matcher(a.getString(idx)).find(); |
| } |
| |
| private static boolean canCoalesce(CharText a, int b, int e) { |
| while (b < e) { |
| if (a.charAt(b++) == '\n') { |
| return false; |
| } |
| } |
| return true; |
| } |
| } |