Tweak the intraline difference heuristics

We had a few cases where the intraline difference was showing us
something confusing, like replacing characters in a common word
with other characters from a different word.

A bit of shifting the offsets after collapsing neighboring edits
resolves this nicely.

Change-Id: Ic947912277d96bf78afbc9831fb16e4b64e31fb1
Signed-off-by: Shawn O. Pearce <sop@google.com>
diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/patch/CharText.java b/gerrit-server/src/main/java/com/google/gerrit/server/patch/CharText.java
index ce2f7cc..c15c468 100644
--- a/gerrit-server/src/main/java/com/google/gerrit/server/patch/CharText.java
+++ b/gerrit-server/src/main/java/com/google/gerrit/server/patch/CharText.java
@@ -20,7 +20,11 @@
   private final String content;
 
   CharText(Text text, int s, int e) {
-    content = text.getLines(s, e);
+    content = text.getLines(s, e, false /* keep LF */);
+  }
+
+  char charAt(int idx) {
+    return content.charAt(idx);
   }
 
   @Override
diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/patch/PatchListCacheImpl.java b/gerrit-server/src/main/java/com/google/gerrit/server/patch/PatchListCacheImpl.java
index 8509ca1..be4eaab 100644
--- a/gerrit-server/src/main/java/com/google/gerrit/server/patch/PatchListCacheImpl.java
+++ b/gerrit-server/src/main/java/com/google/gerrit/server/patch/PatchListCacheImpl.java
@@ -229,23 +229,137 @@
         CharText b = new CharText(bContent, e.getBeginB(), e.getEndB());
 
         List<Edit> wordEdits = new MyersDiff(a, b).getEdits();
+
+        // Combine edits that are really close together. If they are
+        // just a few characters apart we tend to get better results
+        // by joining them together and taking the whole span.
+        //
         for (int j = 0; j < wordEdits.size() - 1;) {
           Edit c = wordEdits.get(j);
           Edit n = wordEdits.get(j + 1);
 
-          if (n.getBeginA() - c.getEndA() <= 2
-              || n.getBeginB() - c.getEndB() <= 2) {
-            // This edit is incredibly close to the start of the next.
-            // Combine them together.
-            //
-            wordEdits.set(j, new Edit(c.getBeginA(), n.getEndA(),
-                c.getBeginB(), n.getEndB()));
+          if (n.getBeginA() - c.getEndA() <= 5
+              || n.getBeginB() - c.getEndB() <= 5) {
+            int ab = c.getBeginA();
+            int ae = n.getEndA();
+
+            int bb = c.getBeginB();
+            int be = n.getEndB();
+
+            wordEdits.set(j, new Edit(ab, ae, bb, be));
             wordEdits.remove(j + 1);
             continue;
           }
 
           j++;
         }
+
+        // Apply some simple rules to fix up some of the edits. Our
+        // logic above, along with our per-character difference tends
+        // to produce some crazy stuff.
+        //
+        for (int j = 0; j < wordEdits.size(); j++) {
+          Edit c = wordEdits.get(j);
+          int ab = c.getBeginA();
+          int ae = c.getEndA();
+
+          int bb = c.getBeginB();
+          int be = c.getEndB();
+
+          // We sometimes collapsed an edit together in a strange way,
+          // such that the edges of each text is identical. Fix by
+          // by dropping out that incorrectly replaced region.
+          //
+          while (ab < ae && bb < be && a.equals(ab, b, bb)) {
+            ab++;
+            bb++;
+          }
+          while (ab < ae && bb < be && a.equals(ae - 1, b, be - 1)) {
+            ae--;
+            be--;
+          }
+
+          // The leading part of an edit and its trailing part in the same
+          // text might be identical. Slide down that edit and use the tail
+          // rather than the leading bit. If however the edit is only on a
+          // whitespace block try to shift it to the left margin, assuming
+          // that it is an indentation change.
+          //
+          boolean aShiftRight = true;
+          if (ab < ae && isOnlyWhitespace(a, ab, ae)) {
+            int lf = findLF(wordEdits, j, a, ab);
+            if (a.charAt(lf) == '\n') {
+              int nb = lf + 1;
+              int p = 0;
+              while (p < ae - ab) {
+                if (a.equals(ab + p, a, ab + p))
+                  p++;
+                else
+                  break;
+              }
+              if (p == ae - ab) {
+                ab = nb;
+                ae = nb + p;
+                aShiftRight = false;
+              }
+            }
+          }
+          if (aShiftRight) {
+            while (ab < ae && ae < a.size() && a.equals(ab, a, ae)) {
+              ab++;
+              ae++;
+              if (a.charAt(ae - 1) == '\n') {
+                break;
+              }
+            }
+          }
+
+          boolean bShiftRight = true;
+          if (bb < be && isOnlyWhitespace(b, bb, be)) {
+            int lf = findLF(wordEdits, j, b, bb);
+            if (b.charAt(lf) == '\n') {
+              int nb = lf + 1;
+              int p = 0;
+              while (p < be - bb) {
+                if (b.equals(bb + p, b, bb + p))
+                  p++;
+                else
+                  break;
+              }
+              if (p == be - bb) {
+                bb = nb;
+                be = nb + p;
+                bShiftRight = false;
+              }
+            }
+          }
+          if (bShiftRight) {
+            while (bb < be && be < b.size() && b.equals(bb, b, be)) {
+              bb++;
+              be++;
+              if (b.charAt(be - 1) == '\n') {
+                break;
+              }
+            }
+          }
+
+          // If most of a line was modified except the LF was common, make
+          // the LF part of the modification region. This is easier to read.
+          //
+          if (ab < ae //
+              && (ab == 0 || a.charAt(ab - 1) == '\n') //
+              && ae < a.size() && a.charAt(ae) == '\n') {
+            ae++;
+          }
+          if (bb < be //
+              && (bb == 0 || b.charAt(bb - 1) == '\n') //
+              && be < b.size() && b.charAt(be) == '\n') {
+            be++;
+          }
+
+          wordEdits.set(j, new Edit(ab, ae, bb, be));
+        }
+
         edits.set(i, new ReplaceEdit(e, wordEdits));
       }
     }
@@ -253,6 +367,24 @@
     return new PatchListEntry(fileHeader, edits);
   }
 
+  private static int findLF(List<Edit> edits, int j, CharText t, int b) {
+    int lf = b;
+    int limit = 0 < j ? edits.get(j - 1).getEndB() : 0;
+    while (limit < lf && t.charAt(lf) != '\n') {
+      lf--;
+    }
+    return lf;
+  }
+
+  private static boolean isOnlyWhitespace(CharText t, final int b, final int e) {
+    for (int c = b; c < e; c++) {
+      if (!Character.isWhitespace(t.charAt(c))) {
+        return false;
+      }
+    }
+    return b < e;
+  }
+
   private static Text read(Repository repo, String path, RevTree tree)
       throws IOException {
     TreeWalk tw = TreeWalk.forPath(repo, path, tree);
diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/patch/Text.java b/gerrit-server/src/main/java/com/google/gerrit/server/patch/Text.java
index f89acbd..502c7a6 100644
--- a/gerrit-server/src/main/java/com/google/gerrit/server/patch/Text.java
+++ b/gerrit-server/src/main/java/com/google/gerrit/server/patch/Text.java
@@ -52,17 +52,17 @@
   }
 
   public String getLine(final int i) {
-    return getLines(i, i + 1);
+    return getLines(i, i + 1, true);
   }
 
-  public String getLines(final int begin, final int end) {
+  public String getLines(final int begin, final int end, boolean dropLF) {
     if (begin == end) {
       return "";
     }
 
     final int s = getLineStart(begin);
     int e = getLineEnd(end - 1);
-    if (content[e - 1] == '\n') {
+    if (dropLF && content[e - 1] == '\n') {
       e--;
     }
     return decode(s, e);