Fix token highlighting after surrogate pairs
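Token highlight offsets and lengths were computed from match.index and token.length, which count UTF-16 code units, so a surrogate pair (e.g. an emoji) earlier on the line was counted twice. This fixes the same bug for token highlighting that https://gerrit-review.git.corp.google.com/c/gerrit/+/352754 fixed for intraline diff highlights.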
Release-Notes: Fix token highlighting after surrogate pairs
Change-Id: Ia1f80d5bf2c65d4c3a35d6031ccb49f7a619a8ba
diff --git a/polygerrit-ui/app/embed/diff/gr-diff-builder/token-highlight-layer.ts b/polygerrit-ui/app/embed/diff/gr-diff-builder/token-highlight-layer.ts
index 1e5dd65..e9076aa 100644
--- a/polygerrit-ui/app/embed/diff/gr-diff-builder/token-highlight-layer.ts
+++ b/polygerrit-ui/app/embed/diff/gr-diff-builder/token-highlight-layer.ts
@@ -139,11 +139,17 @@
let atLeastOneTokenMatched = false;
while ((match = tokenMatcher.exec(text))) {
const token = match[0];
- const index = match.index;
- const length = token.length;
+
// Binary files encoded as text for example can have super long lines
// with super long tokens. Let's guard against this scenario.
- if (length > TOKEN_LENGTH_LIMIT) continue;
+ if (token.length > TOKEN_LENGTH_LIMIT) continue;
+
+ // match.index and token.length count UTF-16 code units (a surrogate pair is
+ // 2), so convert both via GrAnnotation.getStringLength(). If this becomes a
+ // hotspot, precompute a code unit -> code point index map before the loop.
+ const index = GrAnnotation.getStringLength(text.slice(0, match.index));
+ const length = GrAnnotation.getStringLength(token);
+
atLeastOneTokenMatched = true;
const highlightTypeClass =
token === this.currentHighlight ? CSS_HIGHLIGHT : '';
@@ -339,7 +345,7 @@
start_line: line,
start_column: index + 1, // 1-based inclusive
end_line: line,
- end_column: index + token.length, // 1-based inclusive
+ end_column: index + GrAnnotation.getStringLength(token), // 1-based inclusive
};
this.tokenHighlightListener({token, element, side, range});
}
diff --git a/polygerrit-ui/app/embed/diff/gr-diff-builder/token-highlight-layer_test.ts b/polygerrit-ui/app/embed/diff/gr-diff-builder/token-highlight-layer_test.ts
index 75c7908..8fd03bb 100644
--- a/polygerrit-ui/app/embed/diff/gr-diff-builder/token-highlight-layer_test.ts
+++ b/polygerrit-ui/app/embed/diff/gr-diff-builder/token-highlight-layer_test.ts
@@ -143,6 +143,33 @@
});
});
+ test('annotate adds css tokens w/ emojis', () => {
+ const annotateElementStub = sinon.stub(GrAnnotation, 'annotateElement');
+ const el = createLine('these 💩 are 👨👩👧👦 words');
+
+ annotate(el);
+
+ assert.isTrue(annotateElementStub.calledThrice);
+ assertAnnotation(annotateElementStub.args[0], {
+ parent: el,
+ offset: 0,
+ length: 5,
+ cssClass: 'tk-text-these tk-index-0 token ',
+ });
+ assertAnnotation(annotateElementStub.args[1], {
+ parent: el,
+ offset: 8,
+ length: 3,
+ cssClass: 'tk-text-are tk-index-8 token ',
+ });
+ assertAnnotation(annotateElementStub.args[2], {
+ parent: el,
+ offset: 20,
+ length: 5,
+ cssClass: 'tk-text-words tk-index-20 token ',
+ });
+ });
+
test('annotate adds mouse handlers', () => {
const el = createLine('these are words');
const addEventListenerStub = sinon.stub(el, 'addEventListener');