Fix GrAnnotation.getStringLength w/ multiple surrogate pairs
The existing version of GrAnnotation.getStringLength replaced only the _first_ surrogate pair in the given string, thus failing to count symbols in strings with more than one pair correctly.
The fix makes use of the fact that the string iterator iterates over Unicode code points not code units.
An alternative fix would be to add the `global` flag to the `REGEX_ASTRAL_SYMBOL` regex.
Release-Notes: Fix GrAnnotation.getStringLength w/ multiple surrogate pairs
Change-Id: Ie8c927f639ab9c89db18b1605133cc6a656ac308
diff --git a/polygerrit-ui/app/embed/diff/gr-diff-highlight/gr-annotation.ts b/polygerrit-ui/app/embed/diff/gr-diff-highlight/gr-annotation.ts
index cc7cd49..92175eb 100644
--- a/polygerrit-ui/app/embed/diff/gr-diff-highlight/gr-annotation.ts
+++ b/polygerrit-ui/app/embed/diff/gr-diff-highlight/gr-annotation.ts
@@ -22,8 +22,14 @@
return this.getStringLength(node.textContent || '');
},
+ /**
+ * Returns the number of Unicode code points in the given string
+ *
+ * This is not necessarily the same as the number of visible symbols.
+ * See https://mathiasbynens.be/notes/javascript-unicode for more details.
+ */
getStringLength(str: string) {
- return str.replace(REGEX_ASTRAL_SYMBOL, '_').length;
+ return [...str].length;
},
/**
diff --git a/polygerrit-ui/app/embed/diff/gr-diff-highlight/gr-annotation_test.ts b/polygerrit-ui/app/embed/diff/gr-diff-highlight/gr-annotation_test.ts
index 6c45f20..fdf1785 100644
--- a/polygerrit-ui/app/embed/diff/gr-diff-highlight/gr-annotation_test.ts
+++ b/polygerrit-ui/app/embed/diff/gr-diff-highlight/gr-annotation_test.ts
@@ -327,4 +327,20 @@
assert.equal(el.getAttribute('class'), 'hello world');
});
});
+
+ suite('getStringLength', () => {
+ test('ASCII characters are counted correctly', () => {
+ assert.equal(GrAnnotation.getStringLength('ASCII'), 5);
+ });
+
+ test('Unicode surrogate pairs count as one symbol', () => {
+ assert.equal(GrAnnotation.getStringLength('Unic💢de'), 7);
+ assert.equal(GrAnnotation.getStringLength('💢💢'), 2);
+ });
+
+ test('Grapheme clusters count as multiple symbols', () => {
+ assert.equal(GrAnnotation.getStringLength('man\u0303ana'), 7); // mañana
+ assert.equal(GrAnnotation.getStringLength('q\u0307\u0323'), 3); // q̣̇
+ });
+ });
});