Merge "Add a utility for processing the output of HighlightJS"
diff --git a/polygerrit-ui/app/utils/hljs-util.ts b/polygerrit-ui/app/utils/hljs-util.ts
new file mode 100644
index 0000000..1bd2072
--- /dev/null
+++ b/polygerrit-ui/app/utils/hljs-util.ts
@@ -0,0 +1,145 @@
+/**
+ * @license
+ * Copyright 2022 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Utilities related to working with the HighlightJS syntax highlighting lib.
+ *
+ * Note that this utility is mostly used by the hljs-worker, which is a Web
+ * Worker and thus cannot depend on document, the DOM or any related
+ * functionality.
+ */
+
+/**
+ * These expressions match exactly what HighlightJS produces. Neither has the g
+ * flag, so exec() always reports the first tag in its input. See:
+ * https://github.com/highlightjs/highlight.js/blob/main/src/lib/html_renderer.js
+ */
+const openingSpan = new RegExp('<span class="(.*?)">'); // group 1: class value
+const closingSpan = new RegExp('</span>');
+
+/** Sentinel `length` of a SyntaxLayerRange that has not been closed yet. */
+const UNCLOSED = -1;
+
+/** Range of characters in a line to be syntax highlighted. */
+export interface SyntaxLayerRange {
+ /** 0-based offset into the line after all span tags were removed. */
+ start: number;
+ /** Number of characters. Can only be UNCLOSED during processing. */
+ length: number;
+ /** HighlightJS specific names, e.g. 'literal'. */
+ className: string;
+}
+
+/**
+ * HighlightJS produces one long HTML string whose <span>s may cover multiple
+ * lines. <gr-diff> is line based and needs every range closed at the end of
+ * its line. So the string is split into lines that are processed one by one:
+ * each span tag is removed and converted into a SyntaxLayerRange, and spans
+ * still open at the end of a line are continued at offset 0 of the next line.
+ * Returns one array of ranges per line of the input. Throws an Error if a
+ * </span> has no open span to close or if a span is never closed.
+ */
+export function highlightedStringToRanges(
+ highlightedCode: string
+): SyntaxLayerRange[][] {
+ // What the function eventually returns: one entry per input line.
+ const rangesPerLine: SyntaxLayerRange[][] = [];
+ // The unclosed ranges that are carried over from one line to the next.
+ let carryOverRanges: SyntaxLayerRange[] = [];
+
+ for (let line of highlightedCode.split('\n')) {
+ const ranges: SyntaxLayerRange[] = [...carryOverRanges];
+ carryOverRanges = [];
+ rangesPerLine.push(ranges);
+
+ // Strip span tags left to right: an opening <span ...> pushes an unclosed
+ // range, a closing </span> closes the latest unclosed one. NOTE(review):
+ // offsets count escaped entities (e.g. &amp; = 5 chars) - confirm intended.
+ let removal: SpanRemoval | undefined;
+ while ((removal = removeFirstSpan(line)) !== undefined) {
+ if (removal.type === SpanType.OPENING) {
+ ranges.push({
+ start: removal.offset,
+ length: UNCLOSED,
+ className: removal.class ?? '',
+ });
+ } else {
+ const unclosed = lastUnclosed(ranges); // throws on a stray </span>
+ unclosed.length = removal.offset - unclosed.start;
+ }
+ line = removal.lineAfter; // later offsets refer to the tag-free text
+ }
+
+ // All unclosed spans need to have the length set such that they extend to
+ // the end of the line. And they have to be carried over to the next line
+ // as cloned objects with start:0.
+ const lineLength = line.length;
+ for (const range of ranges) {
+ if (isUnclosed(range)) {
+ carryOverRanges.push({...range, start: 0}); // copy keeps length UNCLOSED
+ range.length = lineLength - range.start;
+ }
+ }
+ }
+ if (carryOverRanges.length > 0) { // still open after the last line
+ throw new Error('unclosed <span>s in highlighted code');
+ }
+ return rangesPerLine;
+}
+
+function isUnclosed(range: SyntaxLayerRange) {
+ return range.length === UNCLOSED; // sentinel is replaced once length is known
+}
+
+function lastUnclosed(ranges: SyntaxLayerRange[]) { // most recently opened one
+ const unclosed = [...ranges].reverse().find(isUnclosed); // copy: reverse() mutates
+ if (!unclosed) throw new Error('no unclosed range found');
+ return unclosed; // the same object that is stored in `ranges`, not a clone
+}
+
+/** Used for `type` in SpanRemoval: which kind of span tag was removed. */
+export enum SpanType {
+ OPENING, // matched by openingSpan
+ CLOSING, // matched by closingSpan
+}
+
+/** Return type for removeFirstSpan(). */
+export interface SpanRemoval {
+ type: SpanType;
+ /** The line string after removing the matched span tag. */
+ lineAfter: string;
+ /** The matched css class for OPENING spans. undefined for CLOSING. */
+ class?: string;
+ /** 0-based index in the input line at which the removed span tag started. */
+ offset: number;
+}
+
+/**
+ * Finds the first <span ...> or </span>, removes it from the line and returns
+ * details about the removal. Returns `undefined`, if neither is found.
+ */
+export function removeFirstSpan(line: string): SpanRemoval | undefined {
+ const openingMatch = openingSpan.exec(line);
+ const openingIndex = openingMatch?.index ?? Number.MAX_VALUE; // MAX_VALUE: none
+ const closingMatch = closingSpan.exec(line);
+ const closingIndex = closingMatch?.index ?? Number.MAX_VALUE;
+ if (openingIndex === Number.MAX_VALUE && closingIndex === Number.MAX_VALUE) {
+ return undefined;
+ }
+ const type = // whichever tag starts further left wins
+ openingIndex < closingIndex ? SpanType.OPENING : SpanType.CLOSING;
+ const offset = type === SpanType.OPENING ? openingIndex : closingIndex;
+ const match = type === SpanType.OPENING ? openingMatch : closingMatch;
+ if (match === null) return undefined; // not reachable here; narrows the type
+ const length = match[0].length; // length of the whole tag
+ const removal: SpanRemoval = {
+ type,
+ lineAfter: line.slice(0, offset) + line.slice(offset + length),
+ offset,
+ class: type === SpanType.OPENING ? match[1] : undefined,
+ };
+ return removal;
+}
diff --git a/polygerrit-ui/app/utils/hljs-util_test.ts b/polygerrit-ui/app/utils/hljs-util_test.ts
new file mode 100644
index 0000000..3c577ca
--- /dev/null
+++ b/polygerrit-ui/app/utils/hljs-util_test.ts
@@ -0,0 +1,162 @@
+/**
+ * @license
+ * Copyright 2022 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+import '../test/common-test-setup-karma';
+import './hljs-util';
+import {
+ highlightedStringToRanges,
+ removeFirstSpan,
+ SpanType,
+} from './hljs-util';
+
+suite('file hljs-util', () => {
+ suite('function removeFirstSpan()', () => {
+ test('no matches', async () => {
+ assert.isUndefined(removeFirstSpan(''));
+ assert.isUndefined(removeFirstSpan('span'));
+ assert.isUndefined(removeFirstSpan('<span>')); // no class attribute
+ assert.isUndefined(removeFirstSpan('</span')); // closing tag is incomplete
+ assert.isUndefined(removeFirstSpan('asdf'));
+ });
+
+ test('simple opening match', async () => {
+ const removal = removeFirstSpan('asdf<span class="c">asdf');
+ assert.deepEqual(removal, {
+ type: SpanType.OPENING,
+ lineAfter: 'asdfasdf',
+ class: 'c',
+ offset: 4, // index at which the removed tag started
+ });
+ });
+
+ test('simple closing match', async () => {
+ const removal = removeFirstSpan('asdf</span>asdf');
+ assert.deepEqual(removal, {
+ type: SpanType.CLOSING,
+ lineAfter: 'asdfasdf',
+ class: undefined, // only OPENING removals carry a class
+ offset: 4,
+ });
+ });
+ });
+
+ suite('function highlightedStringToRanges()', () => {
+ test('no ranges', async () => {
+ assert.deepEqual(highlightedStringToRanges(''), [[]]); // one empty line
+ assert.deepEqual(highlightedStringToRanges('\n'), [[], []]);
+ assert.deepEqual(highlightedStringToRanges('asdf\nasdf\nasdf'), [
+ [],
+ [],
+ [],
+ ]);
+ });
+
+ test('one line, one span', async () => {
+ assert.deepEqual(
+ highlightedStringToRanges('asdf<span class="c">qwer</span>asdf'),
+ [[{start: 4, length: 4, className: 'c'}]]
+ );
+ assert.deepEqual(
+ highlightedStringToRanges('<span class="d">asdfqwer</span>'),
+ [[{start: 0, length: 8, className: 'd'}]]
+ );
+ });
+
+ test('one line, two spans one after another', async () => {
+ assert.deepEqual(
+ highlightedStringToRanges(
+ 'asdf<span class="c">qwer</span>zxcv<span class="d">qwer</span>asdf'
+ ),
+ [
+ [
+ {start: 4, length: 4, className: 'c'},
+ {start: 12, length: 4, className: 'd'}, // offsets ignore removed tags
+ ],
+ ]
+ );
+ });
+
+ test('one line, two nested spans', async () => {
+ assert.deepEqual(
+ highlightedStringToRanges(
+ 'asdf<span class="c">qwer<span class="d">zxcv</span>qwer</span>asdf'
+ ),
+ [
+ [
+ {start: 4, length: 12, className: 'c'}, // outer range covers the inner
+ {start: 8, length: 4, className: 'd'},
+ ],
+ ]
+ );
+ });
+
+ test('two lines, one span each', async () => {
+ assert.deepEqual(
+ highlightedStringToRanges(
+ 'asdf<span class="c">qwer</span>asdf\n' +
+ 'asd<span class="d">qwe</span>asd'
+ ),
+ [
+ [{start: 4, length: 4, className: 'c'}],
+ [{start: 3, length: 3, className: 'd'}],
+ ]
+ );
+ });
+
+ test('one span over two lines', async () => {
+ assert.deepEqual(
+ highlightedStringToRanges(
+ 'asdf<span class="c">qwer\n' + 'asdf</span>qwer'
+ ),
+ [
+ [{start: 4, length: 4, className: 'c'}], // extends to the end of line 1
+ [{start: 0, length: 4, className: 'c'}], // carried over, starts at 0
+ ]
+ );
+ });
+
+ test('two spans over two lines', async () => {
+ assert.deepEqual(
+ highlightedStringToRanges(
+ 'asdf<span class="c">qwer<span class="d">zxcv\n' +
+ 'asdf</span>qwer</span>zxcv'
+ ),
+ [
+ [
+ {start: 4, length: 8, className: 'c'},
+ {start: 8, length: 4, className: 'd'},
+ ],
+ [
+ {start: 0, length: 8, className: 'c'}, // closed by the second </span>
+ {start: 0, length: 4, className: 'd'}, // closed by the first </span>
+ ],
+ ]
+ );
+ });
+
+ test('two spans over four lines', async () => {
+ assert.deepEqual(
+ highlightedStringToRanges(
+ 'asdf<span class="c">qwer\n' +
+ 'asdf<span class="d">qwer\n' +
+ 'asdf</span>qwer\n' +
+ 'asdf</span>qwer'
+ ),
+ [
+ [{start: 4, length: 4, className: 'c'}],
+ [
+ {start: 0, length: 8, className: 'c'}, // open across the whole line
+ {start: 4, length: 4, className: 'd'},
+ ],
+ [
+ {start: 0, length: 8, className: 'c'},
+ {start: 0, length: 4, className: 'd'},
+ ],
+ [{start: 0, length: 4, className: 'c'}],
+ ]
+ );
+ });
+ });
+});