Merge "Add a utility for processing the output of HighlightJS"
diff --git a/polygerrit-ui/app/utils/hljs-util.ts b/polygerrit-ui/app/utils/hljs-util.ts
new file mode 100644
index 0000000..1bd2072
--- /dev/null
+++ b/polygerrit-ui/app/utils/hljs-util.ts
@@ -0,0 +1,145 @@
+/**
+ * @license
+ * Copyright 2022 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/**
+ * Utilities related to working with the HighlightJS syntax highlighting lib.
+ *
+ * Note that this utility is mostly used by the hljs-worker, which is a Web
+ * Worker and can thus not depend on document, the DOM or any related
+ * functionality.
+ */
+
+/**
+ * With these expressions you can match exactly the tags that HighlightJS
+ * produces. Group 1 of `openingSpan` captures the content of `class`. See
+ * https://github.com/highlightjs/highlight.js/blob/main/src/lib/html_renderer.js
+ */
+const openingSpan = new RegExp('<span class="(.*?)">');
+const closingSpan = new RegExp('</span>');
+
+/** Sentinel `length` of a SyntaxLayerRange whose span is not closed yet. */
+const UNCLOSED = -1;
+
+/** Range of characters in a line to be syntax highlighted. */
+export interface SyntaxLayerRange {
+  /** 0-based index of the first highlighted character in the line. */
+  start: number;
+  /** Number of characters. Can only be UNCLOSED during processing. */
+  length: number;
+  /** HighlightJS specific names, e.g. 'literal'. */
+  className: string;
+}
+
+/**
+ * Converts the HTML string produced by HighlightJS into per-line ranges.
+ *
+ * HighlightJS emits one long string with <span>s that may cross line breaks,
+ * but <gr-diff> is line based. So the tags are stripped line by line, each
+ * span becomes a SyntaxLayerRange, and a span still open at the end of a line
+ * is closed there and carried over to the next. Offsets and lengths count
+ * characters of the HTML-unescaped text. Throws on unbalanced span tags.
+ */
+export function highlightedStringToRanges(
+  highlightedCode: string
+): SyntaxLayerRange[][] {
+  const rangesPerLine: SyntaxLayerRange[][] = [];
+  // The unclosed ranges that are carried over from one line to the next.
+  let carryOverRanges: SyntaxLayerRange[] = [];
+
+  for (let line of highlightedCode.split('\n')) {
+    const ranges: SyntaxLayerRange[] = [...carryOverRanges];
+    carryOverRanges = [];
+    rangesPerLine.push(ranges);
+
+    // Remove all span tags from left to right: an opening tag pushes a new
+    // unclosed range, a closing tag closes the latest unclosed range.
+    let removal: SpanRemoval | undefined;
+    while ((removal = removeFirstSpan(line)) !== undefined) {
+      // The text left of the tag is tag-free, but still HTML-escaped.
+      const offset = unescapedLength(line.slice(0, removal.offset));
+      if (removal.type === SpanType.OPENING) {
+        const className = removal.class ?? '';
+        ranges.push({start: offset, length: UNCLOSED, className});
+      } else {
+        const unclosed = lastUnclosed(ranges);
+        unclosed.length = offset - unclosed.start;
+      }
+      line = removal.lineAfter;
+    }
+
+    // Unclosed spans extend to the end of this line and are carried over to
+    // the next line as clones with start:0 that are still UNCLOSED.
+    const lineLength = unescapedLength(line);
+    for (const range of ranges) {
+      if (!isUnclosed(range)) continue;
+      carryOverRanges.push({...range, start: 0});
+      range.length = lineLength - range.start;
+    }
+  }
+  if (carryOverRanges.length > 0) {
+    throw new Error('unclosed <span>s in highlighted code');
+  }
+  return rangesPerLine;
+}
+
+/** Length of `escaped` after decoding the entities HighlightJS emits. */
+function unescapedLength(escaped: string): number {
+  return escaped.replace(/&(?:amp|lt|gt|quot|#x27);/g, '_').length;
+}
+
+function isUnclosed({length}: SyntaxLayerRange): boolean {
+  return length === UNCLOSED; // i.e. the range was opened, but not closed.
+}
+
+function lastUnclosed(ranges: SyntaxLayerRange[]): SyntaxLayerRange {
+  const innermost = ranges.slice().reverse().find(r => isUnclosed(r));
+  if (innermost !== undefined) return innermost; // Opened last, still open.
+  throw new Error('no unclosed range found');
+}
+
+/** Used for `type` in SpanRemoval: the kind of tag that was removed. */
+export enum SpanType {
+  OPENING,
+  CLOSING,
+}
+
+/** Return type for removeFirstSpan(): describes the one removed span tag. */
+export interface SpanRemoval {
+  type: SpanType;
+  /** The line string after removing the matched span tag. */
+  lineAfter: string;
+  /** The matched css class for OPENING spans. undefined for CLOSING. */
+  class?: string;
+  /** 0-based index in the passed line at which the removed tag started. */
+  offset: number;
+}
+
+/**
+ * Strips the leftmost `<span class="...">` or `</span>` tag from `line` and
+ * describes the removal. Returns `undefined`, if `line` has no such tag.
+ */
+export function removeFirstSpan(line: string): SpanRemoval | undefined {
+  const opening = openingSpan.exec(line);
+  const closing = closingSpan.exec(line);
+  let tag: RegExpExecArray;
+  let type: SpanType;
+  // The opening tag wins, if no closing tag starts before it.
+  if (opening && (!closing || opening.index < closing.index)) {
+    [tag, type] = [opening, SpanType.OPENING];
+  } else if (closing) {
+    [tag, type] = [closing, SpanType.CLOSING];
+  } else {
+    return undefined;
+  }
+  const before = line.substring(0, tag.index);
+  const after = line.substring(tag.index + tag[0].length);
+  return {
+    type,
+    lineAfter: before + after,
+    offset: tag.index,
+    class: type === SpanType.OPENING ? tag[1] : undefined,
+  };
+}
diff --git a/polygerrit-ui/app/utils/hljs-util_test.ts b/polygerrit-ui/app/utils/hljs-util_test.ts
new file mode 100644
index 0000000..3c577ca
--- /dev/null
+++ b/polygerrit-ui/app/utils/hljs-util_test.ts
@@ -0,0 +1,162 @@
+/**
+ * @license
+ * Copyright 2022 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+import '../test/common-test-setup-karma';
+import './hljs-util';
+import {
+  highlightedStringToRanges,
+  removeFirstSpan,
+  SpanType,
+} from './hljs-util';
+
+suite('file hljs-util', () => {
+  suite('function removeFirstSpan()', () => {
+    test('no matches', () => {
+      assert.isUndefined(removeFirstSpan(''));
+      assert.isUndefined(removeFirstSpan('span'));
+      assert.isUndefined(removeFirstSpan('<span>'));
+      assert.isUndefined(removeFirstSpan('</span'));
+      assert.isUndefined(removeFirstSpan('asdf'));
+    });
+
+    test('simple opening match', () => {
+      // The removed tag starts right after the 4 characters of 'asdf'.
+      assert.deepEqual(removeFirstSpan('asdf<span class="c">asdf'), {
+        type: SpanType.OPENING,
+        lineAfter: 'asdfasdf',
+        class: 'c',
+        offset: 4,
+      });
+    });
+
+    test('simple closing match', () => {
+      // A closing tag carries no class, so `class` is expected as undefined.
+      assert.deepEqual(removeFirstSpan('asdf</span>asdf'), {
+        type: SpanType.CLOSING,
+        lineAfter: 'asdfasdf',
+        class: undefined,
+        offset: 4,
+      });
+    });
+  });
+
+  suite('function highlightedStringToRanges()', () => {
+    test('no ranges', () => {
+      assert.deepEqual(highlightedStringToRanges(''), [[]]);
+      assert.deepEqual(highlightedStringToRanges('\n'), [[], []]);
+      assert.deepEqual(highlightedStringToRanges('asdf\nasdf\nasdf'), [
+        [],
+        [],
+        [],
+      ]);
+    });
+
+    test('one line, one span', () => {
+      assert.deepEqual(
+        highlightedStringToRanges('asdf<span class="c">qwer</span>asdf'),
+        [[{start: 4, length: 4, className: 'c'}]]
+      );
+      assert.deepEqual(
+        highlightedStringToRanges('<span class="d">asdfqwer</span>'),
+        [[{start: 0, length: 8, className: 'd'}]]
+      );
+    });
+
+    test('one line, two spans one after another', () => {
+      assert.deepEqual(
+        highlightedStringToRanges(
+          'asdf<span class="c">qwer</span>zxcv<span class="d">qwer</span>asdf'
+        ),
+        [
+          [
+            {start: 4, length: 4, className: 'c'},
+            {start: 12, length: 4, className: 'd'},
+          ],
+        ]
+      );
+    });
+
+    test('one line, two nested spans', () => {
+      assert.deepEqual(
+        highlightedStringToRanges(
+          'asdf<span class="c">qwer<span class="d">zxcv</span>qwer</span>asdf'
+        ),
+        [
+          [
+            {start: 4, length: 12, className: 'c'},
+            {start: 8, length: 4, className: 'd'},
+          ],
+        ]
+      );
+    });
+
+    test('two lines, one span each', () => {
+      assert.deepEqual(
+        highlightedStringToRanges(
+          'asdf<span class="c">qwer</span>asdf\n' +
+            'asd<span class="d">qwe</span>asd'
+        ),
+        [
+          [{start: 4, length: 4, className: 'c'}],
+          [{start: 3, length: 3, className: 'd'}],
+        ]
+      );
+    });
+
+    test('one span over two lines', () => {
+      assert.deepEqual(
+        highlightedStringToRanges(
+          'asdf<span class="c">qwer\n' + 'asdf</span>qwer'
+        ),
+        [
+          [{start: 4, length: 4, className: 'c'}],
+          [{start: 0, length: 4, className: 'c'}],
+        ]
+      );
+    });
+
+    test('two spans over two lines', () => {
+      assert.deepEqual(
+        highlightedStringToRanges(
+          'asdf<span class="c">qwer<span class="d">zxcv\n' +
+            'asdf</span>qwer</span>zxcv'
+        ),
+        [
+          [
+            {start: 4, length: 8, className: 'c'},
+            {start: 8, length: 4, className: 'd'},
+          ],
+          [
+            {start: 0, length: 8, className: 'c'},
+            {start: 0, length: 4, className: 'd'},
+          ],
+        ]
+      );
+    });
+
+    test('two spans over four lines', () => {
+      assert.deepEqual(
+        highlightedStringToRanges(
+          'asdf<span class="c">qwer\n' +
+            'asdf<span class="d">qwer\n' +
+            'asdf</span>qwer\n' +
+            'asdf</span>qwer'
+        ),
+        [
+          [{start: 4, length: 4, className: 'c'}],
+          [
+            {start: 0, length: 8, className: 'c'},
+            {start: 4, length: 4, className: 'd'},
+          ],
+          [
+            {start: 0, length: 8, className: 'c'},
+            {start: 0, length: 4, className: 'd'},
+          ],
+          [{start: 0, length: 4, className: 'c'}],
+        ]
+      );
+    });
+  });
+});