// Copyright (C) 2016 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.gitiles.doc;

import com.google.gitiles.doc.html.HtmlBuilder;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.commonmark.Extension;
import org.commonmark.node.AbstractVisitor;
import org.commonmark.node.HardLineBreak;
import org.commonmark.node.HtmlBlock;
import org.commonmark.node.HtmlInline;
import org.commonmark.node.Node;
import org.commonmark.node.ThematicBreak;
import org.commonmark.parser.Parser;
import org.commonmark.parser.Parser.ParserExtension;
import org.commonmark.parser.PostProcessor;

/**
 * Convert some {@link HtmlInline} and {@link HtmlBlock} to safe types.
 *
 * <p>Gitiles style Markdown accepts only a very small subset of HTML that is safe for use within
 * the document. This {@code PostProcessor} scans parsed nodes and converts them to safer types for
 * rendering:
 *
 * <ul>
 *   <li>{@link HardLineBreak}
 *   <li>{@link ThematicBreak}
 *   <li>{@link NamedAnchor}
 *   <li>{@link IframeBlock}
 * </ul>
 */
public class GitilesHtmlExtension implements ParserExtension {
  /** Matches a lone {@code hr} or {@code br} tag, optionally self-closed; group 1 is the tag. */
  private static final Pattern BREAK = Pattern.compile("<(hr|br)\\s*/?>", Pattern.CASE_INSENSITIVE);

  /** Matches an opening {@code a} tag carrying only a quoted {@code name}; group 2 is the name. */
  private static final Pattern ANCHOR_OPEN =
      Pattern.compile("<a\\s+name=([\"'])([^\"'\\s]+)\\1>", Pattern.CASE_INSENSITIVE);

  /** Matches the closing {@code a} tag. */
  private static final Pattern ANCHOR_CLOSE = Pattern.compile("</[aA]>");

  /** Matches the start of an {@code iframe} tag up to and including its first whitespace run. */
  private static final Pattern IFRAME_OPEN =
      Pattern.compile("<iframe\\s+", Pattern.CASE_INSENSITIVE);

  /** Matches the first token that terminates the {@code iframe} attribute list. */
  private static final Pattern IFRAME_CLOSE =
      Pattern.compile("(?:/?>|</iframe>)", Pattern.CASE_INSENSITIVE);

  /**
   * Matches one {@code name=value} attribute preceded by whitespace. Group 1 is the attribute name;
   * group 2 is the value, including the surrounding quotes when the value was quoted.
   */
  private static final Pattern ATTR =
      Pattern.compile(
          "\\s+([a-z-]+)\\s*=\\s*([^\\s\"'=<>`]+|'[^']*'|\"[^\"]*\")", Pattern.CASE_INSENSITIVE);

  /** Returns a new instance of this extension. */
  public static Extension create() {
    return new GitilesHtmlExtension();
  }

  private GitilesHtmlExtension() {}

  /** Registers the HTML post-processor on the parser being built. */
  @Override
  public void extend(Parser.Builder builder) {
    builder.postProcessor(new HtmlProcessor());
  }

  /** Post-processor that runs an {@link HtmlVisitor} over the node it is given. */
  private static class HtmlProcessor implements PostProcessor {
    @Override
    public Node process(Node node) {
      node.accept(new HtmlVisitor());
      return node;
    }
  }

  /** Visitor that hands every visited HTML node to the matching conversion helper. */
  private static class HtmlVisitor extends AbstractVisitor {
    @Override
    public void visit(HtmlInline node) {
      inline(node);
    }

    @Override
    public void visit(HtmlBlock node) {
      block(node);
    }
  }

  /**
   * Replaces a recognized inline HTML node with its safe equivalent.
   *
   * <p>A break tag becomes a {@link HardLineBreak} or {@link ThematicBreak}. An opening named
   * anchor immediately followed by its closing tag becomes a {@link NamedAnchor}. Any other node is
   * left untouched.
   *
   * @param curr inline HTML node to inspect; it is unlinked from the tree when replaced.
   */
  private static void inline(HtmlInline curr) {
    String html = curr.getLiteral();
    Matcher m = BREAK.matcher(html);
    if (m.matches()) {
      // Fold case with Locale.ROOT so the comparison does not depend on the JVM default locale.
      switch (m.group(1).toLowerCase(Locale.ROOT)) {
        case "br":
          curr.insertAfter(new HardLineBreak());
          curr.unlink();
          return;

        case "hr":
          curr.insertAfter(new ThematicBreak());
          curr.unlink();
          return;
      }
    }

    m = ANCHOR_OPEN.matcher(html);
    if (m.matches()) {
      String name = m.group(2);
      Node next = curr.getNext();

      // HtmlInline{<a name="id">}HtmlInline{</a>}
      if (isAnchorClose(next)) {
        next.unlink();

        NamedAnchor anchor = new NamedAnchor();
        anchor.setName(name);
        curr.insertAfter(anchor);
        curr.unlink();
        // NOTE(review): presumably strips whitespace preceding the anchor; confirm in MarkdownUtil.
        MarkdownUtil.trimPreviousWhitespace(anchor);
        return;
      }
    }
  }

  /**
   * Tests whether {@code n} is an inline HTML node whose literal is exactly a closing {@code a}
   * tag. Returns {@code false} for {@code null}.
   */
  private static boolean isAnchorClose(Node n) {
    return n instanceof HtmlInline && ANCHOR_CLOSE.matcher(((HtmlInline) n).getLiteral()).matches();
  }

  /**
   * Replaces an HTML block containing an acceptable {@code iframe} tag with an {@link IframeBlock}.
   *
   * <p>The block is left untouched when it has no {@code iframe} opening, no terminator after it,
   * or when {@link #iframe(String)} rejects the attributes.
   *
   * @param curr HTML block node to inspect; it is unlinked from the tree when replaced.
   */
  private static void block(HtmlBlock curr) {
    String html = curr.getLiteral();
    Matcher m = IFRAME_OPEN.matcher(html);
    if (m.find()) {
      // ATTR requires leading whitespace, so keep one whitespace character before the attributes.
      int start = m.end() - 1 /* leave whitespace */;
      m = IFRAME_CLOSE.matcher(html.substring(start));
      if (m.find()) {
        // m.start() is relative to the substring; shift it back to an index into html.
        int end = start + m.start();
        IframeBlock f = iframe(html.substring(start, end));
        if (f != null) {
          curr.insertAfter(f);
          curr.unlink();
          return;
        }
      }
    }
  }

  /**
   * Builds an {@link IframeBlock} from the attribute text of an {@code iframe} tag.
   *
   * <p>Only {@code src}, {@code height}, {@code width} and {@code frameborder} are read; other
   * attributes are ignored. When an attribute is repeated the last occurrence wins.
   *
   * @param html attribute portion of the tag, starting with whitespace.
   * @return the populated block, or {@code null} if {@code src} is missing or if {@code src},
   *     {@code height} or {@code width} fails its {@link HtmlBuilder} validation.
   */
  private static IframeBlock iframe(String html) {
    IframeBlock iframe = new IframeBlock();
    Matcher m = ATTR.matcher(html);
    while (m.find()) {
      // Locale.ROOT keeps names such as WIDTH and HEIGHT matching under any JVM default locale;
      // a Turkish default locale would otherwise lowercase 'I' to a dotless 'ı'.
      String att = m.group(1).toLowerCase(Locale.ROOT);
      String val = attributeValue(m);
      switch (att) {
        case "src":
          if (!HtmlBuilder.isValidHttpUri(val)) {
            return null;
          }
          iframe.src = val;
          break;

        case "height":
          if (!HtmlBuilder.isValidCssDimension(val)) {
            return null;
          }
          iframe.height = val;
          break;

        case "width":
          if (!HtmlBuilder.isValidCssDimension(val)) {
            return null;
          }
          iframe.width = val;
          break;

        case "frameborder":
          // Any value other than exactly "0" enables the border.
          iframe.border = !"0".equals(val);
          break;
      }
    }
    return iframe.src != null ? iframe : null;
  }

  /**
   * Returns the attribute value captured by the current {@link #ATTR} match, without its
   * surrounding quotes if it was quoted.
   *
   * @param m matcher positioned on a successful {@link #ATTR} match.
   */
  private static String attributeValue(Matcher m) {
    String val = m.group(2);
    if (val.length() >= 2 && (val.charAt(0) == '\'' || val.charAt(0) == '"')) {
      // Capture group includes the opening and closing quotation marks if the
      // attribute value was quoted in the source document. Trim these.
      val = val.substring(1, val.length() - 1);
    }
    return val;
  }
}
