| /* |
| * Copyright 2013 gitblit.com. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package com.gitblit.wicket; |
| |
| import static org.pegdown.FastEncoder.encode; |
| |
| import java.io.Serializable; |
| import java.io.StringWriter; |
| import java.io.UnsupportedEncodingException; |
| import java.net.URLEncoder; |
| import java.text.MessageFormat; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| |
| import org.apache.wicket.Page; |
| import org.apache.wicket.RequestCycle; |
| import org.eclipse.jgit.lib.Repository; |
| import org.eclipse.jgit.revwalk.RevCommit; |
| import org.eclipse.mylyn.wikitext.confluence.core.ConfluenceLanguage; |
| import org.eclipse.mylyn.wikitext.core.parser.Attributes; |
| import org.eclipse.mylyn.wikitext.core.parser.MarkupParser; |
| import org.eclipse.mylyn.wikitext.core.parser.builder.HtmlDocumentBuilder; |
| import org.eclipse.mylyn.wikitext.core.parser.markup.MarkupLanguage; |
| import org.eclipse.mylyn.wikitext.mediawiki.core.MediaWikiLanguage; |
| import org.eclipse.mylyn.wikitext.textile.core.TextileLanguage; |
| import org.eclipse.mylyn.wikitext.tracwiki.core.TracWikiLanguage; |
| import org.eclipse.mylyn.wikitext.twiki.core.TWikiLanguage; |
| import org.pegdown.DefaultVerbatimSerializer; |
| import org.pegdown.LinkRenderer; |
| import org.pegdown.ToHtmlSerializer; |
| import org.pegdown.VerbatimSerializer; |
| import org.pegdown.ast.ExpImageNode; |
| import org.pegdown.ast.RefImageNode; |
| import org.pegdown.ast.WikiLinkNode; |
| import org.pegdown.plugins.ToHtmlSerializerPlugin; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import com.gitblit.IStoredSettings; |
| import com.gitblit.Keys; |
| import com.gitblit.models.PathModel; |
| import com.gitblit.servlet.RawServlet; |
| import com.gitblit.utils.JGitUtils; |
| import com.gitblit.utils.MarkdownUtils; |
| import com.gitblit.utils.StringUtils; |
| import com.gitblit.utils.XssFilter; |
| import com.gitblit.wicket.pages.DocPage; |
| import com.google.common.base.Joiner; |
| |
| /** |
| * Processes markup content and generates html with repository-relative page and |
| * image linking. |
| * |
| * @author James Moger |
| * |
| */ |
| public class MarkupProcessor { |
| |
/**
 * The markup dialects a document can be classified as. {@code PLAIN} is
 * the classification used when no configured markup extension matches.
 */
public enum MarkupSyntax {
    PLAIN,
    MARKDOWN,
    TWIKI,
    TRACWIKI,
    TEXTILE,
    MEDIAWIKI,
    CONFLUENCE
}
| |
| private Logger logger = LoggerFactory.getLogger(getClass()); |
| |
| private final IStoredSettings settings; |
| |
| private final XssFilter xssFilter; |
| |
| public static List<String> getMarkupExtensions(IStoredSettings settings) { |
| List<String> list = new ArrayList<String>(); |
| list.addAll(settings.getStrings(Keys.web.confluenceExtensions)); |
| list.addAll(settings.getStrings(Keys.web.markdownExtensions)); |
| list.addAll(settings.getStrings(Keys.web.mediawikiExtensions)); |
| list.addAll(settings.getStrings(Keys.web.textileExtensions)); |
| list.addAll(settings.getStrings(Keys.web.tracwikiExtensions)); |
| list.addAll(settings.getStrings(Keys.web.twikiExtensions)); |
| return list; |
| } |
| |
/**
 * Creates a processor bound to the given settings and html filter.
 *
 * @param settings
 *            source of the markup extension lists, document roots and encodings
 * @param xssFilter
 *            filter applied to the html produced from markup
 */
public MarkupProcessor(IStoredSettings settings, XssFilter xssFilter) {
    this.xssFilter = xssFilter;
    this.settings = settings;
}
| |
| public List<String> getMarkupExtensions() { |
| return getMarkupExtensions(settings); |
| } |
| |
| public List<String> getAllExtensions() { |
| List<String> list = getMarkupExtensions(settings); |
| list.add("txt"); |
| list.add("TXT"); |
| return list; |
| } |
| |
| private List<String> getRoots() { |
| return settings.getStrings(Keys.web.documents); |
| } |
| |
| private String [] getEncodings() { |
| return settings.getStrings(Keys.web.blobEncodings).toArray(new String[0]); |
| } |
| |
| private MarkupSyntax determineSyntax(String documentPath) { |
| String ext = StringUtils.getFileExtension(documentPath).toLowerCase(); |
| if (StringUtils.isEmpty(ext)) { |
| return MarkupSyntax.PLAIN; |
| } |
| |
| if (settings.getStrings(Keys.web.confluenceExtensions).contains(ext)) { |
| return MarkupSyntax.CONFLUENCE; |
| } else if (settings.getStrings(Keys.web.markdownExtensions).contains(ext)) { |
| return MarkupSyntax.MARKDOWN; |
| } else if (settings.getStrings(Keys.web.mediawikiExtensions).contains(ext)) { |
| return MarkupSyntax.MEDIAWIKI; |
| } else if (settings.getStrings(Keys.web.textileExtensions).contains(ext)) { |
| return MarkupSyntax.TEXTILE; |
| } else if (settings.getStrings(Keys.web.tracwikiExtensions).contains(ext)) { |
| return MarkupSyntax.TRACWIKI; |
| } else if (settings.getStrings(Keys.web.twikiExtensions).contains(ext)) { |
| return MarkupSyntax.TWIKI; |
| } |
| |
| return MarkupSyntax.PLAIN; |
| } |
| |
| public boolean hasRootDocs(Repository r) { |
| List<String> roots = getRoots(); |
| List<String> extensions = getAllExtensions(); |
| List<PathModel> paths = JGitUtils.getFilesInPath(r, null, null); |
| for (PathModel path : paths) { |
| if (!path.isTree()) { |
| String ext = StringUtils.getFileExtension(path.name).toLowerCase(); |
| String name = StringUtils.stripFileExtension(path.name).toLowerCase(); |
| |
| if (roots.contains(name)) { |
| if (StringUtils.isEmpty(ext) || extensions.contains(ext)) { |
| return true; |
| } |
| } |
| } |
| } |
| return false; |
| } |
| |
| public List<MarkupDocument> getRootDocs(Repository r, String repositoryName, String commitId) { |
| List<String> roots = getRoots(); |
| List<MarkupDocument> list = getDocs(r, repositoryName, commitId, roots); |
| return list; |
| } |
| |
| public MarkupDocument getReadme(Repository r, String repositoryName, String commitId) { |
| List<MarkupDocument> list = getDocs(r, repositoryName, commitId, Arrays.asList("readme")); |
| if (list.isEmpty()) { |
| return null; |
| } |
| return list.get(0); |
| } |
| |
| private List<MarkupDocument> getDocs(Repository r, String repositoryName, String commitId, List<String> names) { |
| List<String> extensions = getAllExtensions(); |
| String [] encodings = getEncodings(); |
| Map<String, MarkupDocument> map = new HashMap<String, MarkupDocument>(); |
| RevCommit commit = JGitUtils.getCommit(r, commitId); |
| List<PathModel> paths = JGitUtils.getFilesInPath(r, null, commit); |
| for (PathModel path : paths) { |
| if (!path.isTree()) { |
| String ext = StringUtils.getFileExtension(path.name).toLowerCase(); |
| String name = StringUtils.stripFileExtension(path.name).toLowerCase(); |
| |
| if (names.contains(name)) { |
| if (StringUtils.isEmpty(ext) || extensions.contains(ext)) { |
| String markup = JGitUtils.getStringContent(r, commit.getTree(), path.name, encodings); |
| MarkupDocument doc = parse(repositoryName, commitId, path.name, markup); |
| map.put(name, doc); |
| } |
| } |
| } |
| } |
| // return document list in requested order |
| List<MarkupDocument> list = new ArrayList<MarkupDocument>(); |
| for (String name : names) { |
| if (map.containsKey(name)) { |
| list.add(map.get(name)); |
| } |
| } |
| return list; |
| } |
| |
| public MarkupDocument parse(String repositoryName, String commitId, String documentPath, String markupText) { |
| final MarkupSyntax syntax = determineSyntax(documentPath); |
| final MarkupDocument doc = new MarkupDocument(documentPath, markupText, syntax); |
| |
| if (markupText != null) { |
| try { |
| switch (syntax){ |
| case CONFLUENCE: |
| parse(doc, repositoryName, commitId, new ConfluenceLanguage()); |
| break; |
| case MARKDOWN: |
| parse(doc, repositoryName, commitId); |
| break; |
| case MEDIAWIKI: |
| parse(doc, repositoryName, commitId, new MediaWikiLanguage()); |
| break; |
| case TEXTILE: |
| parse(doc, repositoryName, commitId, new TextileLanguage()); |
| break; |
| case TRACWIKI: |
| parse(doc, repositoryName, commitId, new TracWikiLanguage()); |
| break; |
| case TWIKI: |
| parse(doc, repositoryName, commitId, new TWikiLanguage()); |
| break; |
| default: |
| doc.html = MarkdownUtils.transformPlainText(markupText); |
| break; |
| } |
| } catch (Exception e) { |
| logger.error("failed to transform " + syntax, e); |
| } |
| } |
| |
| if (doc.html == null) { |
| // failed to transform markup |
| if (markupText == null) { |
| markupText = String.format("Document <b>%1$s</b> not found in <em>%2$s</em>", documentPath, repositoryName); |
| } |
| markupText = MessageFormat.format("<div class=\"alert alert-error\"><strong>{0}:</strong> {1}</div>{2}", "Error", "failed to parse markup", markupText); |
| doc.html = StringUtils.breakLinesForHtml(markupText); |
| } |
| |
| return doc; |
| } |
| |
| /** |
| * Parses the markup using the specified markup language |
| * |
| * @param doc |
| * @param repositoryName |
| * @param commitId |
| * @param lang |
| */ |
| private void parse(final MarkupDocument doc, final String repositoryName, final String commitId, MarkupLanguage lang) { |
| StringWriter writer = new StringWriter(); |
| HtmlDocumentBuilder builder = new HtmlDocumentBuilder(writer) { |
| |
| @Override |
| public void image(Attributes attributes, String imagePath) { |
| String url; |
| if (imagePath.indexOf("://") == -1) { |
| // relative image |
| String path = doc.getRelativePath(imagePath); |
| String contextUrl = RequestCycle.get().getRequest().getRelativePathPrefixToContextRoot(); |
| url = RawServlet.asLink(contextUrl, repositoryName, commitId, path); |
| } else { |
| // absolute image |
| url = imagePath; |
| } |
| super.image(attributes, url); |
| } |
| |
| @Override |
| public void link(Attributes attributes, String hrefOrHashName, String text) { |
| String url; |
| if (hrefOrHashName.charAt(0) != '#') { |
| if (hrefOrHashName.indexOf("://") == -1) { |
| // relative link |
| String path = doc.getRelativePath(hrefOrHashName); |
| url = getWicketUrl(DocPage.class, repositoryName, commitId, path); |
| } else { |
| // absolute link |
| url = hrefOrHashName; |
| } |
| } else { |
| // page-relative hash link |
| url = hrefOrHashName; |
| } |
| super.link(attributes, url, text); |
| } |
| }; |
| |
| // avoid the <html> and <body> tags |
| builder.setEmitAsDocument(false); |
| |
| MarkupParser parser = new MarkupParser(lang); |
| parser.setBuilder(builder); |
| parser.parse(doc.markup); |
| |
| final String content = writer.toString(); |
| final String safeContent = xssFilter.relaxed(content); |
| |
| doc.html = safeContent; |
| } |
| |
| /** |
| * Parses the document as Markdown using Pegdown. |
| * |
| * @param doc |
| * @param repositoryName |
| * @param commitId |
| */ |
| private void parse(final MarkupDocument doc, final String repositoryName, final String commitId) { |
| LinkRenderer renderer = new LinkRenderer() { |
| |
| @Override |
| public Rendering render(ExpImageNode node, String text) { |
| if (node.url.indexOf("://") == -1) { |
| // repository-relative image link |
| String path = doc.getRelativePath(node.url); |
| String contextUrl = RequestCycle.get().getRequest().getRelativePathPrefixToContextRoot(); |
| String url = RawServlet.asLink(contextUrl, repositoryName, commitId, path); |
| return new Rendering(url, text); |
| } |
| // absolute image link |
| return new Rendering(node.url, text); |
| } |
| |
| @Override |
| public Rendering render(RefImageNode node, String url, String title, String alt) { |
| Rendering rendering; |
| if (url.indexOf("://") == -1) { |
| // repository-relative image link |
| String path = doc.getRelativePath(url); |
| String contextUrl = RequestCycle.get().getRequest().getRelativePathPrefixToContextRoot(); |
| String wurl = RawServlet.asLink(contextUrl, repositoryName, commitId, path); |
| rendering = new Rendering(wurl, alt); |
| } else { |
| // absolute image link |
| rendering = new Rendering(url, alt); |
| } |
| return StringUtils.isEmpty(title) ? rendering : rendering.withAttribute("title", encode(title)); |
| } |
| |
| @Override |
| public Rendering render(WikiLinkNode node) { |
| String path = doc.getRelativePath(node.getText()); |
| String name = getDocumentName(path); |
| String url = getWicketUrl(DocPage.class, repositoryName, commitId, path); |
| return new Rendering(url, name); |
| } |
| }; |
| |
| final String content = MarkdownUtils.transformMarkdown(doc.markup, renderer); |
| final String safeContent = xssFilter.relaxed(content); |
| |
| doc.html = safeContent; |
| } |
| |
| private String getWicketUrl(Class<? extends Page> pageClass, final String repositoryName, final String commitId, final String document) { |
| String fsc = settings.getString(Keys.web.forwardSlashCharacter, "/"); |
| String encodedPath = document.replace(' ', '-'); |
| try { |
| encodedPath = URLEncoder.encode(encodedPath, "UTF-8"); |
| } catch (UnsupportedEncodingException e) { |
| logger.error(null, e); |
| } |
| encodedPath = encodedPath.replace("/", fsc).replace("%2F", fsc); |
| |
| String url = RequestCycle.get().urlFor(pageClass, WicketUtils.newPathParameter(repositoryName, commitId, encodedPath)).toString(); |
| return url; |
| } |
| |
| private String getDocumentName(final String document) { |
| // extract document name |
| String name = StringUtils.stripFileExtension(document); |
| name = name.replace('_', ' '); |
| if (name.indexOf('/') > -1) { |
| name = name.substring(name.lastIndexOf('/') + 1); |
| } |
| return name; |
| } |
| |
| public static class MarkupDocument implements Serializable { |
| |
| private static final long serialVersionUID = 1L; |
| |
| public final String documentPath; |
| public final String markup; |
| public final MarkupSyntax syntax; |
| public String html; |
| |
| MarkupDocument(String documentPath, String markup, MarkupSyntax syntax) { |
| this.documentPath = documentPath; |
| this.markup = markup; |
| this.syntax = syntax; |
| } |
| |
| String getCurrentPath() { |
| String basePath = ""; |
| if (documentPath.indexOf('/') > -1) { |
| basePath = documentPath.substring(0, documentPath.lastIndexOf('/') + 1); |
| if (basePath.charAt(0) == '/') { |
| return basePath.substring(1); |
| } |
| } |
| return basePath; |
| } |
| |
| String getRelativePath(String ref) { |
| if (ref.charAt(0) == '/') { |
| // absolute path in repository |
| return ref.substring(1); |
| } else { |
| // resolve relative repository path |
| String cp = getCurrentPath(); |
| if (StringUtils.isEmpty(cp)) { |
| return ref; |
| } |
| // this is a simple relative path resolver |
| List<String> currPathStrings = new ArrayList<String>(Arrays.asList(cp.split("/"))); |
| String file = ref; |
| while (file.startsWith("../")) { |
| // strip ../ from the file reference |
| // drop the last path element |
| file = file.substring(3); |
| currPathStrings.remove(currPathStrings.size() - 1); |
| } |
| currPathStrings.add(file); |
| String path = Joiner.on("/").join(currPathStrings); |
| return path; |
| } |
| } |
| } |
| |
| /** |
| * This class implements a workaround for a bug reported in issue-379. |
| * The bug was introduced by my own pegdown pull request #115. |
| * |
| * @author James Moger |
| * |
| */ |
| public static class WorkaroundHtmlSerializer extends ToHtmlSerializer { |
| |
| public WorkaroundHtmlSerializer(final LinkRenderer linkRenderer) { |
| super(linkRenderer, |
| Collections.<String, VerbatimSerializer>singletonMap(VerbatimSerializer.DEFAULT, DefaultVerbatimSerializer.INSTANCE), |
| Collections.<ToHtmlSerializerPlugin>emptyList()); |
| } |
| private void printAttribute(String name, String value) { |
| printer.print(' ').print(name).print('=').print('"').print(value).print('"'); |
| } |
| |
| /* Reimplement print image tag to eliminate a trailing double-quote */ |
| @Override |
| protected void printImageTag(LinkRenderer.Rendering rendering) { |
| printer.print("<img"); |
| printAttribute("src", rendering.href); |
| printAttribute("alt", rendering.text); |
| for (LinkRenderer.Attribute attr : rendering.attributes) { |
| printAttribute(attr.name, attr.value); |
| } |
| printer.print("/>"); |
| } |
| } |
| } |