| // Copyright (C) 2013 The Android Open Source Project |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| import static java.nio.charset.StandardCharsets.UTF_8; |
| |
| import com.google.gerrit.server.documentation.Constants; |
| import java.io.BufferedReader; |
| import java.io.ByteArrayOutputStream; |
| import java.io.File; |
| import java.io.FileNotFoundException; |
| import java.io.FileReader; |
| import java.io.IOException; |
| import java.io.InputStreamReader; |
| import java.io.UnsupportedEncodingException; |
| import java.nio.file.Files; |
| import java.nio.file.Paths; |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.jar.JarEntry; |
| import java.util.jar.JarOutputStream; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| import java.util.zip.ZipEntry; |
| import java.util.zip.ZipOutputStream; |
| import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| import org.apache.lucene.analysis.util.CharArraySet; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.StringField; |
| import org.apache.lucene.document.TextField; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.IndexWriterConfig; |
| import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.RAMDirectory; |
| import org.kohsuke.args4j.Argument; |
| import org.kohsuke.args4j.CmdLineException; |
| import org.kohsuke.args4j.CmdLineParser; |
| import org.kohsuke.args4j.Option; |
| |
| public class DocIndexer { |
| private static final Pattern SECTION_HEADER = Pattern.compile("^=+ (.*)"); |
| |
| @Option(name = "-o", usage = "output JAR file") |
| private String outFile; |
| |
| @Option(name = "--prefix", usage = "prefix for the html filepath") |
| private String prefix = ""; |
| |
| @Option(name = "--in-ext", usage = "extension for input files") |
| private String inExt = ".txt"; |
| |
| @Option(name = "--out-ext", usage = "extension for output files") |
| private String outExt = ".html"; |
| |
| @Argument(usage = "input files") |
| private List<String> inputFiles = new ArrayList<>(); |
| |
| private void invoke(String... parameters) throws IOException { |
| CmdLineParser parser = new CmdLineParser(this); |
| try { |
| parser.parseArgument(parameters); |
| if (inputFiles.isEmpty()) { |
| throw new CmdLineException(parser, "FAILED: input file missing"); |
| } |
| } catch (CmdLineException e) { |
| System.err.println(e.getMessage()); |
| parser.printUsage(System.err); |
| System.exit(1); |
| return; |
| } |
| |
| try (JarOutputStream jar = new JarOutputStream(Files.newOutputStream(Paths.get(outFile)))) { |
| byte[] compressedIndex = zip(index()); |
| JarEntry entry = new JarEntry(String.format("%s/%s", Constants.PACKAGE, Constants.INDEX_ZIP)); |
| entry.setSize(compressedIndex.length); |
| jar.putNextEntry(entry); |
| jar.write(compressedIndex); |
| jar.closeEntry(); |
| } |
| } |
| |
| private RAMDirectory index() |
| throws IOException, UnsupportedEncodingException, FileNotFoundException { |
| RAMDirectory directory = new RAMDirectory(); |
| IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer(CharArraySet.EMPTY_SET)); |
| config.setOpenMode(OpenMode.CREATE); |
| config.setCommitOnClose(true); |
| try (IndexWriter iwriter = new IndexWriter(directory, config)) { |
| for (String inputFile : inputFiles) { |
| File file = new File(inputFile); |
| if (file.length() == 0) { |
| continue; |
| } |
| |
| String title; |
| try (BufferedReader titleReader = |
| new BufferedReader(new InputStreamReader(Files.newInputStream(file.toPath()), UTF_8))) { |
| title = titleReader.readLine(); |
| if (title != null && title.startsWith("[[")) { |
| // Generally the first line of the txt is the title. In a few cases the |
| // first line is a "[[tag]]" and the second line is the title. |
| title = titleReader.readLine(); |
| } |
| } |
| Matcher matcher = SECTION_HEADER.matcher(title); |
| if (matcher.matches()) { |
| title = matcher.group(1); |
| } |
| |
| String outputFile = AsciiDoctor.mapInFileToOutFile(inputFile, inExt, outExt); |
| try (FileReader reader = new FileReader(file)) { |
| Document doc = new Document(); |
| doc.add(new TextField(Constants.DOC_FIELD, reader)); |
| doc.add(new StringField(Constants.URL_FIELD, prefix + outputFile, Field.Store.YES)); |
| doc.add(new TextField(Constants.TITLE_FIELD, title, Field.Store.YES)); |
| iwriter.addDocument(doc); |
| } |
| } |
| } |
| return directory; |
| } |
| |
| private byte[] zip(RAMDirectory dir) throws IOException { |
| ByteArrayOutputStream buf = new ByteArrayOutputStream(); |
| try (ZipOutputStream zip = new ZipOutputStream(buf)) { |
| for (String name : dir.listAll()) { |
| try (IndexInput in = dir.openInput(name, null)) { |
| int len = (int) in.length(); |
| byte[] tmp = new byte[len]; |
| ZipEntry entry = new ZipEntry(name); |
| entry.setSize(len); |
| in.readBytes(tmp, 0, len); |
| zip.putNextEntry(entry); |
| zip.write(tmp, 0, len); |
| zip.closeEntry(); |
| } |
| } |
| } |
| |
| return buf.toByteArray(); |
| } |
| |
| public static void main(String[] args) { |
| try { |
| new DocIndexer().invoke(args); |
| } catch (IOException e) { |
| System.err.println(e.getMessage()); |
| System.exit(1); |
| } |
| } |
| } |