lib/asciidoctor/java/DocIndexer.java - gerrit - Git at Google

 // Copyright (C) 2013 The Android Open Source Project
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 import com.google.common.io.Files;

 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.NIOFSDirectory;
 import org.apache.lucene.util.Version;
 import org.kohsuke.args4j.Argument;
 import org.kohsuke.args4j.CmdLineException;
 import org.kohsuke.args4j.CmdLineParser;
 import org.kohsuke.args4j.Option;

 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.zip.ZipOutputStream;

 /**
  * Command-line tool that builds a Lucene index over a set of documentation
  * source files and packs the resulting index directory into a zip file.
  *
  * <p>Every input file becomes one Lucene document with three fields: the
  * file content ({@code doc}), the URL of the rendered page ({@code url},
  * stored) and the page title ({@code title}, stored).
  */
 public class DocIndexer {
   private static final Version LUCENE_VERSION = Version.LUCENE_44;
   private static final String DOC_FIELD = "doc";
   private static final String URL_FIELD = "url";
   private static final String TITLE_FIELD = "title";

   /** Encoding used for both the title and the body of every input file. */
   private static final String INPUT_ENCODING = "UTF-8";

   @Option(name = "-z", usage = "output zip file")
   private String zipFile;

   @Option(name = "--prefix", usage = "prefix for the html filepath")
   private String prefix = "";

   @Option(name = "--in-ext", usage = "extension for input files")
   private String inExt = ".txt";

   @Option(name = "--out-ext", usage = "extension for output files")
   private String outExt = ".html";

   @Argument(usage = "input files")
   private List<String> inputFiles = new ArrayList<String>();

   /**
    * Parses the command line, indexes all input files into a temporary
    * directory and zips that directory into the file given by {@code -z}.
    *
    * <p>On a command line error the message and usage are printed to stderr
    * and the JVM exits with status 1.
    *
    * @param parameters raw command line arguments
    * @throws IOException if reading an input file, writing the index or
    *     writing the zip file fails
    */
   private void invoke(String... parameters) throws IOException {
     CmdLineParser parser = new CmdLineParser(this);
     try {
       parser.parseArgument(parameters);
       if (inputFiles.isEmpty()) {
         throw new CmdLineException(parser, "FAILED: input file missing");
       }
       if (zipFile == null) {
         // Fail before doing any work; otherwise the missing option only
         // surfaces as a NullPointerException once indexing has finished.
         throw new CmdLineException(parser, "FAILED: output zip file missing");
       }
     } catch (CmdLineException e) {
       System.err.println(e.getMessage());
       parser.printUsage(System.err);
       System.exit(1);
       return;
     }

     File tmp = Files.createTempDir();
     try {
       buildIndex(tmp);

       ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
       try {
         AsciiDoctor.zipDir(tmp, "", zip);
       } finally {
         zip.close();
       }
     } finally {
       // The index only needs to survive until it has been zipped.
       deleteRecursively(tmp);
     }
   }

   /**
    * Creates a fresh Lucene index in {@code indexDir} containing one document
    * per input file. The writer and the directory are closed even when
    * indexing fails.
    */
   private void buildIndex(File indexDir) throws IOException {
     NIOFSDirectory directory = new NIOFSDirectory(indexDir);
     try {
       IndexWriterConfig config = new IndexWriterConfig(
           LUCENE_VERSION,
           new StandardAnalyzer(LUCENE_VERSION, CharArraySet.EMPTY_SET));
       config.setOpenMode(OpenMode.CREATE);
       IndexWriter iwriter = new IndexWriter(directory, config);
       try {
         for (String inputFile : inputFiles) {
           indexFile(iwriter, inputFile);
         }
       } finally {
         iwriter.close();
       }
     } finally {
       directory.close();
     }
   }

   /** Adds a single input file to the index as one document. */
   private void indexFile(IndexWriter iwriter, String inputFile)
       throws IOException {
     File file = new File(inputFile);
     String title = readTitle(file);
     String outputFile = AsciiDoctor.mapInFileToOutFile(
         inputFile, inExt, outExt);

     // Decode the body with the same charset as the title instead of the
     // platform default, so the index does not depend on the build machine.
     BufferedReader reader = new BufferedReader(
         new InputStreamReader(new FileInputStream(file), INPUT_ENCODING));
     try {
       Document doc = new Document();
       doc.add(new TextField(DOC_FIELD, reader));
       doc.add(new StringField(
           URL_FIELD, prefix + outputFile, Field.Store.YES));
       doc.add(new TextField(TITLE_FIELD, title, Field.Store.YES));
       iwriter.addDocument(doc);
     } finally {
       reader.close();
     }
   }

   /**
    * Reads the title of a documentation file.
    *
    * <p>Generally the first line of the txt is the title. In a few cases the
    * first line is a "[[tag]]" and the second line is the title.
    *
    * @return the title line, or an empty string when the file has no such
    *     line (empty file, or nothing after the tag line); never null
    */
   private static String readTitle(File file) throws IOException {
     BufferedReader titleReader = new BufferedReader(
         new InputStreamReader(new FileInputStream(file), INPUT_ENCODING));
     try {
       String title = titleReader.readLine();
       if (title != null && title.startsWith("[[")) {
         title = titleReader.readLine();
       }
       return title != null ? title : "";
     } finally {
       titleReader.close();
     }
   }

   /**
    * Best-effort removal of {@code file} and everything below it. Failures
    * are ignored because a left-over temporary file must not fail the run.
    */
   private static void deleteRecursively(File file) {
     File[] children = file.listFiles();
     if (children != null) {
       for (File child : children) {
         deleteRecursively(child);
       }
     }
     file.delete();
   }

   /**
    * Program entry point. Prints the message of any I/O failure to stderr
    * and exits with status 1.
    */
   public static void main(String[] args) {
     try {
       new DocIndexer().invoke(args);
     } catch (IOException e) {
       System.err.println(e.getMessage());
       System.exit(1);
     }
   }
 }
	// Copyright (C) 2013 The Android Open Source Project
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	import com.google.common.io.Files;

	import org.apache.lucene.analysis.standard.StandardAnalyzer;
	import org.apache.lucene.analysis.util.CharArraySet;
	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;
	import org.apache.lucene.document.StringField;
	import org.apache.lucene.document.TextField;
	import org.apache.lucene.index.IndexWriter;
	import org.apache.lucene.index.IndexWriterConfig;
	import org.apache.lucene.index.IndexWriterConfig.OpenMode;
	import org.apache.lucene.store.NIOFSDirectory;
	import org.apache.lucene.util.Version;
	import org.kohsuke.args4j.Argument;
	import org.kohsuke.args4j.CmdLineException;
	import org.kohsuke.args4j.CmdLineParser;
	import org.kohsuke.args4j.Option;

	import java.io.BufferedReader;
	import java.io.File;
	import java.io.FileInputStream;
	import java.io.FileOutputStream;
	import java.io.FileReader;
	import java.io.IOException;
	import java.io.InputStreamReader;
	import java.util.ArrayList;
	import java.util.List;
	import java.util.zip.ZipOutputStream;

	/**
	 * Command-line tool that builds a Lucene index over a set of documentation
	 * source files and packs the resulting index directory into a zip file.
	 *
	 * <p>Every input file becomes one Lucene document with three fields: the
	 * file content ({@code doc}), the URL of the rendered page ({@code url},
	 * stored) and the page title ({@code title}, stored).
	 */
	public class DocIndexer {
	  private static final Version LUCENE_VERSION = Version.LUCENE_44;
	  private static final String DOC_FIELD = "doc";
	  private static final String URL_FIELD = "url";
	  private static final String TITLE_FIELD = "title";

	  /** Encoding used for both the title and the body of every input file. */
	  private static final String INPUT_ENCODING = "UTF-8";

	  @Option(name = "-z", usage = "output zip file")
	  private String zipFile;

	  @Option(name = "--prefix", usage = "prefix for the html filepath")
	  private String prefix = "";

	  @Option(name = "--in-ext", usage = "extension for input files")
	  private String inExt = ".txt";

	  @Option(name = "--out-ext", usage = "extension for output files")
	  private String outExt = ".html";

	  @Argument(usage = "input files")
	  private List<String> inputFiles = new ArrayList<String>();

	  /**
	   * Parses the command line, indexes all input files into a temporary
	   * directory and zips that directory into the file given by {@code -z}.
	   *
	   * <p>On a command line error the message and usage are printed to stderr
	   * and the JVM exits with status 1.
	   *
	   * @param parameters raw command line arguments
	   * @throws IOException if reading an input file, writing the index or
	   *     writing the zip file fails
	   */
	  private void invoke(String... parameters) throws IOException {
	    CmdLineParser parser = new CmdLineParser(this);
	    try {
	      parser.parseArgument(parameters);
	      if (inputFiles.isEmpty()) {
	        throw new CmdLineException(parser, "FAILED: input file missing");
	      }
	      if (zipFile == null) {
	        // Fail before doing any work; otherwise the missing option only
	        // surfaces as a NullPointerException once indexing has finished.
	        throw new CmdLineException(parser, "FAILED: output zip file missing");
	      }
	    } catch (CmdLineException e) {
	      System.err.println(e.getMessage());
	      parser.printUsage(System.err);
	      System.exit(1);
	      return;
	    }

	    File tmp = Files.createTempDir();
	    try {
	      buildIndex(tmp);

	      ZipOutputStream zip = new ZipOutputStream(new FileOutputStream(zipFile));
	      try {
	        AsciiDoctor.zipDir(tmp, "", zip);
	      } finally {
	        zip.close();
	      }
	    } finally {
	      // The index only needs to survive until it has been zipped.
	      deleteRecursively(tmp);
	    }
	  }

	  /**
	   * Creates a fresh Lucene index in {@code indexDir} containing one document
	   * per input file. The writer and the directory are closed even when
	   * indexing fails.
	   */
	  private void buildIndex(File indexDir) throws IOException {
	    NIOFSDirectory directory = new NIOFSDirectory(indexDir);
	    try {
	      IndexWriterConfig config = new IndexWriterConfig(
	          LUCENE_VERSION,
	          new StandardAnalyzer(LUCENE_VERSION, CharArraySet.EMPTY_SET));
	      config.setOpenMode(OpenMode.CREATE);
	      IndexWriter iwriter = new IndexWriter(directory, config);
	      try {
	        for (String inputFile : inputFiles) {
	          indexFile(iwriter, inputFile);
	        }
	      } finally {
	        iwriter.close();
	      }
	    } finally {
	      directory.close();
	    }
	  }

	  /** Adds a single input file to the index as one document. */
	  private void indexFile(IndexWriter iwriter, String inputFile)
	      throws IOException {
	    File file = new File(inputFile);
	    String title = readTitle(file);
	    String outputFile = AsciiDoctor.mapInFileToOutFile(
	        inputFile, inExt, outExt);

	    // Decode the body with the same charset as the title instead of the
	    // platform default, so the index does not depend on the build machine.
	    BufferedReader reader = new BufferedReader(
	        new InputStreamReader(new FileInputStream(file), INPUT_ENCODING));
	    try {
	      Document doc = new Document();
	      doc.add(new TextField(DOC_FIELD, reader));
	      doc.add(new StringField(
	          URL_FIELD, prefix + outputFile, Field.Store.YES));
	      doc.add(new TextField(TITLE_FIELD, title, Field.Store.YES));
	      iwriter.addDocument(doc);
	    } finally {
	      reader.close();
	    }
	  }

	  /**
	   * Reads the title of a documentation file.
	   *
	   * <p>Generally the first line of the txt is the title. In a few cases the
	   * first line is a "[[tag]]" and the second line is the title.
	   *
	   * @return the title line, or an empty string when the file has no such
	   *     line (empty file, or nothing after the tag line); never null
	   */
	  private static String readTitle(File file) throws IOException {
	    BufferedReader titleReader = new BufferedReader(
	        new InputStreamReader(new FileInputStream(file), INPUT_ENCODING));
	    try {
	      String title = titleReader.readLine();
	      if (title != null && title.startsWith("[[")) {
	        title = titleReader.readLine();
	      }
	      return title != null ? title : "";
	    } finally {
	      titleReader.close();
	    }
	  }

	  /**
	   * Best-effort removal of {@code file} and everything below it. Failures
	   * are ignored because a left-over temporary file must not fail the run.
	   */
	  private static void deleteRecursively(File file) {
	    File[] children = file.listFiles();
	    if (children != null) {
	      for (File child : children) {
	        deleteRecursively(child);
	      }
	    }
	    file.delete();
	  }

	  /**
	   * Program entry point. Prints the message of any I/O failure to stderr
	   * and exits with status 1.
	   */
	  public static void main(String[] args) {
	    try {
	      new DocIndexer().invoke(args);
	    } catch (IOException e) {
	      System.err.println(e.getMessage());
	      System.exit(1);
	    }
	  }
	}