Modify BuildFileToJsonParser to use Gson's streaming parser.
Summary:
Instead of using our hand-tuned streaming parser, use the default one in Gson.
This also enables us to parse arbitrary JSON structures instead of only arrays
of arrays of primitives.
This changes `BuildFileToJsonParser` to take `isServerMode` as a constructor param
so that we are explicit. Despite what the comment on `BuildFileToJsonParser#isServerMode`
says, I don't believe that we can use "regular mode" going forward.
That is, because the output from `buck.py` includes an `__includes` entry,
we cannot read a single JSON object in isolation because there may be context
from the `__includes` entry that it needs.
Test Plan: Sandcastle builds.
diff --git a/src/com/facebook/buck/json/BUCK b/src/com/facebook/buck/json/BUCK
index dac4faf..052934e 100644
--- a/src/com/facebook/buck/json/BUCK
+++ b/src/com/facebook/buck/json/BUCK
@@ -3,12 +3,12 @@
srcs = glob(['*.java']),
deps = [
'//lib:guava',
- '//lib:jackson-core',
'//src/com/facebook/buck/util/environment:environment',
'//src/com/facebook/buck/util:constants',
'//src/com/facebook/buck/util:exceptions',
'//src/com/facebook/buck/util:io',
'//src/com/facebook/buck/util:util',
+ '//third-party/java/gson:gson',
],
visibility = [
'PUBLIC',
diff --git a/src/com/facebook/buck/json/BuildFileToJsonParser.java b/src/com/facebook/buck/json/BuildFileToJsonParser.java
index 2a8e8eb..7121a25 100644
--- a/src/com/facebook/buck/json/BuildFileToJsonParser.java
+++ b/src/com/facebook/buck/json/BuildFileToJsonParser.java
@@ -16,18 +16,20 @@
package com.facebook.buck.json;
-import com.fasterxml.jackson.core.JsonFactory;
-import com.fasterxml.jackson.core.JsonParseException;
-import com.fasterxml.jackson.core.JsonParser;
-import com.fasterxml.jackson.core.JsonToken;
-import com.google.common.base.Preconditions;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import com.google.gson.Gson;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonPrimitive;
+import com.google.gson.stream.JsonReader;
+import com.google.gson.stream.JsonToken;
-import java.io.EOFException;
import java.io.IOException;
-import java.io.InputStream;
import java.io.Reader;
+import java.io.StringReader;
import java.util.List;
import java.util.Map;
@@ -37,142 +39,123 @@
* sort of nested arrays or objects are allowed in the output as Parser is implemented
* today. This simplification makes it easier to leverage Jackson's streaming JSON API.
*/
-public class BuildFileToJsonParser {
+public class BuildFileToJsonParser implements AutoCloseable {
- private final JsonParser parser;
+ private final Gson gson;
+ private final JsonReader reader;
- public BuildFileToJsonParser(String json) throws IOException {
- JsonFactory jsonFactory = new JsonFactory();
- this.parser = jsonFactory.createJsonParser(json);
+ /**
+ * The parser below uses these objects for stateful purposes with the ultimate goal
+ * of populating the parsed rules into `currentObjects`.
+ *
+ * The parser is expecting two different styles of output:
+ * 1. Server mode: [{"key": "value"}, {"key": "value"}, ...]
+ * 2. Regular mode: {"key": "value"}, {"key": "value"}, ...
+ *
+ * Server mode output is a necessary short-term step to keep logic in the main Parser
+ * consistent (expecting to be able to correlate a set of rules with the specific BUCK file
+ * that generated them). This requirement creates an unnecessary performance weakness
+ * in this design where we cannot parallelize buck.py's parsing of BUCK files with buck's
+ * processing of the result into a DAG. Once this can be addressed, server mode should be
+ * eliminated.
+ */
+ private final boolean isServerMode;
+
+ /**
+ * @param jsonReader That contains the JSON data.
+ */
+ public BuildFileToJsonParser(Reader jsonReader, boolean isServerMode) {
+ this.gson = new Gson();
+ this.reader = new JsonReader(jsonReader);
+ this.isServerMode = isServerMode;
+
+ // This is used to read one line at a time.
+ reader.setLenient(true);
}
- public BuildFileToJsonParser(InputStream json) throws IOException {
- JsonFactory jsonFactory = new JsonFactory();
- this.parser = jsonFactory.createJsonParser(json);
- }
-
- public BuildFileToJsonParser(Reader json) throws IOException {
- JsonFactory jsonFactory = new JsonFactory();
- this.parser = jsonFactory.createJsonParser(json);
+ @VisibleForTesting
+ public BuildFileToJsonParser(String json, boolean isServerMode) {
+ this(new StringReader(json), isServerMode);
}
/**
* Access the next set of rules from the build file processor. Note that for non-server
* invocations, this will collect all of the rules into one enormous list.
*
- * @return List of rules expressed as a <em>very</em> simple mapping of JSON field names
- * to Java primitives.
+ * @return The parsed JSON, represented as Java collections. Ideally, we would use Gson's object
+ * model directly to avoid the overhead of converting between object models. That would
+ * require updating all code that depends on this method, which may be a lot of work. Also,
+ * bear in mind that using the Java collections decouples clients of this method from the JSON
+ * parser that we use.
*/
- public List<Map<String, Object>> nextRules() throws IOException {
- // The parser below uses these objects for stateful purposes with the ultimate goal
- // of populating the parsed rules into `currentObjects`.
- //
- // The parser is expecting two different styles of output:
- // 1. Server mode: [{"key": "value"}, {"key": "value"}, ...]
- // 2. Regular mode: {"key": "value"}, {"key": "value"}, ...
- //
- // Server mode output is a necessary short-term step to keep logic in the main Parser
- // consistent (expecting to be able to correlate a set of rules with the specific BUCK file
- // that generated them). This requirement creates an unnecessary performance weakness
- // in this design where we cannot parallelize buck.py's parsing of BUCK files with buck's
- // processing of the result into a DAG. Once this can be addressed, server mode should be
- // eliminated.
- List<Map<String, Object>> currentObjects = null;
- String currentFieldName = null;
- List<String> currentArray = null;
- Map<String, Object> currentObject = null;
+ @SuppressWarnings("unchecked")
+ List<Map<String, Object>> nextRules() throws IOException {
+ List<Map<String, Object>> items = Lists.newArrayList();
- while (true) {
- JsonToken token = parser.nextToken();
- if (token == null) {
- if (currentObject != null) {
- throw new EOFException("unexpected end-of-stream");
- } else if (currentObjects == null) {
- // This happens when buck.py failed to produce any output for this build rule (python
- // parse error or raised exception, I bet).
- throw new EOFException("missing build rules");
- } else {
- return currentObjects;
- }
+ if (isServerMode) {
+ reader.beginArray();
+
+ while (reader.hasNext()) {
+ JsonObject json = gson.fromJson(reader, JsonObject.class);
+ items.add((Map<String, Object>) toRawTypes(json));
}
- switch (token) {
- case START_OBJECT:
- // The syntax differs very slightly between server mode and not. If we encounter
- // an object not inside of an array, we aren't in server mode and so we can just read
- // all the objects until exhaustion.
- if (currentObjects == null) {
- currentObjects = Lists.newArrayList();
- }
- currentObject = Maps.newHashMap();
- break;
-
- case END_OBJECT:
- currentObjects.add(currentObject);
- currentObject = null;
- break;
-
- case START_ARRAY:
- // Heuristic to detect whether we are in the above server mode or regular mode. If we
- // encounter START_ARRAY before START_OBJECT, it must be server mode so we should build
- // `currentObjects` now. Otherwise, this is the start of a new sub-array within
- // an object.
- if (currentObjects == null) {
- currentObjects = Lists.newArrayList();
- } else {
- currentArray = Lists.newArrayList();
- }
- break;
-
- case END_ARRAY:
- if (currentArray != null) {
- currentObject.put(currentFieldName, currentArray);
- currentArray = null;
- currentFieldName = null;
- break;
- } else {
- // Must be in server mode and we finished parsing a single BUCK file.
- return currentObjects;
- }
-
- case FIELD_NAME:
- currentFieldName = parser.getText();
- break;
-
- case VALUE_STRING:
- if (currentArray == null) {
- currentObject.put(currentFieldName, parser.getText());
- currentFieldName = null;
- } else {
- currentArray.add(parser.getText());
- }
- break;
-
- case VALUE_TRUE:
- case VALUE_FALSE:
- Preconditions.checkState(currentArray == null, "Unexpected boolean in JSON array");
- currentObject.put(currentFieldName, token == JsonToken.VALUE_TRUE);
- currentFieldName = null;
- break;
-
- case VALUE_NUMBER_INT:
- Preconditions.checkState(currentArray == null, "Unexpected int in JSON array");
- currentObject.put(currentFieldName, parser.getLongValue());
- currentFieldName = null;
- break;
-
- case VALUE_NULL:
- if (currentArray == null) {
- currentObject.put(currentFieldName, null);
- currentFieldName = null;
- } else {
- currentArray.add(null);
- }
- break;
-
- default:
- throw new JsonParseException("Unexpected token: " + token, parser.getCurrentLocation());
+ reader.endArray();
+ } else {
+ while (reader.peek() != JsonToken.END_DOCUMENT) {
+ JsonObject json = gson.fromJson(reader, JsonObject.class);
+ items.add((Map<String, Object>) toRawTypes(json));
}
}
+
+ return items;
+ }
+
+ /**
+ * @return One of: String, Boolean, Long, Number, List<Object>, Map<String, Object>.
+ */
+ @VisibleForTesting
+ static Object toRawTypes(JsonElement json) {
+ // Cases are ordered from most common to least common.
+ if (json.isJsonPrimitive()) {
+ JsonPrimitive primitive = json.getAsJsonPrimitive();
+ if (primitive.isString()) {
+ return primitive.getAsString();
+ } else if (primitive.isBoolean()) {
+ return primitive.getAsBoolean();
+ } else if (primitive.isNumber()) {
+ Number number = primitive.getAsNumber();
+ // Number is likely an instance of class com.google.gson.internal.LazilyParsedNumber.
+ if (number.longValue() == number.doubleValue()) {
+ return number.longValue();
+ } else {
+ return number;
+ }
+ } else {
+ throw new IllegalStateException("Unknown primitive type: " + primitive);
+ }
+ } else if (json.isJsonArray()) {
+ JsonArray array = json.getAsJsonArray();
+ List<Object> out = Lists.newArrayListWithCapacity(array.size());
+ for (JsonElement item : array) {
+ out.add(toRawTypes(item));
+ }
+ return out;
+ } else if (json.isJsonObject()) {
+ Map<String, Object> out = Maps.newHashMap();
+ for (Map.Entry<String, JsonElement> entry : json.getAsJsonObject().entrySet()) {
+ out.put(entry.getKey(), toRawTypes(entry.getValue()));
+ }
+ return out;
+ } else if (json.isJsonNull()) {
+ return null;
+ } else {
+ throw new IllegalStateException("Unknown type: " + json);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ reader.close();
}
}
diff --git a/src/com/facebook/buck/json/ProjectBuildFileParser.java b/src/com/facebook/buck/json/ProjectBuildFileParser.java
index ab1eb70..f87b5bb 100644
--- a/src/com/facebook/buck/json/ProjectBuildFileParser.java
+++ b/src/com/facebook/buck/json/ProjectBuildFileParser.java
@@ -21,6 +21,7 @@
import com.facebook.buck.util.ProjectFilesystem;
import com.facebook.buck.util.environment.Platform;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Charsets;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
@@ -31,8 +32,10 @@
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
+import java.io.Reader;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -127,6 +130,10 @@
stderrConsumer.start();
buckPyStdinWriter = new BufferedWriter(new OutputStreamWriter(stdin));
+
+ // TODO(mbolin): Ensure that the Reader gets closed.
+ Reader reader = new InputStreamReader(buckPyProcess.getInputStream(), Charsets.UTF_8);
+ buckPyStdoutParser = new BuildFileToJsonParser(reader, isServerMode);
}
private ImmutableList<String> buildArgs() {
@@ -223,21 +230,6 @@
buckPyStdinWriter.flush();
}
- // Construct the parser lazily because Jackson expects that when the parser is made it is
- // safe to immediately begin reading from the underlying stream to detect the encoding.
- // For our server use case, the server will produce no output until directed to by a
- // request for a particular build file.
- //
- // TODO: Jackson has a severe bug which assumes that it is safe to require at least 4 bytes
- // of input due to JSON's BOM concept (see detectEncoding). buck.py, and many other
- // facilities, do not write a BOM header and therefore may end up producing insufficient
- // bytes to unwedge the parser's construction. For example, if the first BUCK file
- // submitted outputted no rules (that is, "[]"), then this line would hang waiting for
- // more input!
- if (buckPyStdoutParser == null) {
- buckPyStdoutParser = new BuildFileToJsonParser(buckPyProcess.getInputStream());
- }
-
return buckPyStdoutParser.nextRules();
}
diff --git a/test/com/facebook/buck/json/BUCK b/test/com/facebook/buck/json/BUCK
index 18ac6fe..b56c69a 100644
--- a/test/com/facebook/buck/json/BUCK
+++ b/test/com/facebook/buck/json/BUCK
@@ -6,8 +6,8 @@
],
deps = [
'//lib:guava',
- '//lib:jackson-core',
'//lib:junit',
'//src/com/facebook/buck/json:json',
+ '//third-party/java/gson:gson',
],
)
diff --git a/test/com/facebook/buck/json/BuildFileToJsonParserTest.java b/test/com/facebook/buck/json/BuildFileToJsonParserTest.java
index dbe0068..19535db 100644
--- a/test/com/facebook/buck/json/BuildFileToJsonParserTest.java
+++ b/test/com/facebook/buck/json/BuildFileToJsonParserTest.java
@@ -18,11 +18,13 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertTrue;
-import com.fasterxml.jackson.core.JsonParseException;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+import com.google.gson.JsonArray;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonPrimitive;
import org.junit.Test;
@@ -36,54 +38,99 @@
public class BuildFileToJsonParserTest {
@Test
- public void testSimpleParse() throws JsonParseException, IOException {
+ public void testSimpleParse() throws IOException {
String json =
"{" +
"\"srcs\": [\"src/com/facebook/buck/Bar.java\", \"src/com/facebook/buck/Foo.java\"]" +
"}";
- BuildFileToJsonParser parser = new BuildFileToJsonParser(json);
- List<Map<String, Object>> tokens = parser.nextRules();
- assertEquals(
- ImmutableList.of(
- ImmutableMap.of("srcs",
- ImmutableList.of(
- "src/com/facebook/buck/Bar.java",
- "src/com/facebook/buck/Foo.java"))),
- tokens);
+
+ try (BuildFileToJsonParser parser = new BuildFileToJsonParser(json, false /* isServerMode */)) {
+ assertEquals(
+ ImmutableList.of(
+ ImmutableMap.of("srcs",
+ ImmutableList.of(
+ "src/com/facebook/buck/Bar.java",
+ "src/com/facebook/buck/Foo.java"))),
+ parser.nextRules());
+ }
}
@Test
public void testParseLong() throws IOException {
String json = "{\"thing\": 27}";
- BuildFileToJsonParser parser = new BuildFileToJsonParser(json);
- List<Map<String, Object>> rules = parser.nextRules();
+ List<Map<String, Object>> tokens;
+ try (BuildFileToJsonParser parser = new BuildFileToJsonParser(json, false /* isServerMode */)) {
+ tokens = parser.nextRules();
+ }
- assertEquals(1, rules.size());
- Map<String, Object> rule = rules.get(0);
+ assertEquals(1, tokens.size());
+ Map<String, Object> rule = tokens.get(0);
Object value = rule.get("thing");
- assertTrue(value instanceof Long);
+ assertEquals(Long.class, value.getClass());
assertEquals(27L, rule.get("thing"));
assertNotEquals(27, rule.get("thing"));
}
@Test
- public void testServerModeParse() throws JsonParseException, IOException {
+ public void testServerModeParse() throws IOException {
String json =
"[{\"foo\": \"a:1\"}, {\"foo\": \"a:2\"}]\n" +
"[{\"bar\": \"b:1\"}]";
- BuildFileToJsonParser parser = new BuildFileToJsonParser(json);
- List<Map<String, Object>> a = parser.nextRules();
- assertEquals(
- ImmutableList.of(
- ImmutableMap.of("foo", "a:1"),
- ImmutableMap.of("foo", "a:2")),
- a);
+ try (BuildFileToJsonParser parser = new BuildFileToJsonParser(json, true /* isServerMode */)) {
+ assertEquals(
+ ImmutableList.of(
+ ImmutableMap.of("foo", "a:1"),
+ ImmutableMap.of("foo", "a:2")),
+ parser.nextRules());
- List<Map<String, Object>> b = parser.nextRules();
- assertEquals(
- ImmutableList.of(
- ImmutableMap.of("bar", "b:1")),
- b);
+ assertEquals(
+ ImmutableList.of(
+ ImmutableMap.of("bar", "b:1")),
+ parser.nextRules());
+ }
+ }
+
+ @Test
+ public void testParseNestedStructures() throws IOException {
+ String json =
+ "{" +
+ "\"foo\": [\"a\", \"b\"]," +
+ "\"bar\": {\"baz\": \"quox\"}" +
+ "}";
+
+ try (BuildFileToJsonParser parser = new BuildFileToJsonParser(json, false /* isServerMode */)) {
+ assertEquals(
+ ImmutableList.of(ImmutableMap.of(
+ "foo", ImmutableList.of("a", "b"),
+ "bar", ImmutableMap.of("baz", "quox"))),
+ parser.nextRules());
+ }
+ }
+
+ @Test
+ public void testToRawTypes() {
+ JsonObject ruleJson = new JsonObject();
+
+ ruleJson.addProperty("name", "foo");
+ ruleJson.addProperty("export_deps", false);
+ ruleJson.addProperty("proguard_config", (String)null);
+ ruleJson.addProperty("source", 6);
+
+ JsonArray deps = new JsonArray();
+ deps.add(new JsonPrimitive("//foo:bar"));
+ deps.add(new JsonPrimitive("//foo:baz"));
+ deps.add(new JsonPrimitive("//foo:biz"));
+ ruleJson.add("deps", deps);
+
+ // Note that an ImmutableMap does not allow null values, but BuildFileToJsonParser.toRawTypes()
+ // can return a Map with null values, so we use an ordinary HashMap for the expected value.
+ Map<String, Object> expected = Maps.newHashMap();
+ expected.put("name", "foo");
+ expected.put("export_deps", false);
+ expected.put("proguard_config", null);
+ expected.put("source", 6L);
+ expected.put("deps", ImmutableList.of("//foo:bar", "//foo:baz", "//foo:biz"));
+ assertEquals(expected, BuildFileToJsonParser.toRawTypes(ruleJson));
}
}