Fall back to ISO-8859-1 if charset isn't supported
If UniversalDetector gives us a character set name that isn't
supported by the JVM, log an error, but keep going using the
ISO-8859-1 character set as a fallback.
Bug: issue 545
Change-Id: I4b52836a69cdec2907ff8ff92d178faec3eb6fe7
Signed-off-by: Shawn O. Pearce <sop@google.com>
diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/patch/Text.java b/gerrit-server/src/main/java/com/google/gerrit/server/patch/Text.java
index 502c7a6..e5b2411 100644
--- a/gerrit-server/src/main/java/com/google/gerrit/server/patch/Text.java
+++ b/gerrit-server/src/main/java/com/google/gerrit/server/patch/Text.java
@@ -17,10 +17,17 @@
import org.eclipse.jgit.diff.RawText;
import org.eclipse.jgit.util.RawParseUtils;
import org.mozilla.universalchardet.UniversalDetector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
public class Text extends RawText {
+ private static final Logger log = LoggerFactory.getLogger(Text.class);
+ private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
+
public static final byte[] NO_BYTES = {};
public static final Text EMPTY = new Text(NO_BYTES);
@@ -36,9 +43,19 @@
encoding = d.getDetectedCharset();
}
if (encoding == null) {
- encoding = "ISO-8859-1";
+ return ISO_8859_1;
}
- return Charset.forName(encoding);
+ try {
+ return Charset.forName(encoding);
+
+ } catch (IllegalCharsetNameException err) {
+ log.error("Invalid detected charset name '" + encoding + "': " + err);
+ return ISO_8859_1;
+
+ } catch (UnsupportedCharsetException err) {
+ log.error("Detected charset '" + encoding + "' not supported: " + err);
+ return ISO_8859_1;
+ }
}
private Charset charset;