Configurable external robots.txt file
Currently the robots.txt file used by the application is inside the .war
file and thus difficult to modify for users. This patch adds a new
optional configuration parameter that allows the user to specify an
external robots file, so that it is easy to modify and easy to preserve
during upgrades of the application. If no configuration change is made
the original robots file is used; the external file is used only if the
following is added to the gerrit.config file:
[httpd]
robotsFile = etc/myrobots.txt
If the file indicated by this parameter is relative, it will be
resolved relative to the site directory; if it is absolute, it will be
used as is.
If the file doesn't exist or can't be read a message will be written to
the log and the default file will be used.
Bug: issue 1968
Change-Id: Iad02dbd97633e9c45dbce15d1f227f3931255e0a
Signed-off-by: Juan Hernandez <juanantonio.hernandez@gmail.com>
diff --git a/Documentation/config-gerrit.txt b/Documentation/config-gerrit.txt
index e49ff77..2a9c350 100644
--- a/Documentation/config-gerrit.txt
+++ b/Documentation/config-gerrit.txt
@@ -1747,6 +1747,16 @@
filterClass = org.anyorg.MySecureFilter
----
+[[httpd.robotsFile]]httpd.robotsFile::
++
+Location of an external robots.txt file to be used instead of the one
+bundled with the .war of the application.
++
+If not absolute, the path is resolved relative to `$site_path`.
++
+If the file doesn't exist or can't be read the default robots.txt file
+bundled with the .war will be used instead.
+
[[ldap]]Section ldap
~~~~~~~~~~~~~~~~~~~~
diff --git a/gerrit-httpd/src/main/java/com/google/gerrit/httpd/UrlModule.java b/gerrit-httpd/src/main/java/com/google/gerrit/httpd/UrlModule.java
index 05b059b..bf39bfb 100644
--- a/gerrit-httpd/src/main/java/com/google/gerrit/httpd/UrlModule.java
+++ b/gerrit-httpd/src/main/java/com/google/gerrit/httpd/UrlModule.java
@@ -21,6 +21,7 @@
import com.google.gerrit.httpd.raw.CatServlet;
import com.google.gerrit.httpd.raw.HostPageServlet;
import com.google.gerrit.httpd.raw.LegacyGerritServlet;
+import com.google.gerrit.httpd.raw.RobotsServlet;
import com.google.gerrit.httpd.raw.SshInfoServlet;
import com.google.gerrit.httpd.raw.StaticServlet;
import com.google.gerrit.httpd.raw.ToolServlet;
@@ -112,6 +113,8 @@
if (cfg.deprecatedQuery) {
serve("/query").with(DeprecatedChangeQueryServlet.class);
}
+
+ serve("/robots.txt").with(RobotsServlet.class);
}
private Key<HttpServlet> notFound() {
diff --git a/gerrit-httpd/src/main/java/com/google/gerrit/httpd/raw/RobotsServlet.java b/gerrit-httpd/src/main/java/com/google/gerrit/httpd/raw/RobotsServlet.java
new file mode 100644
index 0000000..d19a0ce
--- /dev/null
+++ b/gerrit-httpd/src/main/java/com/google/gerrit/httpd/raw/RobotsServlet.java
@@ -0,0 +1,101 @@
+// Copyright (C) 2013 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gerrit.httpd.raw;
+
+import com.google.common.io.ByteStreams;
+import com.google.gerrit.server.config.GerritServerConfig;
+import com.google.gerrit.server.config.SitePaths;
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
+
+import org.eclipse.jgit.lib.Config;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+/**
+ * Serves <code>/robots.txt</code>, optionally from a file outside the .war
+ * that is configured in the <code>gerrit.config</code> file:
+ *
+ * <pre>
+ * [httpd]
+ *   robotsFile = etc/myrobots.txt
+ * </pre>
+ *
+ * A relative name is resolved against the site directory, an absolute one is
+ * used as is. If the file doesn't exist or isn't readable the servlet falls
+ * back to the <code>robots.txt</code> file bundled with the .war.
+ */
+@SuppressWarnings("serial")
+@Singleton
+public class RobotsServlet extends HttpServlet {
+  private static final Logger log =
+      LoggerFactory.getLogger(RobotsServlet.class);
+
+  private final File robotsFile; // null: serve the bundled default
+
+  @Inject
+  RobotsServlet(@GerritServerConfig final Config config, final SitePaths sitePaths) {
+    File file = sitePaths.resolve(config.getString("httpd", null, "robotsFile"));
+    if (file != null && (!file.exists() || !file.canRead())) {
+      log.warn("Cannot read httpd.robotsFile, using default");
+      file = null;
+    }
+    robotsFile = file;
+  }
+
+  /** Streams the robots file as text/plain; replies 404 if none can be opened. */
+  @Override
+  protected void doGet(final HttpServletRequest req, final HttpServletResponse rsp)
+      throws IOException {
+    final InputStream in = openRobotsFile();
+    if (in == null) { // getResourceAsStream found no bundled robots.txt either
+      rsp.sendError(HttpServletResponse.SC_NOT_FOUND);
+      return;
+    }
+    try {
+      rsp.setContentType("text/plain");
+      final OutputStream out = rsp.getOutputStream();
+      try {
+        ByteStreams.copy(in, out);
+      } finally {
+        out.close();
+      }
+    } finally {
+      in.close();
+    }
+  }
+
+  /** Opens the configured file, else the bundled default; may return null. */
+  private InputStream openRobotsFile() {
+    if (robotsFile != null) {
+      try {
+        return new FileInputStream(robotsFile);
+      } catch (IOException e) {
+        log.warn("Cannot read " + robotsFile + "; using default", e);
+      }
+    }
+    return getServletContext().getResourceAsStream("/robots.txt");
+  }
+}