Merge Lucene core and backward-codecs jars

Both of these jars contain a provider-configuration file at
META-INF/services/org.apache.lucene.codecs.Codec that registers their
respective Codec implementations as providers of this service. The
proper way to merge these files is to concatenate them, but the normal
Buck build process instead chooses one of them arbitrarily.

Add a new custom rule merge_maven_jars to merge multiple Maven jars
together using a simple Python script. The script copies the entries
of all input zip files into a single output file; when an entry occurs
in more than one input, it keeps the one from the file listed first on
the command line, which is still arbitrary but at least deterministic.
Files in the META-INF/services directory are handled specially: their
contents are concatenated.

Use this new rule to merge the old :core and :backward-codecs rules
into a single :core-and-backward-codecs rule.

Change-Id: I42fd1a130e42cb0eebf7bee61cfdf8545397cd09
diff --git a/gerrit-httpd/BUCK b/gerrit-httpd/BUCK
index e215533..b29bd2a 100644
--- a/gerrit-httpd/BUCK
+++ b/gerrit-httpd/BUCK
@@ -34,7 +34,7 @@
     '//lib/jgit:jgit',
     '//lib/jgit:jgit-servlet',
     '//lib/log:api',
-    '//lib/lucene:core',
+    '//lib/lucene:core-and-backward-codecs',
   ],
   provided_deps = ['//lib:servlet-api-3_1'],
   visibility = ['PUBLIC'],
diff --git a/gerrit-lucene/BUCK b/gerrit-lucene/BUCK
index a146774..8ba7479 100644
--- a/gerrit-lucene/BUCK
+++ b/gerrit-lucene/BUCK
@@ -11,7 +11,7 @@
     '//gerrit-server:server',
     '//lib:gwtorm',
     '//lib:guava',
-    '//lib/lucene:core',
+    '//lib/lucene:core-and-backward-codecs',
   ],
   visibility = ['PUBLIC'],
 )
@@ -34,8 +34,7 @@
     '//lib/jgit:jgit',
     '//lib/log:api',
     '//lib/lucene:analyzers-common',
-    '//lib/lucene:backward-codecs',
-    '//lib/lucene:core',
+    '//lib/lucene:core-and-backward-codecs',
     '//lib/lucene:misc',
   ],
   visibility = ['PUBLIC'],
diff --git a/gerrit-server/BUCK b/gerrit-server/BUCK
index f7bb86f..31a9b49 100644
--- a/gerrit-server/BUCK
+++ b/gerrit-server/BUCK
@@ -63,7 +63,7 @@
     '//lib/log:jsonevent-layout',
     '//lib/log:log4j',
     '//lib/lucene:analyzers-common',
-    '//lib/lucene:core',
+    '//lib/lucene:core-and-backward-codecs',
     '//lib/lucene:queryparser',
     '//lib/ow2:ow2-asm',
     '//lib/ow2:ow2-asm-tree',
diff --git a/lib/asciidoctor/BUCK b/lib/asciidoctor/BUCK
index f8feb63..ad13313 100644
--- a/lib/asciidoctor/BUCK
+++ b/lib/asciidoctor/BUCK
@@ -36,7 +36,7 @@
     '//lib:args4j',
     '//lib:guava',
     '//lib/lucene:analyzers-common',
-    '//lib/lucene:core',
+    '//lib/lucene:core-and-backward-codecs',
   ],
   visibility = ['//tools/eclipse:classpath'],
 )
diff --git a/lib/lucene/BUCK b/lib/lucene/BUCK
index 6ab33d9..5e4a82f 100644
--- a/lib/lucene/BUCK
+++ b/lib/lucene/BUCK
@@ -2,8 +2,19 @@
 
 VERSION = '5.2.1'
 
+# core and backward-codecs both provide
+# META-INF/services/org.apache.lucene.codecs.Codec, so they must be merged.
+merge_maven_jars(
+  name = 'core-and-backward-codecs',
+  srcs = [
+    ':backward-codecs_jar',
+    ':core_jar',
+  ],
+  visibility = ['PUBLIC'],
+)
+
 maven_jar(
-  name = 'core',
+  name = 'core_jar',
   id = 'org.apache.lucene:lucene-core:' + VERSION,
   sha1 = 'a175590aa8b04e079eb1a136fd159f9163482ba4',
   license = 'Apache2.0',
@@ -11,6 +22,7 @@
     'META-INF/LICENSE.txt',
     'META-INF/NOTICE.txt',
   ],
+  visibility = [],
 )
 
 maven_jar(
@@ -18,7 +30,7 @@
   id = 'org.apache.lucene:lucene-analyzers-common:' + VERSION,
   sha1 = '33b7cc17d5a7c939af6fe3f67563f4709926d7f5',
   license = 'Apache2.0',
-  deps = [':core'],
+  deps = [':core-and-backward-codecs'],
   exclude = [
     'META-INF/LICENSE.txt',
     'META-INF/NOTICE.txt',
@@ -26,15 +38,16 @@
 )
 
 maven_jar(
-  name = 'backward-codecs',
+  name = 'backward-codecs_jar',
   id = 'org.apache.lucene:lucene-backward-codecs:' + VERSION,
   sha1 = '603d1f06b133449272799d698e5118db65e523ba',
   license = 'Apache2.0',
-  deps = [':core'],
+  deps = [':core_jar'],
   exclude = [
     'META-INF/LICENSE.txt',
     'META-INF/NOTICE.txt',
   ],
+  visibility = [],
 )
 
 maven_jar(
@@ -42,7 +55,7 @@
   id = 'org.apache.lucene:lucene-misc:' + VERSION,
   sha1 = 'be0a4f0ac06f0a2fa3689b4bf6cd1fe6847f9969',
   license = 'Apache2.0',
-  deps = [':core'],
+  deps = [':core-and-backward-codecs'],
   exclude = [
     'META-INF/LICENSE.txt',
     'META-INF/NOTICE.txt',
@@ -54,7 +67,7 @@
   id = 'org.apache.lucene:lucene-queryparser:' + VERSION,
   sha1 = '73be0a2d4ab3e6b574be1938bfb27f7f730f0ad9',
   license = 'Apache2.0',
-  deps = [':core'],
+  deps = [':core-and-backward-codecs'],
   exclude = [
     'META-INF/LICENSE.txt',
     'META-INF/NOTICE.txt',
diff --git a/lib/maven.defs b/lib/maven.defs
index 17f0e00..dd8097e 100644
--- a/lib/maven.defs
+++ b/lib/maven.defs
@@ -150,3 +150,30 @@
       visibility = visibility,
     )
 
+
+def merge_maven_jars(name, srcs, visibility = []):
+  """Define a prebuilt_jar `name` merging the maven_jar rules in `srcs`.
+
+  One genrule runs //tools:merge_jars over the __download_bin outputs of
+  srcs and another over their __download_src outputs; the two results are
+  exposed as the binary and source jar of the prebuilt_jar.
+  """
+  bin_rule = name + '__merged_bin'
+  src_rule = name + '__merged_src'
+
+  def merge_cmd(suffix):
+    # $OUT comes first, followed by the inputs in the order given in srcs.
+    inputs = ['$(location %s%s)' % (s, suffix) for s in srcs]
+    return '$(location //tools:merge_jars) $OUT ' + ' '.join(inputs)
+
+  genrule(name = bin_rule, cmd = merge_cmd('__download_bin'),
+          out = name + '__merged.jar')
+  # tools/eclipse/project.py requires -src.jar suffix.
+  genrule(name = src_rule, cmd = merge_cmd('__download_src'),
+          out = name + '__merged-src.jar')
+  prebuilt_jar(
+    name = name,
+    binary_jar = ':' + bin_rule,
+    source_jar = ':' + src_rule,
+    visibility = visibility,
+  )
diff --git a/tools/BUCK b/tools/BUCK
index ee26062..0bdff3c 100644
--- a/tools/BUCK
+++ b/tools/BUCK
@@ -6,6 +6,12 @@
 )
 
 python_binary(
+  name = 'merge_jars',
+  main = 'merge_jars.py',
+  visibility = ['PUBLIC'],
+)
+
+python_binary(
   name = 'pack_war',
   main = 'pack_war.py',
   deps = [':util'],
diff --git a/tools/merge_jars.py b/tools/merge_jars.py
new file mode 100755
index 0000000..46016c0
--- /dev/null
+++ b/tools/merge_jars.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import collections
+import sys
+import zipfile
+
+
+if len(sys.argv) < 3:
+  print('usage: %s <out.zip> <in.zip>...' % sys.argv[0], file=sys.stderr)
+  sys.exit(1)
+
+outfile = sys.argv[1]
+infiles = sys.argv[2:]
+seen = set()
+SERVICES = 'META-INF/services/'
+
+try:
+  with zipfile.ZipFile(outfile, 'w') as outzip:
+    services = collections.defaultdict(bytes)  # ZipFile.read() gives bytes.
+    for infile in infiles:
+      with zipfile.ZipFile(infile) as inzip:
+        for info in inzip.infolist():
+          n = info.filename
+          if n.startswith(SERVICES) and not n.endswith('/'):
+            # Concatenate provider configuration files, newline-terminating
+            # each part so adjacent provider names are never fused.
+            data = inzip.read(n)
+            services[n] += data if data.endswith(b'\n') else data + b'\n'
+          elif n not in seen:
+            # Any other entry: the first input file providing it wins.
+            outzip.writestr(info, inzip.read(n))
+            seen.add(n)
+    for n in sorted(services):  # Sorted so the output is reproducible.
+      outzip.writestr(n, services[n])
+except Exception as err:
+  sys.exit('Failed to merge jars: %s' % err)