#!/usr/bin/perl
# ------------------------------------------------------------
# This script fixes the license headers of all Java sources
# (plus the LICENSE file, pom.xml files and shell scripts)
# to use the Eclipse EDL license template and updates the
# copyright statements using author information from git blame.
#
# To fix this in all revisions, rewrite the history:
#   git filter-branch --tree-filter 'fixHeaders.pl' HEAD
# ------------------------------------------------------------
10use strict;
11
# Table mapping commit authors to the actual copyright owner.
#
# Each row is:
#   [ author pattern, start year, start month, end year, end month, owner ]
#
# translate_author() returns the owner of the first row whose pattern
# matches the "Name <email>" string and whose inclusive start..end
# (year, month) range contains the commit date.
#
# The dots in the addresses are escaped and the patterns are anchored on
# the closing ">" so that look-alike domains (e.g. "google.community.org")
# are not attributed to the listed owner.
#
my @author_employers = (
    [ qr/<spearce\@spearce\.org>/, 2008, 8, 9999, 12, 'Google Inc.' ],

    [ qr/\@(?:.*\.)?google\.com>/, 0, 0, 9999, 12, 'Google Inc.' ],
);
19
# Body of the license notice written after the generated copyright lines.
# It is kept free of comment markers here; update_file() runs the
# per-file-type formatter over it before printing.
#
my $license_text = <<'END_LICENSE';
 and other copyright owners as documented in the project's IP log.

 This program and the accompanying materials are made available
 under the terms of the Eclipse Distribution License v1.0 which
 accompanies this distribution, is reproduced below, and is
 available at http://www.eclipse.org/org/documents/edl-v10.php

 All rights reserved.

 Redistribution and use in source and binary forms, with or
 without modification, are permitted provided that the following
 conditions are met:

 - Redistributions of source code must retain the above copyright
 notice, this list of conditions and the following disclaimer.

 - Redistributions in binary form must reproduce the above
 copyright notice, this list of conditions and the following
 disclaimer in the documentation and/or other materials provided
 with the distribution.

 - Neither the name of the Eclipse Foundation, Inc. nor the
 names of its contributors may be used to endorse or promote
 products derived from this software without specific prior
 written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
END_LICENSE
63
# Files to process: the command-line arguments or, when none are given,
# every path reported by "git ls-files".
my @files = @ARGV;
unless (@files) {
    # List-form pipe open runs git directly instead of through a shell.
    open(my $ls, '-|', 'git', 'ls-files')
        or die "cannot run git ls-files: $!\n";
    @files = <$ls>;
    chomp @files;    # strips only a trailing newline, unlike chop
    close($ls)
        or die "git ls-files failed\n";
}

# Dispatch on the file name.  A named loop variable is used because the
# *_file readers and update_file() read lines into $_ themselves.
foreach my $file (@files) {
    if ($file =~ /\.java$/ || $file eq 'LICENSE') {
        next if $file eq 'org.eclipse.jgit/src/org/eclipse/jgit/util/Base64.java';
        update_file(\&java_file, $file);

    } elsif ($file =~ /pom\.xml$/) {
        update_file(\&pom_file, $file);

    } elsif ($file =~ /\.sh$/) {
        update_file(\&sh_file, $file);
    }
}
84
# Split a Java source file (also used for the LICENSE file) into its old
# header and its preamble.
#
# Argument: an open read handle positioned at the start of the file.
# Returns the list ($header, $preamble, $lineno, $top, $fmt, $btm):
#   $header   - every line before the "package" statement
#   $preamble - the "package" line, the import/blank lines after it and
#               the first line that is neither
#   $lineno   - number of lines read from the handle, minus one
#   $top      - text that opens the new comment block
#   $fmt      - code ref that prefixes every line held in $_ with " *"
#   $btm      - text that closes the new comment block
#
# Lines are read into a lexical so the caller's $_ is left untouched.
sub java_file
{
    my $fd = shift;
    my $header = '';
    my $preamble = '';

    # header is everything before package statement
    while (defined(my $line = <$fd>)) {
        if ($line =~ /^package /) {
            $preamble = $line;
            last;
        }
        $header .= $line;
    }

    # preamble is everything with blanks or imports
    while (defined(my $line = <$fd>)) {
        $preamble .= $line;
        last unless ($line =~ /^import / || $line =~ /^$/);
    }
    my $lineno = $. - 1;

    return ($header, $preamble, $lineno,
        "/*\n", sub { s/^/ */mg }, " */\n");
}
110
# Split a pom.xml file into its old header and its preamble.
#
# Argument: an open read handle positioned at the start of the file.
# Returns the list ($header, $preamble, $lineno, $top, $fmt, $btm):
#   $header   - every line before the first line containing "<project"
#   $preamble - that "<project" line
#   $lineno   - number of lines read from the handle, minus one
#   $top      - XML declaration plus the opening of an XML comment
#   $fmt      - code ref that indents every non-empty line held in $_
#   $btm      - text that closes the XML comment
#
# Lines are read into a lexical so the caller's $_ is left untouched.
sub pom_file
{
    my $fd = shift;
    my $header = '';
    my $preamble = '';

    # header is everything before project
    while (defined(my $line = <$fd>)) {
        if ($line =~ /<project/) {
            $preamble = $line;
            last;
        }
        $header .= $line;
    }
    my $lineno = $. - 1;

    return ($header, $preamble, $lineno,
        qq{<?xml version="1.0" encoding="UTF-8"?>\n<!--\n},
        sub { s/^(.)/ $1/mg },
        qq{-->\n});
}
132
# Split a shell script into its first line, old header and preamble.
#
# Argument: an open read handle positioned at the start of the file.
# Returns the list ($header, $preamble, $lineno, $top, $fmt, $btm):
#   $header   - the run of lines starting with "#" that follows line one
#   $preamble - the first line after that run
#   $lineno   - number of lines read from the handle, minus one
#   $top      - the first line of the file, reused verbatim ('' when the
#               file is empty)
#   $fmt      - code ref that prefixes every line held in $_ with "#"
#   $btm      - always the empty string
#
# Lines are read into a lexical so the caller's $_ is left untouched.
sub sh_file
{
    my $fd = shift;
    my $top = <$fd>;
    $top = '' unless defined $top;
    my $header = '';
    my $preamble = '';

    while (defined(my $line = <$fd>)) {
        if ($line =~ /^#/) {
            $header .= $line;
            next;
        }
        $preamble = $line;
        last;
    }
    my $lineno = $. - 1;

    return ($header, $preamble, $lineno, $top, sub { s/^/#/mg }, "");
}
152
# Rewrite one file so that it starts with freshly generated copyright
# lines followed by the license text.
#
# Arguments:
#   $func     - code ref (java_file, pom_file or sh_file); called with the
#               open input handle, it returns
#               ($header, $preamble, $lineno, $top, $fmt, $btm)
#   $old_file - path of the file to rewrite in place
#
# Copyright owners come from explicit "Copyright (c)" lines in the old
# header and, for every file except LICENSE, from the authors that
# "git blame" reports for the lines from $lineno onwards.  The result is
# written to "$old_file.license.<pid>" and then renamed over the original.
#
# A file that cannot be opened for reading is skipped with a warning;
# failures while writing or renaming are fatal.
sub update_file
{
    my $func = shift;
    my $old_file = shift;
    my $new_file = "$old_file.license.$$";

    my $in;
    unless (open($in, '<', $old_file)) {
        # Without this check an unreadable file would be replaced by a
        # file holding nothing but the license text.
        warn "skipping $old_file: $!\n";
        return;
    }
    # Remember the permission bits; the replacement file is created with
    # default permissions and would otherwise lose e.g. the executable bit.
    my $mode = (stat($in))[2];

    my ($header, $preamble, $lineno,
        $top, $fmt, $btm) = $func->($in);

    # %all_years is only filled in; it is kept because
    # update_author_info() expects a reference to it.
    my %all_years;
    my %author_years;
    my %minyear;
    my %maxyear;

    # find explicit copyright statements in sources
    foreach my $line (split(/\n/, $header)) {
        # * Copyright (c) 2008, Example Company Inc.
        # * Copyright (c) 2008, Joe Developer <joe.dev@example.org>
        # * Copyright (c) 2008, 2009 Joe Developer <joe.dev@example.org>
        # * Copyright (c) 2005-2009 Joe Developer <joe.dev@example.org>
        # * Copyright (c) 2008, 2009 Other Examples Inc.
        # * Copyright (c) 2008-2010 Example Company Inc.
        # * Copyright (C) 2009-2010, Yet More Examples Ltd.
        next unless $line =~ m/Copyright \(c\) (\d{4})(?:\s*[,-]\s*(\d{4}))?,?\s*([^<>]+)\s*(<.*?>)?/i;
        my ($y, $y2, $n, $e) = ($1, $2, $3, $4);
        my $year = trim($y);
        my $who = trim($n);
        my $author_email = trim($e);
        $who .= " $author_email" if $author_email;
        update_author_info(\%minyear, \%maxyear, \%all_years, \%author_years, $who, $year);
        if ($y2) {
            update_author_info(\%minyear, \%maxyear, \%all_years, \%author_years, $who, $y2);
        }
    }

    if ($old_file ne 'LICENSE') {
        # add implicit copyright statements from authors found in git blame
        my (%line_counts, %line_authors);
        my ($last_commit, $author_name, $author_email);
        my @blame_args = ('git', 'blame', "-L$lineno,", '-C', '-w', '-p');
        push(@blame_args, $ENV{'GIT_COMMIT'}) if $ENV{'GIT_COMMIT'};
        push(@blame_args, '--', $old_file);
        open(my $blame, '-|', @blame_args)
            or die "cannot run git blame on $old_file: $!\n";
        while (defined(my $line = <$blame>)) {
            chomp $line;
            # "<commit> <orig line> <final line> <lines in group>"
            if ($line =~ /^([0-9a-f]{40}) \d+ \d+ (\d+)$/) {
                $last_commit = $1;
                $line_counts{$1} += $2;
                next;
            }
            if ($line =~ /^author (.*)$/) {
                $author_name = trim($1);
                next;
            }
            if ($line =~ /^author-mail (<.*>)$/) {
                $author_email = trim($1);
                next;
            }
            if ($line =~ /^author-time (\d+)$/) {
                # skip uncommitted changes
                my $who = "$author_name $author_email";
                next if $who eq 'Not Committed Yet <not.committed.yet>';
                my @tm = localtime($1);
                my $year = $tm[5] + 1900;
                my $mon = $tm[4] + 1;
                $who = translate_author($who, $year, $mon);
                $line_authors{$last_commit} = [$who, $year, $mon];
            }
        }
        # The exit status is deliberately not checked: a failing blame
        # simply contributes no authors.
        close($blame);

        # number of blamed lines per (translated) author
        my %author_linecounts;
        foreach my $commit (keys %line_counts) {
            my $who = $line_authors{$commit}[0];
            next unless $who;
            $author_linecounts{$who} += $line_counts{$commit};
        }

        # If at least one author owns $sz or more lines, authors below
        # that threshold are left out.
        my $sz = 100;
        my $count_big = 0;
        foreach my $count (values %author_linecounts) {
            $count_big++ if $count >= $sz;
        }

        foreach my $entry (values %line_authors) {
            my ($who, $year, $mon) = @$entry;
            next if ($count_big && $author_linecounts{$who} < $sz);
            update_author_info(\%minyear, \%maxyear, \%all_years, \%author_years, $who, $year, $mon);
        }
    }

    # rewrite file
    open(my $out, '>', $new_file)
        or die "cannot write $new_file: $!\n";
    print {$out} $top;

    # one copyright line per owner, sorted by owner
    foreach my $who (sort keys %author_years) {
        local $_ = format_copyright($minyear{$who}, $maxyear{$who}, $who);
        $fmt->();
        print {$out} $_;
    }

    {
        local $_ = $license_text;
        $fmt->();
        print {$out} $_;
    }
    print {$out} $btm;
    print {$out} "\n";
    print {$out} $preamble;
    # copy the remainder of the original file unchanged
    while (defined(my $line = <$in>)) {
        print {$out} $line;
    }
    close($in);
    close($out)
        or die "cannot finish writing $new_file: $!\n";

    if (defined $mode) {
        chmod($mode & 07777, $new_file)
            or warn "cannot restore permissions on $new_file: $!\n";
    }
    rename($new_file, $old_file)
        or die "cannot rename $new_file to $old_file: $!\n";
}
277
# Return a copy of the argument with leading and trailing whitespace
# removed.  An undefined argument is returned as undef, which is what
# callers get for an optional capture group that did not match.
#
# No prototype is declared: every call in this file appears before this
# definition, so a prototype would not be applied to them anyway.
sub trim
{
    my $string = shift;
    return $string unless defined $string;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}
285
# Record that $who holds copyright for $year.
#
# Arguments:
#   $minyear_ref      - hash ref: owner => earliest year seen
#   $maxyear_ref      - hash ref: owner => latest year seen
#   $all_years_ref    - hash ref: year => 1 for every year seen
#   $author_years_ref - hash ref: owner => { year => 1 }
#   $who              - author, passed through translate_author() first
#   $year             - four-digit year
#   $mon              - month (optional; forwarded to translate_author())
#
# "No year recorded yet" is detected with defined() rather than by
# comparing against a sentinel, so any recorded year is kept as a valid
# minimum or maximum.
sub update_author_info
{
    my ($minyear_ref, $maxyear_ref, $all_years_ref, $author_years_ref,
        $who, $year, $mon) = @_;

    $who = translate_author($who, $year, $mon);
    $all_years_ref->{$year} = 1;
    $author_years_ref->{$who}{$year} = 1;

    my $min = $minyear_ref->{$who};
    if (!defined($min) || $year < $min) {
        $minyear_ref->{$who} = $year;
    }

    my $max = $maxyear_ref->{$who};
    if (!defined($max) || $year > $max) {
        $maxyear_ref->{$who} = $year;
    }
    return;
}
306
# Compare two (year, month) pairs.
#
# Returns -1, 0 or 1 when the first pair is earlier than, equal to or
# later than the second; the month only decides when the years are equal.
sub date_cmp
{
    my ($left_year, $left_mon, $right_year, $right_mon) = @_;

    # A non-zero year comparison short-circuits the month comparison.
    return ($left_year <=> $right_year) || ($left_mon <=> $right_mon);
}
319
# Map an author to the copyright owner listed in @author_employers.
#
# Arguments: $who (author string), $year and $mon (date of the commit).
# Returns the owner of the first table row whose pattern matches $who and
# whose start..end range contains ($year, $mon).  $who is returned
# unchanged when no row applies or when $mon is not defined.
sub translate_author
{
    my ($who, $year, $mon) = @_;

    return $who unless defined $mon;

    foreach my $spec (@author_employers) {
        my ($pattern, $from_year, $from_mon, $to_year, $to_mon, $owner) = @$spec;
        if ($who =~ $pattern
            && date_cmp($year, $mon, $from_year, $from_mon) >= 0
            && date_cmp($year, $mon, $to_year, $to_mon) <= 0) {
            return $owner;
        }
    }
    return $who;
}
334
# Build one copyright line (without comment markers).
#
# Arguments: first year, last year, owner.
# Returns " Copyright (C) <first>-<last>, <owner>\n" when the first year
# is lower than the last one, otherwise " Copyright (C) <first>, <owner>\n".
sub format_copyright {
    my ($first, $last, $owner) = @_;
    my $years = ($first < $last) ? "$first-$last" : $first;
    return " Copyright (C) $years, $owner\n";
}
343