| // Copyright (C) 2020 The Android Open Source Project |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package com.google.gerrit.server.patch.gitfilediff; |
| |
| import static java.nio.charset.StandardCharsets.UTF_8; |
| |
| import com.google.common.collect.ImmutableList; |
| import com.google.gerrit.entities.Patch; |
| import com.google.gerrit.entities.Patch.PatchType; |
| import java.util.Optional; |
| import org.eclipse.jgit.patch.CombinedFileHeader; |
| import org.eclipse.jgit.patch.FileHeader; |
| import org.eclipse.jgit.util.IntList; |
| import org.eclipse.jgit.util.RawParseUtils; |
| |
| /** A utility class for the {@link FileHeader} JGit object */ |
| public class FileHeaderUtil { |
| private static final Byte NUL = '\0'; |
| |
| /** |
| * The maximum number of characters to lookup in the binary file {@link FileHeader}. This is used |
| * to scan the file header for the occurrence of the {@link #NUL} character. |
| * |
| * <p>This limit assumes a uniform distribution of all characters, hence the probability of the |
| * occurrence of each character = (1 / 256). We want to find the limit that makes the prob. of |
| * finding {@link #NUL} > 0.999. 1 - (255 / 256) ^ N > 0.999 yields N = 1766. We set the limit to |
| * this value multiplied by 10 for more confidence. |
| */ |
| private static final int BIN_FILE_MAX_SCAN_LIMIT = 20000; |
| |
| /** Converts the {@link FileHeader} parameter to a String representation. */ |
| static String toString(FileHeader header) { |
| return new String(FileHeaderUtil.toByteArray(header), UTF_8); |
| } |
| |
| /** Converts the {@link FileHeader} parameter to a byte array. */ |
| static byte[] toByteArray(FileHeader header) { |
| int end = getEndOffset(header); |
| if (header.getStartOffset() == 0 && end == header.getBuffer().length) { |
| return header.getBuffer(); |
| } |
| |
| final byte[] buf = new byte[end - header.getStartOffset()]; |
| System.arraycopy(header.getBuffer(), header.getStartOffset(), buf, 0, buf.length); |
| return buf; |
| } |
| |
| /** Splits the {@code FileHeader} string to a list of strings, one string per header line. */ |
| public static ImmutableList<String> getHeaderLines(FileHeader fileHeader) { |
| String fileHeaderString = toString(fileHeader); |
| return getHeaderLines(fileHeaderString); |
| } |
| |
| public static ImmutableList<String> getHeaderLines(String header) { |
| return getHeaderLines(header.getBytes(UTF_8)); |
| } |
| |
| static ImmutableList<String> getHeaderLines(byte[] header) { |
| final IntList lineStartOffsets = RawParseUtils.lineMap(header, 0, header.length); |
| final ImmutableList.Builder<String> headerLines = |
| ImmutableList.builderWithExpectedSize(lineStartOffsets.size() - 1); |
| for (int i = 1; i < lineStartOffsets.size() - 1; i++) { |
| final int b = lineStartOffsets.get(i); |
| int e = lineStartOffsets.get(i + 1); |
| if (header[e - 1] == '\n') { |
| e--; |
| } |
| headerLines.add(RawParseUtils.decode(UTF_8, header, b, e)); |
| } |
| return headerLines.build(); |
| } |
| |
| /** |
| * Returns the old file path associated with the {@link FileHeader}, or empty if the file is |
| * {@link com.google.gerrit.entities.Patch.ChangeType#ADDED} or {@link |
| * com.google.gerrit.entities.Patch.ChangeType#REWRITE}. |
| */ |
| public static Optional<String> getOldPath(FileHeader header) { |
| Patch.ChangeType changeType = getChangeType(header); |
| switch (changeType) { |
| case DELETED: |
| case COPIED: |
| case RENAMED: |
| case MODIFIED: |
| return Optional.of(header.getOldPath()); |
| |
| case ADDED: |
| case REWRITE: |
| return Optional.empty(); |
| } |
| return Optional.empty(); |
| } |
| |
| /** |
| * Returns the new file path associated with the {@link FileHeader}, or empty if the file is |
| * {@link com.google.gerrit.entities.Patch.ChangeType#DELETED}. |
| */ |
| public static Optional<String> getNewPath(FileHeader header) { |
| Patch.ChangeType changeType = getChangeType(header); |
| switch (changeType) { |
| case DELETED: |
| return Optional.empty(); |
| |
| case ADDED: |
| case MODIFIED: |
| case REWRITE: |
| case COPIED: |
| case RENAMED: |
| return Optional.of(header.getNewPath()); |
| } |
| return Optional.empty(); |
| } |
| |
| /** Returns the change type associated with the file header. */ |
| public static Patch.ChangeType getChangeType(FileHeader header) { |
| // In Gerrit, we define our own entities of the JGit entities, so that we have full control |
| // over their behaviors (e.g. making sure that these entities are immutable so that we can add |
| // them as fields of keys / values of persisted caches). |
| |
| // TODO(ghareeb): remove the dead code of the value REWRITE and all its handling |
| switch (header.getChangeType()) { |
| case ADD: |
| return Patch.ChangeType.ADDED; |
| case MODIFY: |
| return Patch.ChangeType.MODIFIED; |
| case DELETE: |
| return Patch.ChangeType.DELETED; |
| case RENAME: |
| return Patch.ChangeType.RENAMED; |
| case COPY: |
| return Patch.ChangeType.COPIED; |
| default: |
| throw new IllegalArgumentException("Unsupported type " + header.getChangeType()); |
| } |
| } |
| |
| public static PatchType getPatchType(FileHeader header) { |
| PatchType patchType; |
| |
| switch (header.getPatchType()) { |
| case UNIFIED: |
| patchType = Patch.PatchType.UNIFIED; |
| break; |
| case GIT_BINARY: |
| case BINARY: |
| patchType = Patch.PatchType.BINARY; |
| break; |
| default: |
| throw new IllegalArgumentException("Unsupported type " + header.getPatchType()); |
| } |
| |
| if (patchType != PatchType.BINARY) { |
| byte[] buf = header.getBuffer(); |
| // TODO(ghareeb): should we adjust the max limit threshold? |
| // JGit sometimes misses the detection of binary files. In this case we look into the file |
| // header for the occurrence of NUL characters, which is a definite signal that the file is |
| // binary. We limit the number of characters to lookup to avoid performance bottlenecks. |
| for (int ptr = header.getStartOffset(); |
| ptr < Math.min(header.getEndOffset(), BIN_FILE_MAX_SCAN_LIMIT); |
| ptr++) { |
| if (buf[ptr] == NUL) { |
| // It's really binary, but Git couldn't see the nul early enough to realize its binary, |
| // and instead produced the diff. |
| // |
| // Force it to be a binary; it really should have been that. |
| return PatchType.BINARY; |
| } |
| } |
| } |
| return patchType; |
| } |
| |
| /** |
| * Returns the end offset of the diff header line of the {@code FileHeader parameter} before the |
| * appearance of any file edits (diff hunks). |
| */ |
| private static int getEndOffset(FileHeader fileHeader) { |
| if (fileHeader instanceof CombinedFileHeader) { |
| return fileHeader.getEndOffset(); |
| } |
| if (!fileHeader.getHunks().isEmpty()) { |
| return fileHeader.getHunks().get(0).getStartOffset(); |
| } |
| return fileHeader.getEndOffset(); |
| } |
| } |