| /* |
| * Copyright (C) 2008, 2019 Google Inc. and others |
| * |
| * This program and the accompanying materials are made available under the |
| * terms of the Eclipse Distribution License v. 1.0 which is available at |
| * https://www.eclipse.org/org/documents/edl-v10.php. |
| * |
| * SPDX-License-Identifier: BSD-3-Clause |
| */ |
| |
| package org.eclipse.jgit.util; |
| |
| import static java.nio.charset.StandardCharsets.UTF_8; |
| |
| import java.util.Arrays; |
| |
| import org.eclipse.jgit.lib.Constants; |
| |
| /** |
| * Utility functions related to quoted string handling. |
| */ |
| public abstract class QuotedString { |
| /** Quoting style that obeys the rules Git applies to file names */ |
| public static final GitPathStyle GIT_PATH = new GitPathStyle(true); |
| |
| /** |
| * Quoting style that obeys the rules Git applies to file names when |
| * {@code core.quotePath = false}. |
| * |
| * @since 5.6 |
| */ |
| public static final QuotedString GIT_PATH_MINIMAL = new GitPathStyle(false); |
| |
| /** |
| * Quoting style used by the Bourne shell. |
| * <p> |
| * Quotes are unconditionally inserted during {@link #quote(String)}. This |
| * protects shell meta-characters like <code>$</code> or <code>~</code> from |
| * being recognized as special. |
| */ |
| public static final BourneStyle BOURNE = new BourneStyle(); |
| |
| /** Bourne style, but permits <code>~user</code> at the start of the string. */ |
| public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle(); |
| |
| /** |
| * Quote an input string by the quoting rules. |
| * <p> |
| * If the input string does not require any quoting, the same String |
| * reference is returned to the caller. |
| * <p> |
| * Otherwise a quoted string is returned, including the opening and closing |
| * quotation marks at the start and end of the string. If the style does not |
| * permit raw Unicode characters then the string will first be encoded in |
| * UTF-8, with unprintable sequences possibly escaped by the rules. |
| * |
| * @param in |
| * any non-null Unicode string. |
| * @return a quoted string. See above for details. |
| */ |
| public abstract String quote(String in); |
| |
| /** |
| * Clean a previously quoted input, decoding the result via UTF-8. |
| * <p> |
| * This method must match quote such that: |
| * |
| * <pre> |
| * a.equals(dequote(quote(a))); |
| * </pre> |
| * |
| * is true for any <code>a</code>. |
| * |
| * @param in |
| * a Unicode string to remove quoting from. |
| * @return the cleaned string. |
| * @see #dequote(byte[], int, int) |
| */ |
| public String dequote(String in) { |
| final byte[] b = Constants.encode(in); |
| return dequote(b, 0, b.length); |
| } |
| |
| /** |
| * Decode a previously quoted input, scanning a UTF-8 encoded buffer. |
| * <p> |
| * This method must match quote such that: |
| * |
| * <pre> |
| * a.equals(dequote(Constants.encode(quote(a)))); |
| * </pre> |
| * |
| * is true for any <code>a</code>. |
| * <p> |
| * This method removes any opening/closing quotation marks added by |
| * {@link #quote(String)}. |
| * |
| * @param in |
| * the input buffer to parse. |
| * @param offset |
| * first position within <code>in</code> to scan. |
| * @param end |
| * one position past in <code>in</code> to scan. |
| * @return the cleaned string. |
| */ |
| public abstract String dequote(byte[] in, int offset, int end); |
| |
| /** |
| * Quoting style used by the Bourne shell. |
| * <p> |
| * Quotes are unconditionally inserted during {@link #quote(String)}. This |
| * protects shell meta-characters like <code>$</code> or <code>~</code> from |
| * being recognized as special. |
| */ |
| public static class BourneStyle extends QuotedString { |
| @Override |
| public String quote(String in) { |
| final StringBuilder r = new StringBuilder(); |
| r.append('\''); |
| int start = 0, i = 0; |
| for (; i < in.length(); i++) { |
| switch (in.charAt(i)) { |
| case '\'': |
| case '!': |
| r.append(in, start, i); |
| r.append('\''); |
| r.append('\\'); |
| r.append(in.charAt(i)); |
| r.append('\''); |
| start = i + 1; |
| break; |
| } |
| } |
| r.append(in, start, i); |
| r.append('\''); |
| return r.toString(); |
| } |
| |
| @Override |
| public String dequote(byte[] in, int ip, int ie) { |
| boolean inquote = false; |
| final byte[] r = new byte[ie - ip]; |
| int rPtr = 0; |
| while (ip < ie) { |
| final byte b = in[ip++]; |
| switch (b) { |
| case '\'': |
| inquote = !inquote; |
| continue; |
| case '\\': |
| if (inquote || ip == ie) |
| r[rPtr++] = b; // literal within a quote |
| else |
| r[rPtr++] = in[ip++]; |
| continue; |
| default: |
| r[rPtr++] = b; |
| continue; |
| } |
| } |
| return RawParseUtils.decode(UTF_8, r, 0, rPtr); |
| } |
| } |
| |
| /** Bourne style, but permits <code>~user</code> at the start of the string. */ |
| public static class BourneUserPathStyle extends BourneStyle { |
| @Override |
| public String quote(String in) { |
| if (in.matches("^~[A-Za-z0-9_-]+$")) { //$NON-NLS-1$ |
| // If the string is just "~user" we can assume they |
| // mean "~user/". |
| // |
| return in + "/"; //$NON-NLS-1$ |
| } |
| |
| if (in.matches("^~[A-Za-z0-9_-]*/.*$")) { //$NON-NLS-1$ |
| // If the string is of "~/path" or "~user/path" |
| // we must not escape ~/ or ~user/ from the shell. |
| // |
| final int i = in.indexOf('/') + 1; |
| if (i == in.length()) |
| return in; |
| return in.substring(0, i) + super.quote(in.substring(i)); |
| } |
| |
| return super.quote(in); |
| } |
| } |
| |
| /** Quoting style that obeys the rules Git applies to file names */ |
| public static final class GitPathStyle extends QuotedString { |
| private static final byte[] quote; |
| static { |
| quote = new byte[128]; |
| Arrays.fill(quote, (byte) -1); |
| |
| for (int i = '0'; i <= '9'; i++) |
| quote[i] = 0; |
| for (int i = 'a'; i <= 'z'; i++) |
| quote[i] = 0; |
| for (int i = 'A'; i <= 'Z'; i++) |
| quote[i] = 0; |
| quote[' '] = 0; |
| quote['$'] = 0; |
| quote['%'] = 0; |
| quote['&'] = 0; |
| quote['*'] = 0; |
| quote['+'] = 0; |
| quote[','] = 0; |
| quote['-'] = 0; |
| quote['.'] = 0; |
| quote['/'] = 0; |
| quote[':'] = 0; |
| quote[';'] = 0; |
| quote['='] = 0; |
| quote['?'] = 0; |
| quote['@'] = 0; |
| quote['_'] = 0; |
| quote['^'] = 0; |
| quote['|'] = 0; |
| quote['~'] = 0; |
| |
| quote['\u0007'] = 'a'; |
| quote['\b'] = 'b'; |
| quote['\f'] = 'f'; |
| quote['\n'] = 'n'; |
| quote['\r'] = 'r'; |
| quote['\t'] = 't'; |
| quote['\u000B'] = 'v'; |
| quote['\\'] = '\\'; |
| quote['"'] = '"'; |
| } |
| |
| private final boolean quoteHigh; |
| |
| @Override |
| public String quote(String instr) { |
| if (instr.isEmpty()) { |
| return "\"\""; //$NON-NLS-1$ |
| } |
| boolean reuse = true; |
| final byte[] in = Constants.encode(instr); |
| final byte[] out = new byte[4 * in.length + 2]; |
| int o = 0; |
| out[o++] = '"'; |
| for (byte element : in) { |
| final int c = element & 0xff; |
| if (c < quote.length) { |
| final byte style = quote[c]; |
| if (style == 0) { |
| out[o++] = (byte) c; |
| continue; |
| } |
| if (style > 0) { |
| reuse = false; |
| out[o++] = '\\'; |
| out[o++] = style; |
| continue; |
| } |
| } else if (!quoteHigh) { |
| out[o++] = (byte) c; |
| continue; |
| } |
| |
| reuse = false; |
| out[o++] = '\\'; |
| out[o++] = (byte) (((c >> 6) & 03) + '0'); |
| out[o++] = (byte) (((c >> 3) & 07) + '0'); |
| out[o++] = (byte) (((c >> 0) & 07) + '0'); |
| } |
| if (reuse) { |
| return instr; |
| } |
| out[o++] = '"'; |
| return new String(out, 0, o, UTF_8); |
| } |
| |
| @Override |
| public String dequote(byte[] in, int inPtr, int inEnd) { |
| if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"') |
| return dq(in, inPtr + 1, inEnd - 1); |
| return RawParseUtils.decode(UTF_8, in, inPtr, inEnd); |
| } |
| |
| private static String dq(byte[] in, int inPtr, int inEnd) { |
| final byte[] r = new byte[inEnd - inPtr]; |
| int rPtr = 0; |
| while (inPtr < inEnd) { |
| final byte b = in[inPtr++]; |
| if (b != '\\') { |
| r[rPtr++] = b; |
| continue; |
| } |
| |
| if (inPtr == inEnd) { |
| // Lone trailing backslash. Treat it as a literal. |
| // |
| r[rPtr++] = '\\'; |
| break; |
| } |
| |
| switch (in[inPtr++]) { |
| case 'a': |
| r[rPtr++] = 0x07 /* \a = BEL */; |
| continue; |
| case 'b': |
| r[rPtr++] = '\b'; |
| continue; |
| case 'f': |
| r[rPtr++] = '\f'; |
| continue; |
| case 'n': |
| r[rPtr++] = '\n'; |
| continue; |
| case 'r': |
| r[rPtr++] = '\r'; |
| continue; |
| case 't': |
| r[rPtr++] = '\t'; |
| continue; |
| case 'v': |
| r[rPtr++] = 0x0B/* \v = VT */; |
| continue; |
| |
| case '\\': |
| case '"': |
| r[rPtr++] = in[inPtr - 1]; |
| continue; |
| |
| case '0': |
| case '1': |
| case '2': |
| case '3': { |
| int cp = in[inPtr - 1] - '0'; |
| for (int n = 1; n < 3 && inPtr < inEnd; n++) { |
| final byte c = in[inPtr]; |
| if ('0' <= c && c <= '7') { |
| cp <<= 3; |
| cp |= c - '0'; |
| inPtr++; |
| } else { |
| break; |
| } |
| } |
| r[rPtr++] = (byte) cp; |
| continue; |
| } |
| |
| default: |
| // Any other code is taken literally. |
| // |
| r[rPtr++] = '\\'; |
| r[rPtr++] = in[inPtr - 1]; |
| continue; |
| } |
| } |
| |
| return RawParseUtils.decode(UTF_8, r, 0, rPtr); |
| } |
| |
| private GitPathStyle(boolean doQuote) { |
| quoteHigh = doQuote; |
| } |
| } |
| } |