blob: 6df88668c1b26dd0788798cef87167fe100f702c [file] [log] [blame]
// Copyright 2010 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gwtorm.nosql;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
/**
* Encoder support for {@link IndexFunction} computed strings.
*
* <p>This class provides a string that may contain multiple values, using delimiters between fields
* and big-endian encoded numerics. Sorting the resulting strings using unsigned byte orderings
* produces a stable sorting.
*
* <p>The encoding used by this class relies on having 258 tokens. To get the extra 2 tokens within
* a 256 byte range, escapes are used according to the following simple table:
*
* <ul>
* <li>delimiter = \x00\x01
* <li>byte \x00 = \x00\xff
* <li>byte \xff = \xff\x00
* <li>infinity = \xff\xff
* </ul>
*
* <p>Integers are encoded as variable length big-endian values, skipping leading zero bytes,
* prefixed by the number of bytes used to encode them. Therefore 0 is encoded as "\x00", and 256 is
* encoded as "\x02\x01\x00". Negative values are encoded in their twos complement encoding and
* therefore sort after the maximum positive value.
*
* <p>Strings and byte arrays supplied by the caller have their \x00 and \xff values escaped
* according to the table above, but are otherwise written as-is without a length prefix.
*
* <p>Callers are responsible for inserting {@link #delimiter()} markers at the appropriate
* positions in the sequence.
*/
public class IndexKeyBuilder {
private final ByteArrayOutputStream buf = new ByteArrayOutputStream();
/** Add a delimiter marker to the string. */
public void delimiter() {
buf.write(0x00);
buf.write(0x01);
}
/**
* Add the special infinity symbol to the string.
*
* <p>The infinity symbol sorts after all other values in the same position.
*/
public void infinity() {
buf.write(0xff);
buf.write(0xff);
}
/**
* Add \0 to the string.
*
* <p>\0 can be used during searches to enforce greater then or less then clauses in a query.
*/
public void nul() {
buf.write(0x00);
}
/**
* Add a raw sequence of bytes.
*
* <p>The bytes 0x00 and 0xff are escaped by this method according to the encoding table described
* in the class documentation.
*
* @param bin array to copy from.
* @param pos first index to copy.
* @param cnt number of bytes to copy.
*/
public void add(byte[] bin, int pos, int cnt) {
while (0 < cnt--) {
byte b = bin[pos++];
if (b == 0x00) {
buf.write(0x00);
buf.write(0xff);
} else if (b == -1) {
buf.write(0xff);
buf.write(0x00);
} else {
buf.write(b);
}
}
}
public void desc(byte[] bin, int pos, int cnt) {
while (0 < cnt--) {
int b = 0xff - (bin[pos++] & 0xff);
if (b == 0x00) {
buf.write(0x00);
buf.write(0xff);
} else if (b == 0xff) {
buf.write(0xff);
buf.write(0x00);
} else {
buf.write(b);
}
}
}
/**
* Add a raw sequence of bytes.
*
* <p>The bytes 0x00 and 0xff are escaped by this method according to the encoding table described
* in the class documentation.
*
* @param bin the complete array to copy.
*/
public void add(byte[] bin) {
add(bin, 0, bin.length);
}
public void desc(byte[] bin) {
desc(bin, 0, bin.length);
}
/**
* Encode a string into UTF-8 and append as a sequence of bytes.
*
* @param str the string to encode and append.
*/
public void add(String str) {
try {
add(str.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("JVM does not support UTF-8", e);
}
}
public void desc(String str) {
try {
desc(str.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("JVM does not support UTF-8", e);
}
}
/**
* Add a single character as though it were part of a UTF-8 string.
*
* @param ch the character to encode and append.
*/
public void add(char ch) {
if (ch == 0x00) {
buf.write(0x00);
buf.write(0xff);
} else if (ch <= 255) {
buf.write(ch);
} else {
add(Character.toString(ch));
}
}
public void desc(char ch) {
desc(Character.toString(ch));
}
/**
* Add an integer value as a big-endian variable length integer.
*
* @param val the value to add.
*/
public void add(long val) {
final byte[] t = new byte[9];
int i = t.length;
while (val != 0) {
t[--i] = (byte) (val & 0xff);
val >>>= 8;
}
t[i - 1] = (byte) (t.length - i);
buf.write(t, i - 1, t.length - i + 1);
}
public void desc(long val) {
add(~val);
}
/**
* Add a byte array as-is, without escaping.
*
* <p>This should only be used the byte array came from a prior index key and the caller is trying
* to create a new key with this key embedded at the end.
*
* @param bin the binary to append as-is, without further escaping.
*/
public void addRaw(byte[] bin) {
buf.write(bin, 0, bin.length);
}
/**
* Obtain a copy of the internal storage array.
*
* @return the current state of this, converted into a flat byte array.
*/
public byte[] toByteArray() {
return buf.toByteArray();
}
}