/**
 * Fills a range of {@code str} by repeating the bytes taken from a range of {@code fillArray}.
 * The range to be filled extends from index {@code strFromIdx}, inclusive, to index
 * {@code strToIdx}, exclusive. (If {@code strFromIdx == strToIdx}, the range to be filled is
 * empty and {@code str} is left untouched.) The fill bytes are written in order and start over
 * from {@code fillFromIdx} each time {@code fillToIdx} is reached; the last repetition is
 * truncated if the target range is not a multiple of the fill length.
 *
 * @param str the array to be filled
 * @param strFromIdx the index of the first element (inclusive) to be filled with the fill values
 * @param strToIdx the index of the last element (exclusive) to be filled with the fill values
 * @param fillArray the values to be stored in all elements of the array
 * @param fillFromIdx the index of the first element (inclusive) to be used as fill values
 * @param fillToIdx the index of the last element (exclusive) to be used as fill value
 * @param invertFill if true inverts the bits in fill before filling the array
 * @throws IllegalArgumentException if a from-index is greater than its to-index, or if the fill
 *     range is empty while the range to be filled is not
 * @throws ArrayIndexOutOfBoundsException if a range does not lie within its array
 */
public static void fill(
    byte[] str,
    int strFromIdx,
    int strToIdx,
    byte[] fillArray,
    int fillFromIdx,
    int fillToIdx,
    boolean invertFill) {
  rangeCheck(str.length, strFromIdx, strToIdx);
  rangeCheck(fillArray.length, fillFromIdx, fillToIdx);
  int fillLen = fillToIdx - fillFromIdx;
  if (fillLen == 0 && strFromIdx < strToIdx) {
    // Previously this combination looped forever because no byte was ever written.
    throw new IllegalArgumentException(
        "Cannot fill range [" + strFromIdx + ", " + strToIdx + ") with an empty fill range");
  }
  byte[] fill = fillArray;
  if (invertFill) {
    // NOTE(review): the returned array is read at the same indices as fillArray
    // (fillFromIdx..fillToIdx), exactly as before - confirm against SortOrder.invert.
    fill = SortOrder.invert(fillArray, fillFromIdx, fillLen);
  }
  // Copy byte by byte, wrapping around the fill range. The former bulk-copy branch never
  // advanced either index, so it hung whenever strFromIdx was smaller than fillFromIdx.
  int fillIdx = fillFromIdx;
  for (int strIdx = strFromIdx; strIdx < strToIdx; strIdx++) {
    str[strIdx] = fill[fillIdx++];
    if (fillIdx == fillToIdx) {
      fillIdx = fillFromIdx;
    }
  }
}
/**
 * Determines how many bytes the UTF-8 character introduced by the given lead byte occupies.
 * For {@link SortOrder#DESC} the byte is passed through {@code SortOrder.invert} before it is
 * classified.
 *
 * @param b the first byte of the character
 * @param sortOrder the sort order the byte is stored in; must not be null
 * @return 1, 2, 3 or 4 according to the lead-byte pattern, or -1 if the byte matches none of
 *     the lead-byte patterns
 */
private static int getBytesInCharNoException(byte b, SortOrder sortOrder) {
  Preconditions.checkNotNull(sortOrder);
  final byte leadByte = (sortOrder == SortOrder.DESC) ? SortOrder.invert(b) : b;
  final int unsigned = leadByte & 0xff;

  final int charBytes;
  if ((unsigned & BYTES_1_MASK) == 0) {
    charBytes = 1; // 0xxxxxxx
  } else if ((unsigned & BYTES_2_MASK) == 0xC0) {
    charBytes = 2; // 110xxxxx
  } else if ((unsigned & BYTES_3_MASK) == 0xE0) {
    charBytes = 3; // 1110xxxx
  } else if ((unsigned & BYTES_4_MASK) == 0xF0) {
    charBytes = 4; // 11110xxx
  } else {
    charBytes = -1; // not a valid lead byte
  }
  return charBytes;
}
public class StringUtil { public static final String EMPTY_STRING = ""; // Masks to determine how many bytes are in each character // From http://tools.ietf.org/html/rfc3629#section-3 public static final byte SPACE_UTF8 = 0x20; private static final int BYTES_1_MASK = 0xFF << 7; // 0xxxxxxx is a single byte char private static final int BYTES_2_MASK = 0xFF << 5; // 110xxxxx is a double byte char private static final int BYTES_3_MASK = 0xFF << 4; // 1110xxxx is a triple byte char private static final int BYTES_4_MASK = 0xFF << 3; // 11110xxx is a quadruple byte char public static final byte INVERTED_SPACE_UTF8 = SortOrder.invert(new byte[] {SPACE_UTF8}, 0, new byte[1], 0, 1)[0]; public static final char SINGLE_CHAR_WILDCARD = '?'; public static final char SINGLE_CHAR_LIKE = '_'; public static final char MULTI_CHAR_WILDCARD = '*'; public static final char MULTI_CHAR_LIKE = '%'; public static final String[] LIKE_ESCAPE_SEQS = new String[] {"\\" + SINGLE_CHAR_LIKE, "\\" + MULTI_CHAR_LIKE}; public static final String[] LIKE_UNESCAPED_SEQS = new String[] {"" + SINGLE_CHAR_LIKE, "" + MULTI_CHAR_LIKE}; private StringUtil() {} /** Replace instances of character ch in String value with String replacement */ public static String replaceChar(String value, char ch, CharSequence replacement) { if (value == null) return null; int i = value.indexOf(ch); if (i == -1) return value; // nothing to do // we've got at least one character to replace StringBuilder buf = new StringBuilder(value.length() + 16); // some extra space int j = 0; while (i != -1) { buf.append(value, j, i).append(replacement); j = i + 1; i = value.indexOf(ch, j); } if (j < value.length()) buf.append(value, j, value.length()); return buf.toString(); } /** * @return the replacement of all occurrences of src[i] with target[i] in s. 
Src and target are * not regex's so this uses simple searching with indexOf() */ public static String replace(String s, String[] src, String[] target) { assert src != null && target != null && src.length > 0 && src.length == target.length; if (src.length == 1 && src[0].length() == 1) { return replaceChar(s, src[0].charAt(0), target[0]); } if (s == null) return null; StringBuilder sb = new StringBuilder(s.length()); int pos = 0; int limit = s.length(); int lastMatch = 0; while (pos < limit) { boolean matched = false; for (int i = 0; i < src.length; i++) { if (s.startsWith(src[i], pos) && src[i].length() > 0) { // we found a matching pattern - append the acculumation plus the replacement sb.append(s.substring(lastMatch, pos)).append(target[i]); pos += src[i].length(); lastMatch = pos; matched = true; break; } } if (!matched) { // we didn't match any patterns, so move forward 1 character pos++; } } // see if we found any matches if (lastMatch == 0) { // we didn't match anything, so return the source string return s; } // apppend the trailing portion sb.append(s.substring(lastMatch)); return sb.toString(); } public static int getBytesInChar(byte b, SortOrder sortOrder) { int ret = getBytesInCharNoException(b, sortOrder); if (ret == -1) throw new UndecodableByteException(b); return ret; } private static int getBytesInCharNoException(byte b, SortOrder sortOrder) { Preconditions.checkNotNull(sortOrder); if (sortOrder == SortOrder.DESC) { b = SortOrder.invert(b); } int c = b & 0xff; if ((c & BYTES_1_MASK) == 0) return 1; if ((c & BYTES_2_MASK) == 0xC0) return 2; if ((c & BYTES_3_MASK) == 0xE0) return 3; if ((c & BYTES_4_MASK) == 0xF0) return 4; return -1; } public static int calculateUTF8Length(byte[] bytes, int offset, int length, SortOrder sortOrder) { int i = offset, endOffset = offset + length; length = 0; while (i < endOffset) { int charLength = getBytesInChar(bytes[i], sortOrder); i += charLength; length++; } return length; } // given an array of bytes containing 
utf-8 encoded strings, starting from curPos, ending before // range, and return the next character offset, -1 if no next character available or // UndecodableByteException private static int calculateNextCharOffset( byte[] bytes, int curPos, int range, SortOrder sortOrder) { int ret = getBytesInCharNoException(bytes[curPos], sortOrder); if (ret == -1) return -1; ret += curPos; if (ret >= range) return -1; return ret; } // given an array of bytes containing utf-8 encoded strings, starting from offset, and return // the previous character offset , -1 if UndecodableByteException. curPos points to current // character starting offset. private static int calculatePreCharOffset( byte[] bytes, int curPos, int offset, SortOrder sortOrder) { --curPos; for (int i = 1, pos = curPos - i + 1; i <= 4 && offset <= pos; ++i, --pos) { int ret = getBytesInCharNoException(bytes[pos], sortOrder); if (ret == i) return pos; } return -1; } // return actural offsetInBytes corresponding to offsetInStr in utf-8 encoded strings bytes // containing // @param bytes an array of bytes containing utf-8 encoded strings // @param offset // @param length // @param sortOrder // @param offsetInStr offset for utf-8 encoded strings bytes array containing. Can be negative // meaning counting from the end of encoded strings // @return actural offsetInBytes corresponding to offsetInStr. 
-1 if offsetInStr is out of index public static int calculateUTF8Offset( byte[] bytes, int offset, int length, SortOrder sortOrder, int offsetInStr) { if (offsetInStr == 0) return offset; int ret, range = offset + length; if (offsetInStr > 0) { ret = offset; while (offsetInStr > 0) { ret = calculateNextCharOffset(bytes, ret, range, sortOrder); if (ret == -1) return -1; --offsetInStr; } } else { ret = offset + length; while (offsetInStr < 0) { ret = calculatePreCharOffset(bytes, ret, offset, sortOrder); // if calculateCurCharOffset returns -1, ret must be smaller than offset if (ret < offset) return -1; ++offsetInStr; } } return ret; } // Given an array of bytes containing encoding utf-8 encoded strings, the offset and a length // parameter, return the actual index into the byte array which would represent a substring // of <length> starting from the character at <offset>. We assume the <offset> is the start // byte of an UTF-8 character. public static int getByteLengthForUtf8SubStr( byte[] bytes, int offset, int length, SortOrder sortOrder) { int byteLength = 0; while (length > 0 && offset + byteLength < bytes.length) { int charLength = getBytesInChar(bytes[offset + byteLength], sortOrder); byteLength += charLength; length--; } return byteLength; } public static boolean hasMultiByteChars(String s) { for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (c > 0x007F) { return true; } } return false; } public static int getFirstNonBlankCharIdxFromStart( byte[] string, int offset, int length, SortOrder sortOrder) { int i = offset; byte space = sortOrder == SortOrder.ASC ? SPACE_UTF8 : INVERTED_SPACE_UTF8; for (; i < offset + length; i++) { if (string[i] != space) { break; } } return i; } public static int getFirstNonBlankCharIdxFromEnd( byte[] string, int offset, int length, SortOrder sortOrder) { int i = offset + length - 1; byte space = sortOrder == SortOrder.ASC ? 
SPACE_UTF8 : INVERTED_SPACE_UTF8; for (; i >= offset; i--) { if (string[i] != space) { break; } } return i; } // A toBytes function backed up HBase's utility function, but would accept null input, in which // case it returns an empty byte array. public static byte[] toBytes(String input) { if (input == null) { return ByteUtil.EMPTY_BYTE_ARRAY; } return Bytes.toBytes(input); } public static String escapeLike(String s) { return replace(s, LIKE_UNESCAPED_SEQS, LIKE_ESCAPE_SEQS); } public static int getUnpaddedCharLength(byte[] b, int offset, int length, SortOrder sortOrder) { return getFirstNonBlankCharIdxFromEnd(b, offset, length, sortOrder) - offset + 1; } public static byte[] padChar(byte[] value, int offset, int length, int paddedLength) { byte[] key = new byte[paddedLength]; System.arraycopy(value, offset, key, 0, length); Arrays.fill(key, length, paddedLength, SPACE_UTF8); return key; } public static byte[] padChar(byte[] value, Integer byteSize) { byte[] newValue = Arrays.copyOf(value, byteSize); if (newValue.length > value.length) { Arrays.fill(newValue, value.length, newValue.length, SPACE_UTF8); } return newValue; } /** * Lame - StringBuilder.equals is retarded. 
* * @param b1 * @param b2 * @return whether or not the two builders consist the same sequence of characters */ public static boolean equals(StringBuilder b1, StringBuilder b2) { if (b1.length() != b2.length()) { return false; } for (int i = 0; i < b1.length(); i++) { if (b1.charAt(i) != b2.charAt(i)) { return false; } } return true; } /** * LPAD implementation * * @param str array containing string to be left padded * @param strOffset byte offset of string * @param strLength byte length of string * @param fill array containing fill values * @param fillOffset byte offset of fill * @param fillLength byte length of fill * @param invertFill if true inverts the bits in fill before filling the array * @param strWithPaddingLen length of the string that is returned with fill values left padded * @return byte[] containing left padded string */ public static byte[] lpad( byte[] str, int strOffset, int strLength, byte[] fill, int fillOffset, int fillLength, boolean invertFill, int strWithPaddingLen) { byte[] paddedStr = new byte[strWithPaddingLen]; int fillStopIdx = strWithPaddingLen - strLength; // copy fill into the start of paddedStr fill(paddedStr, 0, fillStopIdx, fill, fillOffset, fillOffset + fillLength, invertFill); // fill remaining characters with original string System.arraycopy(str, strOffset, paddedStr, fillStopIdx, strLength); return paddedStr; } /** * Assigns the specified byte values to elements of the specified range of the specified array of * bytes. The range to be filled extends from index fromIndex, inclusive, to index toIndex, * exclusive. (If fromIndex==toIndex, the range to be filled is empty.) 
* * @param str the array to be filled * @param strFromIdx the index of the first element (inclusive) to be filled with the fill values * @param strToIdx the index of the last element (exclusive) to be filled with the fill values * @param fillArray the values to be stored in all elements of the array * @param fillFromIdx the index of the first element (inclusive) to be used as fill values * @param filToIdx the index of the last element (exclusive) to be used as fill value * @param invertFill if true inverts the bits in fill before filling the array */ public static void fill( byte[] str, int strFromIdx, int strToIdx, byte[] fillArray, int fillFromIdx, int fillToIdx, boolean invertFill) { rangeCheck(str.length, strFromIdx, strToIdx); rangeCheck(fillArray.length, fillFromIdx, fillToIdx); int strIdx = strFromIdx; byte[] fill = fillArray; int fillLen = fillToIdx - fillFromIdx; if (invertFill) fill = SortOrder.invert(fillArray, fillFromIdx, fillLen); while (strIdx < strToIdx) { int fillIdx = fillFromIdx; while (fillIdx < fillToIdx && strIdx < strToIdx) { if (strIdx + fillLen < fillToIdx) { System.arraycopy(fill, fillFromIdx, str, strIdx, fillLen); } else { str[strIdx++] = fill[fillIdx++]; } } } } /** * Checks that fromIndex and toIndex are in the range and throws an appropriate exception, if they * are not */ private static void rangeCheck(int length, int fromIndex, int toIndex) { if (fromIndex > toIndex) { throw new IllegalArgumentException("fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")"); } if (fromIndex < 0) { throw new ArrayIndexOutOfBoundsException(fromIndex); } if (toIndex > length) { throw new ArrayIndexOutOfBoundsException(toIndex); } } public static String escapeStringConstant(String pattern) { return StringEscapeUtils.escapeSql(pattern); // Need to escape double quotes } public static String escapeBackslash(String input) { // see // http://stackoverflow.com/questions/4653831/regex-how-to-escape-backslashes-and-special-characters return 
input.replaceAll("\\\\", "\\\\\\\\"); } }
@Override protected RegionScanner doPostScannerOpen( final ObserverContext<RegionCoprocessorEnvironment> c, final Scan scan, final RegionScanner s) throws IOException { byte[] isUngroupedAgg = scan.getAttribute(BaseScannerRegionObserver.UNGROUPED_AGG); if (isUngroupedAgg == null) { return s; } final ScanProjector p = ScanProjector.deserializeProjectorFromScan(scan); final HashJoinInfo j = HashJoinInfo.deserializeHashJoinFromScan(scan); RegionScanner theScanner = s; if (p != null || j != null) { theScanner = new HashJoinRegionScanner(s, p, j, ScanUtil.getTenantId(scan), c.getEnvironment()); } final RegionScanner innerScanner = theScanner; byte[] indexUUID = scan.getAttribute(PhoenixIndexCodec.INDEX_UUID); PTable projectedTable = null; List<Expression> selectExpressions = null; byte[] upsertSelectTable = scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_TABLE); boolean isUpsert = false; boolean isDelete = false; byte[] deleteCQ = null; byte[] deleteCF = null; byte[][] values = null; byte[] emptyCF = null; ImmutableBytesWritable ptr = null; if (upsertSelectTable != null) { isUpsert = true; projectedTable = deserializeTable(upsertSelectTable); selectExpressions = deserializeExpressions(scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_EXPRS)); values = new byte[projectedTable.getPKColumns().size()][]; ptr = new ImmutableBytesWritable(); } else { byte[] isDeleteAgg = scan.getAttribute(BaseScannerRegionObserver.DELETE_AGG); isDelete = isDeleteAgg != null && Bytes.compareTo(PDataType.TRUE_BYTES, isDeleteAgg) == 0; if (!isDelete) { deleteCF = scan.getAttribute(BaseScannerRegionObserver.DELETE_CF); deleteCQ = scan.getAttribute(BaseScannerRegionObserver.DELETE_CQ); } emptyCF = scan.getAttribute(BaseScannerRegionObserver.EMPTY_CF); } int batchSize = 0; long ts = scan.getTimeRange().getMax(); HRegion region = c.getEnvironment().getRegion(); List<Mutation> mutations = Collections.emptyList(); if (isDelete || isUpsert || (deleteCQ != null && deleteCF != null) || 
emptyCF != null) { // TODO: size better mutations = Lists.newArrayListWithExpectedSize(1024); batchSize = c.getEnvironment() .getConfiguration() .getInt(MUTATE_BATCH_SIZE_ATTRIB, QueryServicesOptions.DEFAULT_MUTATE_BATCH_SIZE); } Aggregators aggregators = ServerAggregators.deserialize( scan.getAttribute(BaseScannerRegionObserver.AGGREGATORS), c.getEnvironment().getConfiguration()); Aggregator[] rowAggregators = aggregators.getAggregators(); boolean hasMore; boolean hasAny = false; MultiKeyValueTuple result = new MultiKeyValueTuple(); if (logger.isInfoEnabled()) { logger.info("Starting ungrouped coprocessor scan " + scan); } long rowCount = 0; region.startRegionOperation(); try { do { List<Cell> results = new ArrayList<Cell>(); // Results are potentially returned even when the return value of s.next is false // since this is an indication of whether or not there are more values after the // ones returned hasMore = innerScanner.nextRaw(results); if (!results.isEmpty()) { rowCount++; result.setKeyValues(results); try { if (isDelete) { // FIXME: the version of the Delete constructor without the lock args was introduced // in 0.94.4, thus if we try to use it here we can no longer use the 0.94.2 version // of the client. Cell firstKV = results.get(0); Delete delete = new Delete( firstKV.getRowArray(), firstKV.getRowOffset(), firstKV.getRowLength(), ts); mutations.add(delete); } else if (isUpsert) { Arrays.fill(values, null); int i = 0; List<PColumn> projectedColumns = projectedTable.getColumns(); for (; i < projectedTable.getPKColumns().size(); i++) { Expression expression = selectExpressions.get(i); if (expression.evaluate(result, ptr)) { values[i] = ptr.copyBytes(); // If SortOrder from expression in SELECT doesn't match the // column being projected into then invert the bits. 
if (expression.getSortOrder() != projectedColumns.get(i).getSortOrder()) { SortOrder.invert(values[i], 0, values[i], 0, values[i].length); } } } projectedTable.newKey(ptr, values); PRow row = projectedTable.newRow(kvBuilder, ts, ptr); for (; i < projectedColumns.size(); i++) { Expression expression = selectExpressions.get(i); if (expression.evaluate(result, ptr)) { PColumn column = projectedColumns.get(i); Object value = expression.getDataType().toObject(ptr, column.getSortOrder()); // We are guaranteed that the two column will have the same type. if (!column .getDataType() .isSizeCompatible( ptr, value, column.getDataType(), expression.getMaxLength(), expression.getScale(), column.getMaxLength(), column.getScale())) { throw new ValueTypeIncompatibleException( column.getDataType(), column.getMaxLength(), column.getScale()); } column .getDataType() .coerceBytes( ptr, value, expression.getDataType(), expression.getMaxLength(), expression.getScale(), expression.getSortOrder(), column.getMaxLength(), column.getScale(), column.getSortOrder()); byte[] bytes = ByteUtil.copyKeyBytesIfNecessary(ptr); row.setValue(column, bytes); } } for (Mutation mutation : row.toRowMutations()) { mutations.add(mutation); } } else if (deleteCF != null && deleteCQ != null) { // No need to search for delete column, since we project only it // if no empty key value is being set if (emptyCF == null || result.getValue(deleteCF, deleteCQ) != null) { Delete delete = new Delete( results.get(0).getRowArray(), results.get(0).getRowOffset(), results.get(0).getRowLength()); delete.deleteColumns(deleteCF, deleteCQ, ts); mutations.add(delete); } } if (emptyCF != null) { /* * If we've specified an emptyCF, then we need to insert an empty * key value "retroactively" for any key value that is visible at * the timestamp that the DDL was issued. Key values that are not * visible at this timestamp will not ever be projected up to * scans past this timestamp, so don't need to be considered. 
* We insert one empty key value per row per timestamp. */ Set<Long> timeStamps = Sets.newHashSetWithExpectedSize(results.size()); for (Cell kv : results) { long kvts = kv.getTimestamp(); if (!timeStamps.contains(kvts)) { Put put = new Put(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength()); put.add( emptyCF, QueryConstants.EMPTY_COLUMN_BYTES, kvts, ByteUtil.EMPTY_BYTE_ARRAY); mutations.add(put); } } } // Commit in batches based on UPSERT_BATCH_SIZE_ATTRIB in config if (!mutations.isEmpty() && batchSize > 0 && mutations.size() % batchSize == 0) { commitBatch(region, mutations, indexUUID); mutations.clear(); } } catch (ConstraintViolationException e) { // Log and ignore in count logger.error( "Failed to create row in " + region.getRegionNameAsString() + " with values " + SchemaUtil.toString(values), e); continue; } aggregators.aggregate(rowAggregators, result); hasAny = true; } } while (hasMore); } finally { innerScanner.close(); region.closeRegionOperation(); } if (logger.isInfoEnabled()) { logger.info("Finished scanning " + rowCount + " rows for ungrouped coprocessor scan " + scan); } if (!mutations.isEmpty()) { commitBatch(region, mutations, indexUUID); } final boolean hadAny = hasAny; KeyValue keyValue = null; if (hadAny) { byte[] value = aggregators.toBytes(rowAggregators); keyValue = KeyValueUtil.newKeyValue( UNGROUPED_AGG_ROW_KEY, SINGLE_COLUMN_FAMILY, SINGLE_COLUMN, AGG_TIMESTAMP, value, 0, value.length); } final KeyValue aggKeyValue = keyValue; RegionScanner scanner = new BaseRegionScanner() { private boolean done = !hadAny; @Override public HRegionInfo getRegionInfo() { return innerScanner.getRegionInfo(); } @Override public boolean isFilterDone() { return done; } @Override public void close() throws IOException { innerScanner.close(); } @Override public boolean next(List<Cell> results) throws IOException { if (done) return false; done = true; results.add(aggKeyValue); return false; } @Override public long getMaxResultSize() { return 
scan.getMaxResultSize(); } }; return scanner; }