@SqlNullable @Description("extract query parameter from url") @ScalarFunction @LiteralParameters({"x", "y"}) @SqlType("varchar(x)") public static Slice urlExtractParameter( @SqlType("varchar(x)") Slice url, @SqlType("varchar(y)") Slice parameterName) { URI uri = parseUrl(url); if ((uri == null) || (uri.getQuery() == null)) { return null; } Slice query = slice(uri.getQuery()); String parameter = parameterName.toStringUtf8(); Iterable<String> queryArgs = QUERY_SPLITTER.split(query.toStringUtf8()); for (String queryArg : queryArgs) { Iterator<String> arg = ARG_SPLITTER.split(queryArg).iterator(); if (arg.next().equals(parameter)) { if (arg.hasNext()) { return utf8Slice(arg.next()); } // first matched key is empty return Slices.EMPTY_SLICE; } } // no key matched return null; }
/**
 * Writes the key/value pair held by {@code state} as one entry of {@code out}.
 *
 * <p>Entry layout: key length (int), value length (int), then the key payload followed by the
 * value payload, each written through {@code appendTo}. A key or value that is absent, or
 * whose position 0 is null, is recorded with length 0 and contributes no payload.
 *
 * @param state the aggregation state to serialize
 * @param out the block builder receiving the serialized entry
 */
@Override
public void serialize(MaxOrMinByState state, BlockBuilder out)
{
    SliceOutput sliceOutput = new DynamicSliceOutput((int) state.getEstimatedSize());

    boolean hasKey = (state.getKey() != null) && !state.getKey().isNull(0);
    boolean hasValue = (state.getValue() != null) && !state.getValue().isNull(0);

    // header: both lengths come first so a reader can locate each payload
    sliceOutput.writeInt(hasKey ? state.getKey().getLength(0) : 0);
    sliceOutput.writeInt(hasValue ? state.getValue().getLength(0) : 0);

    // payloads, in the same order as the lengths
    if (hasKey) {
        appendTo(keyType, sliceOutput, state.getKey());
    }
    if (hasValue) {
        appendTo(valueType, sliceOutput, state.getValue());
    }

    Slice serialized = sliceOutput.slice();
    out.writeBytes(serialized, 0, serialized.length());
    out.closeEntry();
}
@Description("substring of given length starting at an index") @ScalarFunction @SqlType(VarcharType.class) public static Slice substr( @SqlType(VarcharType.class) Slice slice, @SqlType(BigintType.class) long start, @SqlType(BigintType.class) long length) { if ((start == 0) || (length <= 0)) { return Slices.EMPTY_SLICE; } if (start > 0) { // make start zero-based start--; } else { // negative start is relative to end of string start += slice.length(); if (start < 0) { return Slices.EMPTY_SLICE; } } if ((start + length) > slice.length()) { length = slice.length() - start; } if (start >= slice.length()) { return Slices.EMPTY_SLICE; } return slice.slice((int) start, (int) length); }
/**
 * Strips leading space characters ({@code ' '}) from {@code slice}.
 *
 * @param slice the string to trim
 * @return the portion of {@code slice} starting at its first non-space byte; empty when the
 *     input consists only of spaces
 */
@Description("removes spaces from the beginning of a string")
@ScalarFunction("ltrim")
@SqlType(VarcharType.class)
public static Slice leftTrim(@SqlType(VarcharType.class) Slice slice)
{
    int offset = firstNonSpace(slice);
    int remaining = slice.length() - offset;
    return slice.slice(offset, remaining);
}
/**
 * Finds the index of the first byte of {@code slice} that is not a space ({@code ' '}).
 *
 * @param slice the bytes to scan
 * @return the index of the first non-space byte, or {@code slice.length()} when every byte is
 *     a space (including the empty slice)
 */
private static int firstNonSpace(Slice slice)
{
    int index = 0;
    while ((index < slice.length()) && (slice.getByte(index) == ' ')) {
        index++;
    }
    return index;
}
/**
 * Builds a one-byte string from the character code {@code n}.
 *
 * <p>The code is first saturated to the {@code int} range and then stored into a single byte.
 * NOTE(review): no range check is made here, so codes outside 0..127 presumably end up as a
 * byte that is not the requested ASCII character -- confirm whether such input should be
 * rejected.
 *
 * @param n the character code
 * @return a slice of length 1 holding the code
 */
@Description("convert ASCII character code to string")
@ScalarFunction
@SqlType(VarcharType.class)
public static Slice chr(@SqlType(BigintType.class) long n)
{
    int code = Ints.saturatedCast(n);
    Slice result = Slices.allocate(1);
    result.setByte(0, code);
    return result;
}
/**
 * Finds the index of the last byte of {@code slice} that is not a space ({@code ' '}).
 *
 * @param slice the bytes to scan
 * @return the index of the last non-space byte, or {@code -1} when every byte is a space
 *     (including the empty slice)
 */
private static int lastNonSpace(Slice slice)
{
    int index = slice.length() - 1;
    while ((index >= 0) && (slice.getByte(index) == ' ')) {
        index--;
    }
    return index;
}
/**
 * Returns a copy of {@code slice} with every byte passed through
 * {@code Ascii.toUpperCase}.
 *
 * <p>The conversion works byte by byte, so only what {@code Ascii.toUpperCase} treats as a
 * lower-case character is changed.
 *
 * @param slice the string to convert
 * @return a newly allocated slice of the same length holding the converted bytes
 */
@Description("converts all the alphabets in the string to upper case")
@ScalarFunction
@SqlType(VarcharType.class)
public static Slice upper(@SqlType(VarcharType.class) Slice slice)
{
    int length = slice.length();
    Slice result = Slices.allocate(length);
    for (int index = 0; index < length; index++) {
        char original = (char) slice.getByte(index);
        result.setByte(index, Ascii.toUpperCase(original));
    }
    return result;
}
/**
 * Converts a value in the engine's native container form into the object form used when
 * building ORC values.
 *
 * <ul>
 *   <li>{@code null} stays {@code null}</li>
 *   <li>decimals become a {@code HiveDecimal} built from the unscaled value and the scale</li>
 *   <li>boolean, long and double backed types are returned unchanged</li>
 *   <li>slice backed types become a {@code String} for varchar, otherwise a {@code byte[]}</li>
 *   <li>arrays become a {@code List} of recursively converted elements</li>
 *   <li>maps become a {@code Map}, reading the block as alternating key/value positions</li>
 * </ul>
 *
 * @param type the type describing {@code nativeValue}
 * @param nativeValue the value to convert, may be {@code null}
 * @return the converted value
 * @throws PrestoException if {@code type} matches none of the cases above
 */
private static Object nativeContainerToOrcValue(Type type, Object nativeValue)
{
    if (nativeValue == null) {
        return null;
    }

    if (type instanceof DecimalType) {
        DecimalType decimalType = (DecimalType) type;
        // short decimals carry the unscaled value as a long, long decimals as an encoded slice
        BigInteger unscaledValue = decimalType.isShort()
                ? BigInteger.valueOf((long) nativeValue)
                : Decimals.decodeUnscaledValue((Slice) nativeValue);
        return HiveDecimal.create(unscaledValue, decimalType.getScale());
    }

    Class<?> javaType = type.getJavaType();
    if ((javaType == boolean.class) || (javaType == long.class) || (javaType == double.class)) {
        return nativeValue;
    }

    if (javaType == Slice.class) {
        Slice slice = (Slice) nativeValue;
        if (type instanceof VarcharType) {
            return slice.toStringUtf8();
        }
        return slice.getBytes();
    }

    if (isArrayType(type)) {
        Block arrayBlock = (Block) nativeValue;
        Type elementType = type.getTypeParameters().get(0);
        List<Object> elements = new ArrayList<>();
        for (int position = 0; position < arrayBlock.getPositionCount(); position++) {
            Object nativeElement = getNativeContainerValue(elementType, arrayBlock, position);
            elements.add(nativeContainerToOrcValue(elementType, nativeElement));
        }
        return elements;
    }

    if (isMapType(type)) {
        Block mapBlock = (Block) nativeValue;
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> entries = new HashMap<>();
        // keys sit at even positions, the matching value at the following odd position
        for (int position = 0; position < mapBlock.getPositionCount(); position += 2) {
            Object nativeKey = getNativeContainerValue(keyType, mapBlock, position);
            Object key = nativeContainerToOrcValue(keyType, nativeKey);
            Object nativeEntryValue = getNativeContainerValue(valueType, mapBlock, position + 1);
            Object value = nativeContainerToOrcValue(valueType, nativeEntryValue);
            entries.put(key, value);
        }
        return entries;
    }

    throw new PrestoException(INTERNAL_ERROR, "Unimplemented type: " + type);
}
/**
 * Replaces every occurrence of {@code search} in {@code str} with {@code replace}.
 *
 * <p>All three arguments are decoded as UTF-8, the replacement is done with
 * {@link String#replace(CharSequence, CharSequence)}, and the result is encoded back to UTF-8.
 *
 * @param str the string to search in
 * @param search the literal text to look for
 * @param replace the text substituted for each occurrence
 * @return a new slice holding the result
 */
@Description("greedily replaces occurrences of a pattern with a string")
@ScalarFunction
@SqlType(VarcharType.class)
public static Slice replace(
        @SqlType(VarcharType.class) Slice str,
        @SqlType(VarcharType.class) Slice search,
        @SqlType(VarcharType.class) Slice replace)
{
    String text = str.toString(Charsets.UTF_8);
    String target = search.toString(Charsets.UTF_8);
    String replacement = replace.toString(Charsets.UTF_8);

    String result = text.replace(target, replacement);
    return Slices.copiedBuffer(result, Charsets.UTF_8);
}
public void setString(int columnIndex, String value) { Integer reuseIndex = stringReferences.get(value); if (reuseIndex != null) { bufferSlice.setInt(getOffset(columnIndex), reuseIndex); } else { int index = stringReferences.size(); stringReferences.put(value, index); bufferSlice.setInt(getOffset(columnIndex), index); stringReferenceSize += value.length() * 2 + 4; // assuming size of char = size of byte * 2 + length } clearNull(columnIndex); }
/**
 * Strips leading and trailing space characters ({@code ' '}) from {@code slice}.
 *
 * @param slice the string to trim
 * @return the range from the first to the last non-space byte, or an empty slice when the
 *     input consists only of spaces
 */
@Description("removes spaces from the beginning and end of a string")
@ScalarFunction
@SqlType(VarcharType.class)
public static Slice trim(@SqlType(VarcharType.class) Slice slice)
{
    int start = firstNonSpace(slice);
    if (start < slice.length()) {
        // at least one non-space byte exists, so the backward scan must find one at or after start
        int end = lastNonSpace(slice);
        assert (end >= 0) && (end >= start);
        int trimmedLength = (end - start) + 1;
        return slice.slice(start, trimmedLength);
    }
    return Slices.EMPTY_SLICE;
}
/**
 * Strips trailing space characters ({@code ' '}) from {@code slice}.
 *
 * @param slice the string to trim
 * @return the portion of {@code slice} ending at its last non-space byte; empty when the
 *     input consists only of spaces
 */
@Description("removes spaces from the end of a string")
@ScalarFunction("rtrim")
@SqlType(VarcharType.class)
public static Slice rightTrim(@SqlType(VarcharType.class) Slice slice)
{
    // lastNonSpace yields -1 when nothing but spaces is found, giving a length of 0
    int trimmedLength = lastNonSpace(slice) + 1;
    return slice.slice(0, trimmedLength);
}
public void addRecord() { // record header bufferSlice.setInt( position, nextVariableLengthDataOffset); // nextVariableLengthDataOffset means record size bufferSlice.setBytes(position + 4, nullBitSet); count++; this.position += nextVariableLengthDataOffset; this.nextVariableLengthDataOffset = fixedRecordSize; Arrays.fill(nullBitSet, (byte) -1); // flush if next record will not fit in this buffer if (buffer.capacity() < position + nextVariableLengthDataOffset + stringReferenceSize) { flush(); } }
/**
 * Returns the suffix of {@code slice} that begins at {@code start}.
 *
 * <p>Delegates to the three-argument {@code substr}, passing the full length of the slice as
 * the maximum length.
 *
 * @param slice the source string
 * @param start the start position, interpreted as by the three-argument {@code substr}
 * @return the suffix starting at {@code start}
 */
@Description("suffix starting at given index")
@ScalarFunction
@SqlType(VarcharType.class)
public static Slice substr(
        @SqlType(VarcharType.class) Slice slice,
        @SqlType(BigintType.class) long start)
{
    long maxLength = slice.length();
    return substr(slice, start, maxLength);
}
/**
 * Re-packs a timestamp with time zone so that it carries the zone named by {@code zoneId}.
 *
 * <p>The UTC millis are unpacked from the input and packed again together with the new zone.
 *
 * @param timestampWithTimeZone the packed timestamp with time zone
 * @param zoneId the identifier of the target zone
 * @return the packed timestamp with the same UTC millis and the given zone
 */
@ScalarFunction(value = "at_time_zone", hidden = true)
@SqlType(TimestampWithTimeZoneType.class)
public static long timestampAtTimeZone(
        @SqlType(TimestampWithTimeZoneType.class) long timestampWithTimeZone,
        @SqlType(VarcharType.class) Slice zoneId)
{
    long millisUtc = unpackMillisUtc(timestampWithTimeZone);
    String zone = zoneId.toStringUtf8();
    return packDateTimeWithZone(millisUtc, zone);
}
public ChannelBuilder(Type type, int blockSize) { checkNotNull(type, "type is null"); this.type = type; this.slice = Slices.allocate(blockSize); this.sliceOutput = slice.getOutput(); this.positionOffsets = new IntArrayList(1024); }
/**
 * Parses {@code url} into a {@link URI}.
 *
 * @param url the UTF-8 encoded URL text
 * @return the parsed URI, or {@code null} when the text is not a syntactically valid URI
 */
@Nullable
private static URI parseUrl(Slice url)
{
    String text = url.toStringUtf8();
    try {
        return new URI(text);
    }
    catch (URISyntaxException e) {
        // malformed input is reported to the caller as "no URI" rather than as an error
        return null;
    }
}
/**
 * Parses {@code datetime} according to the pattern in {@code formatString}.
 *
 * <p>The formatter is built from the pattern and configured with the chronology of the
 * session's time zone, with offset parsing, and with the session's locale.
 *
 * @param session the session supplying time zone and locale
 * @param datetime the text to parse
 * @param formatString the format pattern
 * @return the parsed instant packed together with its zone
 */
@Description("parses the specified date/time by the given format")
@ScalarFunction
@SqlType(TimestampWithTimeZoneType.class)
public static long parseDatetime(
        ConnectorSession session,
        @SqlType(VarcharType.class) Slice datetime,
        @SqlType(VarcharType.class) Slice formatString)
{
    String pattern = formatString.toString(Charsets.UTF_8);

    DateTimeFormatter formatter = DateTimeFormat.forPattern(pattern);
    formatter = formatter.withChronology(getChronology(session.getTimeZoneKey()));
    formatter = formatter.withOffsetParsed();
    formatter = formatter.withLocale(session.getLocale());

    String text = datetime.toString(Charsets.UTF_8);
    return DateTimeZoneIndex.packDateTimeWithZone(parseDateTimeHelper(formatter, text));
}
private static boolean valueEquals( Type type, Slice leftSlice, int leftOffset, Slice rightSlice, int rightOffset) { // check if null flags are the same boolean leftIsNull = leftSlice.getByte(leftOffset) != 0; boolean rightIsNull = rightSlice.getByte(rightOffset) != 0; if (leftIsNull != rightIsNull) { return false; } // if values are both null, they are equal if (leftIsNull) { return true; } if (type == Type.FIXED_INT_64 || type == Type.DOUBLE) { long leftValue = leftSlice.getLong(leftOffset + SIZE_OF_BYTE); long rightValue = rightSlice.getLong(rightOffset + SIZE_OF_BYTE); return leftValue == rightValue; } else if (type == Type.BOOLEAN) { boolean leftValue = leftSlice.getByte(leftOffset + SIZE_OF_BYTE) != 0; boolean rightValue = rightSlice.getByte(rightOffset + SIZE_OF_BYTE) != 0; return leftValue == rightValue; } else if (type == Type.VARIABLE_BINARY) { int leftLength = getVariableBinaryLength(leftSlice, leftOffset); int rightLength = getVariableBinaryLength(rightSlice, rightOffset); return leftSlice.equals( leftOffset + SIZE_OF_BYTE + SIZE_OF_INT, leftLength, rightSlice, rightOffset + SIZE_OF_BYTE + SIZE_OF_INT, rightLength); } else { throw new IllegalArgumentException("Unsupported type " + type); } }
/**
 * Escapes {@code value} with Guava's URL form parameter escaper.
 *
 * @param value the text to escape
 * @return the escaped text
 */
@Description("escape a string for use in URL query parameter names and values")
@ScalarFunction
@LiteralParameters({"x", "y"})
@Constraint(variable = "y", expression = "min(2147483647, x * 12)")
@SqlType("varchar(y)")
public static Slice urlEncode(@SqlType("varchar(x)") Slice value)
{
    String text = value.toStringUtf8();
    String escaped = UrlEscapers.urlFormParameterEscaper().escape(text);
    return slice(escaped);
}
/**
 * Formats {@code timestamp} using the pattern in {@code formatString}.
 *
 * @param chronology the chronology applied to the formatter
 * @param locale the locale applied to the formatter
 * @param timestamp the instant to print, passed to the formatter as is
 * @param formatString the format pattern
 * @return the formatted text, UTF-8 encoded
 */
private static Slice formatDatetime(
        ISOChronology chronology, Locale locale, long timestamp, Slice formatString)
{
    DateTimeFormatter formatter = DateTimeFormat.forPattern(formatString.toString(Charsets.UTF_8))
            .withChronology(chronology)
            .withLocale(locale);

    byte[] formatted = formatter.print(timestamp).getBytes(Charsets.UTF_8);
    return Slices.wrappedBuffer(formatted);
}
/**
 * Appends the value stored at {@code position} of this channel to {@code builder}.
 *
 * <p>A stored value starts with a null flag byte (non-zero means null) followed by its
 * payload, which is decoded according to this channel's type.
 *
 * @param position the index of the value, must be within the recorded positions
 * @param builder the builder receiving the value
 * @throws IllegalArgumentException if {@code position} is out of range or the channel's type
 *     is not one of the handled types
 */
public void appendTo(int position, BlockBuilder builder)
{
    checkArgument(position >= 0 && position < positionOffsets.size());

    int offset = positionOffsets.getInt(position);
    if (slice.getByte(offset) != 0) {
        builder.appendNull();
        return;
    }

    // payload begins right after the null flag
    int valueOffset = offset + SIZE_OF_BYTE;
    if (type == Type.FIXED_INT_64) {
        builder.append(slice.getLong(valueOffset));
        return;
    }
    if (type == Type.DOUBLE) {
        builder.append(slice.getDouble(valueOffset));
        return;
    }
    if (type == Type.BOOLEAN) {
        builder.append(slice.getByte(valueOffset) != 0);
        return;
    }
    if (type == Type.VARIABLE_BINARY) {
        // variable binary payload: an int length followed by the bytes
        int sliceLength = getVariableBinaryLength(slice, offset);
        builder.append(slice.slice(valueOffset + SIZE_OF_INT, sliceLength));
        return;
    }
    throw new IllegalArgumentException("Unsupported type " + type);
}
/**
 * Computes the hash code of the serialized value of {@code type} found at {@code offset}.
 *
 * <p>The value starts with a null flag byte (non-zero means null) followed by its payload.
 *
 * @param type the type of the value
 * @param slice the slice holding the value
 * @param offset the offset of the value's null flag
 * @return 0 for a null value, otherwise a hash of the payload
 * @throws IllegalArgumentException if {@code type} is not one of the handled types
 */
private static int valueHashCode(Type type, Slice slice, int offset)
{
    if (slice.getByte(offset) != 0) {
        // null value
        return 0;
    }

    // payload begins right after the null flag
    int valueOffset = offset + SIZE_OF_BYTE;
    if (type == Type.FIXED_INT_64) {
        return Longs.hashCode(slice.getLong(valueOffset));
    }
    if (type == Type.DOUBLE) {
        long bits = Double.doubleToLongBits(slice.getDouble(valueOffset));
        return Longs.hashCode(bits);
    }
    if (type == Type.BOOLEAN) {
        boolean value = slice.getByte(valueOffset) != 0;
        return value ? 1 : 0;
    }
    if (type == Type.VARIABLE_BINARY) {
        // variable binary payload: an int length followed by the bytes
        int sliceLength = getVariableBinaryLength(slice, offset);
        return slice.hashCode(valueOffset + SIZE_OF_INT, sliceLength);
    }
    throw new IllegalArgumentException("Unsupported type " + type);
}
@Test public void testRewriter() throws Exception { OrcStorageManager manager = createOrcStorageManager(); long transactionId = TRANSACTION_ID; List<Long> columnIds = ImmutableList.of(3L, 7L); List<Type> columnTypes = ImmutableList.<Type>of(BIGINT, createVarcharType(10)); // create file with 2 rows StoragePageSink sink = createStoragePageSink(manager, columnIds, columnTypes); List<Page> pages = rowPagesBuilder(columnTypes).row(123L, "hello").row(456L, "bye").build(); sink.appendPages(pages); List<ShardInfo> shards = getFutureValue(sink.commit()); assertEquals(shardRecorder.getShards().size(), 1); // delete one row BitSet rowsToDelete = new BitSet(); rowsToDelete.set(0); Collection<Slice> fragments = manager.rewriteShard( transactionId, OptionalInt.empty(), shards.get(0).getShardUuid(), rowsToDelete); Slice shardDelta = Iterables.getOnlyElement(fragments); ShardDelta shardDeltas = jsonCodec(ShardDelta.class).fromJson(shardDelta.getBytes()); ShardInfo shardInfo = Iterables.getOnlyElement(shardDeltas.getNewShards()); // check that output file has one row assertEquals(shardInfo.getRowCount(), 1); // check that storage file is same as backup file File storageFile = storageService.getStorageFile(shardInfo.getShardUuid()); File backupFile = fileBackupStore.getBackupFile(shardInfo.getShardUuid()); assertFileEquals(storageFile, backupFile); // verify recorded shard List<RecordedShard> recordedShards = shardRecorder.getShards(); assertEquals(recordedShards.size(), 2); assertEquals(recordedShards.get(1).getTransactionId(), TRANSACTION_ID); assertEquals(recordedShards.get(1).getShardUuid(), shardInfo.getShardUuid()); }
private Block createZeroBlock(Type type, int rowsCount, Slice constantSlice) { checkArgument(isSupportedType(type), "Unsupported type [%s]", type); Slice slice; // do not exceed varchar limit if (isVarcharType(type)) { slice = constantSlice.slice( 0, Math.min(((VarcharType) type).getLength(), constantSlice.length())); } else if (isLongDecimal(type)) { slice = encodeScaledValue(ZERO); } else { slice = constantSlice; } BlockBuilder builder; if (type instanceof FixedWidthType) { builder = type.createBlockBuilder(new BlockBuilderStatus(), rowsCount); } else { builder = type.createBlockBuilder(new BlockBuilderStatus(), rowsCount, slice.length()); } for (int i = 0; i < rowsCount; i++) { Class<?> javaType = type.getJavaType(); if (javaType == boolean.class) { type.writeBoolean(builder, false); } else if (javaType == long.class) { type.writeLong(builder, 0); } else if (javaType == double.class) { type.writeDouble(builder, 0.0); } else if (javaType == Slice.class) { requireNonNull(slice, "slice is null"); type.writeSlice(builder, slice, 0, slice.length()); } else { throw new UnsupportedOperationException("Unknown javaType: " + javaType.getName()); } } return builder.build(); }
/**
 * Decodes a URL-encoded string with {@link URLDecoder}, using UTF-8.
 *
 * @param value the encoded text
 * @return the decoded text
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} when the decoder rejects the
 *     input with an {@link IllegalArgumentException}
 */
@Description("unescape a URL-encoded string")
@ScalarFunction
@LiteralParameters("x")
@SqlType("varchar(x)")
public static Slice urlDecode(@SqlType("varchar(x)") Slice value)
{
    String encoded = value.toStringUtf8();
    try {
        String decoded = URLDecoder.decode(encoded, UTF_8.name());
        return slice(decoded);
    }
    catch (UnsupportedEncodingException e) {
        // the charset name is taken from the UTF_8 constant, so this is treated as impossible
        throw new AssertionError(e);
    }
    catch (IllegalArgumentException e) {
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, e);
    }
}
private void doFlush() { if (buffer != null && count > 0) { // write page header bufferSlice.setInt(0, count); buffer.limit(position); // flush page Page page = Page.wrap(buffer).setStringReferences(getSortedStringReferences()); buffer = null; bufferSlice = null; output.add(page); } }
/**
 * Resolves a time unit name to the matching field of {@code chronology}.
 *
 * <p>The name is matched case-insensitively against {@code second}, {@code minute} and
 * {@code hour}.
 *
 * @param chronology the chronology supplying the fields
 * @param unit the UTF-8 encoded unit name
 * @return the second-of-minute, minute-of-hour or hour-of-day field
 * @throws IllegalArgumentException if the unit is none of the supported names
 */
private static DateTimeField getTimeField(ISOChronology chronology, Slice unit)
{
    // Lower-case with a fixed locale: the no-argument toLowerCase() follows the JVM default
    // locale, under which e.g. "MINUTE" does not become "minute" in a Turkish locale.
    String unitString = unit.toString(Charsets.UTF_8).toLowerCase(java.util.Locale.ENGLISH);
    switch (unitString) {
        case "second":
            return chronology.secondOfMinute();
        case "minute":
            return chronology.minuteOfHour();
        case "hour":
            return chronology.hourOfDay();
        default:
            throw new IllegalArgumentException("'" + unitString + "' is not a valid Time field");
    }
}
/**
 * Reverses the given string code point by code point.
 *
 * <p>The input is treated as UTF-8: a byte together with the continuation bytes
 * ({@code 10xxxxxx}) that directly follow it is moved as one unit, so multi-byte characters
 * stay intact instead of having their bytes reversed into an invalid sequence. For input
 * made of single-byte characters the result is the plain byte-wise reversal. Combining marks
 * are separate code points and are therefore not kept attached to their base character.
 *
 * @param slice the string to reverse
 * @return a newly allocated slice of the same length holding the reversed string
 */
@Description("reverses the given string")
@ScalarFunction
@SqlType(VarcharType.class)
public static Slice reverse(@SqlType(VarcharType.class) Slice slice)
{
    int length = slice.length();
    Slice reversed = Slices.allocate(length);

    int start = 0;
    while (start < length) {
        // extend the unit over every continuation byte that follows its first byte
        int end = start + 1;
        while ((end < length) && ((slice.getByte(end) & 0xC0) == 0x80)) {
            end++;
        }

        // a unit occupying [start, end) in the input lands at [length - end, length - start)
        reversed.setBytes(length - end, slice, start, end - start);
        start = end;
    }
    return reversed;
}