/**
 * Parse a string value and convert it to its proper data type.
 *
 * <p>The literal {@code "null"} yields {@code null}. Any other input is handed to the untyped
 * value parser and must be consumed completely; empty input, unparsable input and trailing
 * characters are all reported through {@code error(...)}.
 *
 * @param value The text to parse, must not be null.
 * @param <T> The type the caller expects the value to have; the cast is unchecked.
 * @return The parsed value, or null if the input was "null" or if the error was reported
 *     without an exception being thrown.
 * @throws ParsingException Thrown if no Advisory is available and there was an error parsing.
 */
@SuppressWarnings("unchecked")
public static <T> T parseValue(String value) throws ParsingException {
  final T result;

  assert value != null;

  /*
   * todo Restructure the whole class so I've got parse and parseArray as
   * static methods and the share code properly.
   */
  if ("null".equals(value)) {
    result = null;
  } else {
    final List<Object> list = new ArrayList<>();
    final Text t = new Text();
    t.append(value);
    // any() peeks at the next character and expects its caller to have ruled out EOF,
    // so empty input must be rejected here rather than passed through.
    if (!t.isEof() && any(t, list) && t.isEof()) {
      result = (T) list.get(0);
    } else {
      error("Could not parse value: " + value);
      result = null;
    }
  }
  return result;
}
private static boolean number(Text t, DataType requiredType, List<Object> valueList) { // posNumber // : cardinality // | '0b' BinaryDigits+ [zZ]? // | '0x' HexDigit+ [zZ]? // | '-'? Pint? ( '.' Digit+ ) ('e' Pint) [fF] // | '-'? Pint [zZfF]? // ; final boolean result; assert !t.isEof() : "Should have been checked by caller"; if (binary(t, valueList, requiredType) || hex(t, valueList, requiredType)) { // ?todo I could allow negatives here // ( binary | hex | '0' ) result = true; } else { // ( Int DecPart | Int | DecPart ) ( 'e' Int )? [zZfF]? final int start = t.cursor(); if ((t.consumeInt() && (decPart(t) || true) && (exponent(t) || true)) || (t.consume('-') || true) && decPart(t) && (exponent(t) || true)) { final String string = t.getString(start); final Object value = deriveType(string, requiredType, t, 10); valueList.add(value); result = true; } else { result = false; } } return result; }
/**
 * Parse a single value of the given type, dispatching to the parser for that type.
 *
 * <p>EOF never matches, and the literal {@code null} is accepted for every type.
 *
 * @param t Source.
 * @param type The type expected, may be any.
 * @param valueList Result added here.
 * @return Return true if a value has been added to the valueList and the cursor advanced or
 *     false otherwise.
 */
private static boolean value(Text t, DataType type, List<Object> valueList) {
  if (t.isEof()) {
    return false;
  }
  if (t.consume("null")) {
    valueList.add(null);
    return true;
  }

  switch (type) {
    case z:
    case Z:
    case f:
    case F:
      return number(t, type, valueList);
    case text:
      return text(t, valueList);
    case identifier:
      return identifier(t, valueList);
    case path:
      return path(t, valueList);
    case datetime:
      return datetime(t, valueList);
    case date:
      return date(t, valueList);
    case time:
      return time(t, valueList);
    case bool:
      return bool(t, valueList);
    case cardinality:
      return cardinality(t, valueList);
    case any:
      return any(t, valueList);
    default:
      throw new UnexpectedException("value: " + type);
  }
}
/** * Given the parsing context and what numerical data type is expected convert a string to the * correct type. Note no attempt is made to let the magnitude of the number influence our choice. * * @param string The string to convert to a number. E.g. "123.3e2". If it contains a '.' or an 'e' * then the type must either be f or F. * @param requiredType Either z, Z, f, F or any. * @param tb The source. The cursor will be at the end of the number but any type specifier will * not have been consumed. If there is one then we'll eat it. * @return The derived type. * @throws ParsingException If there is a clash of types. */ private static Object deriveType(String string, DataType requiredType, Text t, int radix) { final Object result; // Figure out the correct type... final DataType derivedType; if (t.isEof()) { if (requiredType == DataType.any) { if (string.indexOf('.') >= 0 || string.indexOf('e') >= 0) { derivedType = DataType.f; } else { derivedType = DataType.z; } } else { derivedType = requiredType; } } else { final char c = t.peek(); if (c == 'z' || c == 'Z' || c == 'f' || c == 'F') { t.consume(c); derivedType = DataType.valueOf(String.valueOf(c)); if (!(requiredType == DataType.any || requiredType == derivedType)) { throw new ParsingException("Incompatible type: " + string + c); } } else { if (requiredType == DataType.any) { if (string.indexOf('.') >= 0 || string.indexOf('e') >= 0) { derivedType = DataType.f; } else { derivedType = DataType.z; } } else { derivedType = requiredType; } } } switch (derivedType) { case z: result = new Long(Long.parseLong(string, radix)); break; case Z: result = new BigInteger(string, radix); break; case f: result = new Double(string); break; case F: result = new BigDecimal(string); break; // $CASES-OMITTED$ default: throw new UnexpectedException("toType: " + derivedType); } return result; }
private static boolean decPart(Text t) { final boolean result; // '.' Digits+ final int save = t.cursor(); if (t.consume('.') && t.consumeAscii(Text.ASCII_0_9)) { result = true; } else { t.setCursor(save); result = false; } return result; }
private static boolean exponent(Text t) { final boolean result; // 'e' Int final int save = t.cursor(); if (t.consume('e') && t.consumeInt()) { result = true; } else { t.setCursor(save); result = false; } return result; }
/**
 * Try to parse an identifier at the cursor.
 *
 * @param t Source.
 * @param valueList A new Identifier built from the consumed text is appended here on success.
 * @return True if an identifier was consumed and added, false otherwise.
 */
private static boolean identifier(Text t, List<Object> valueList) {
  final int begin = t.cursor();
  if (!Identifier.consume(t)) {
    return false;
  }
  final String name = t.getString(begin);
  valueList.add(new Identifier(name));
  return true;
}
/**
 * Try to parse the literal {@code true} or {@code false} at the cursor.
 *
 * @param t Source.
 * @param valueList Boolean.TRUE or Boolean.FALSE is appended here on success.
 * @return True if a boolean literal was consumed and added, false otherwise.
 */
private static boolean bool(Text t, List<Object> valueList) {
  final Boolean parsed;
  if (t.consume("true")) {
    parsed = Boolean.TRUE;
  } else if (t.consume("false")) {
    parsed = Boolean.FALSE;
  } else {
    parsed = null;
  }

  if (parsed == null) {
    return false;
  }
  valueList.add(parsed);
  return true;
}
private static boolean date(Text t, List<Object> valueList) { final boolean result; final int start = t.cursor(); if (t.consume('@') && date(t)) { final String string = t.getString(start + 1); // Jump the'@' final LocalDate value = DateU.parseStandardDate(string); valueList.add(value); result = true; } else { result = false; } return result; }
private static boolean text(Text t, List<Object> valueList) { final boolean result; final int start = t.cursor(); if (t.consumeString()) { // +/- 1s: drop the quotes final String textValue = unescape(t, start + 1, t.cursor() - 1); valueList.add(textValue); result = true; } else { // ?todo Could easily detect unterminated string and throw here result = false; } return result; }
private static boolean binary(Text t, List<Object> valueList, DataType requiredType) { final boolean result; // '0b' [01]+ [zZfF]? final int save = t.cursor(); if (t.consume("0b") && t.consumeAscii(Text.ASCII_0_1)) { final String string = t.getString(save + 2); final Object value = deriveType(string, requiredType, t, 2); valueList.add(value); result = true; } else { t.setCursor(save); result = false; } return result; }
/**
 * Write the Oak markup representation of a value of this data type.
 *
 * <p>A null value is written as {@code null}. Temporal values are prefixed with '@'. The
 * {@code any} type cannot be rendered and is rejected.
 *
 * @param value The value to represent, may be null.
 * @param t The buffer in which to write the markup.
 */
public void toMarkup(Object value, Text t) {
  if (value == null) {
    t.append("null");
    return;
  }

  switch (this) {
    case z:
    case Z:
    case f:
    case F:
    case bool:
    case identifier:
      t.append(value.toString());
      break;
    case text:
      escapeForOak((String) value, true, t);
      break;
    case cardinality:
      ((Cardinality) value).toString(t);
      break;
    case path:
      ((Path) value).toString(t);
      break;
    case date:
      t.append('@');
      t.append(DateU.formatStandardDate((LocalDate) value));
      break;
    case time:
      t.append('@');
      t.append(DateU.formatStandardTime((LocalTime) value));
      break;
    case datetime:
      t.append('@');
      t.append(DateU.formatStandardDatetime((LocalDateTime) value));
      break;
    case any:
    default:
      throw new UnexpectedException("asString: " + this);
  }
}
/**
 * Write the Oak markup for a value whose data type is looked up from its runtime class.
 *
 * @param value The value to represent, may be null in which case {@code null} is written.
 * @param t The buffer in which to write the markup.
 * @throws RuntimeException If no data type is known for the value's class.
 */
public static void toString(Object value, Text t) {
  if (value == null) {
    t.append("null");
    return;
  }

  final Class<? extends Object> valueClass = value.getClass();
  final DataType dataType = getDataType(valueClass);
  if (dataType != null) {
    dataType.toMarkup(value, t);
    return;
  }
  throw new RuntimeException("Invalid type: " + valueClass.getName());
}
/**
 * Convert a potentially escaped string to text.
 *
 * <p>Recognised escapes are {@code \t}, {@code \n}, {@code \"}, {@code \\} and {@code \}{@code u}
 * followed by one to four ASCII hex digits. The unicode form is what escapeForOak writes for
 * characters outside the printable range, so it has to be accepted here for text to round-trip.
 * Any other escape is reported through {@code error(...)} and the escaped character is kept
 * as-is.
 *
 * @param t The source string.
 * @param start Starting offset (inclusive), just after the opening quote.
 * @param end Ending offset (exclusive), the position of the closing quote.
 * @return Return the un-escaped string which may be zero-length but not null.
 */
private static String unescape(Text t, int start, int end) {
  final Text result = new Text();

  /*
   * We know that there's a starting and ending quote that's been removed
   * from the string but otherwise we can't trust the contents.
   */
  for (int i = start; i < end; i++) {
    final char c = t.charAt(i);
    char escaped;
    if (c != '\\') {
      escaped = c;
    } else if (i >= end - 1) {
      // A backslash with nothing after it inside the quotes.
      error("Unterminated string");
      escaped = c;
    } else {
      i++;
      final char next = t.charAt(i);
      switch (next) {
        case 't':
          escaped = '\t';
          break;
        case 'n':
          escaped = '\n';
          break;
        case '"':
          escaped = '"';
          break;
        case '\\':
          escaped = '\\';
          break;
        case 'u': {
          // Unicode, 1-4 hex characters, never reading past the end of the string.
          int code = 0;
          int digits = 0;
          while (digits < 4 && i + 1 < end) {
            final char h = t.charAt(i + 1);
            final int d = h < 128 ? Character.digit(h, 16) : -1; // ASCII hex digits only
            if (d < 0) {
              break;
            }
            code = code * 16 + d;
            digits++;
            i++;
          }
          if (digits == 0) {
            error("Invalid escape: '\\" + next + '\'');
            escaped = next;
          } else {
            escaped = (char) code;
          }
          break;
        }
        default:
          error("Invalid escape: '\\" + next + '\'');
          escaped = next;
          break;
      }
    }
    result.append(escaped);
  }
  return result.toString();
}
/** * There's a bug in this as it requires seconds in the string whereas the time() function does * not. It seems to be in the Java parser. * * @param tb Source to parse. * @return A Temporal or null if none found. */ @Nullable private static Temporal temporal(Text t) { Temporal result; final int save = t.cursor(); t.consume('@'); final int start = save + 1; // yyyy/MM/dd HH:mm:ss try { if (date(t)) { final int save2 = t.cursor(); t.ws(); if (time(t)) { final String string = t.getString(start); result = DateU.parseStandardDatetime(string); } else { t.setCursor(save2); final String string = t.getString(start); result = DateU.parseStandardDate(string); } } else if (time(t)) { final String string = t.getString(start); result = DateU.parseStandardTime(string); } else { result = null; t.setCursor(save); } } catch (final DateTimeParseException e) { result = null; t.setCursor(save); } return result; }
private static boolean time(Text t) { final boolean result; // HH ':' mm ( : ss )? // Try for HH:mm... int reset = t.cursor(); if (t.consumeAscii(Text.ASCII_0_9) && t.consume(':') && t.consumeAscii(Text.ASCII_0_9)) { result = true; reset = t.cursor(); // Try for :ss... if (t.consume(':') && t.consumeAscii(Text.ASCII_0_9)) { reset = t.cursor(); } } else { result = false; } t.setCursor(reset); return result; }
/** * Escape a string for Oak. If the string is null then "null" is returned otherwise the string is * returned with [tn\"] escaped \r discarded and the whole thing surrounded in quotes if * requested. * * @param string The String to process. * @param quote Double quotes are added if true. * @param result Where to place the processed String. */ public static void escapeForOak(String string, boolean quote, Text result) { if (string == null) { result.append("null"); } else { if (quote) { result.append('"'); } final char[] ca = string.toCharArray(); for (final char c : ca) { switch (c) { case '\t': result.append("\\t"); break; case '\n': result.append("\\n"); break; case '\"': result.append("\\\""); break; case '\\': result.append("\\\\"); break; default: if (c < ' ' || c >= '~') { // Must be four digits result.append("\\u" + Integer.toHexString(c | 0x10000).substring(1)); } else { result.append(c); } } } if (quote) { result.append('"'); } } }
/**
 * Convert a list of values of this data type to an Oak markup String, e.g.
 * "[ true, null, false ]". An empty list is rendered as "[]".
 *
 * @param values The list of values, must not be null.
 * @return An Oak markup representation of the values.
 * @see #parseArray(String, DataType)
 */
public String toString(List<Object> values) {
  assert values != null : "Arrays can't be null, only empty";

  final Text markup = new Text(true);
  markup.append('[');
  if (!values.isEmpty()) {
    markup.space();
    for (final Object element : values) {
      markup.delimit();
      toMarkup(element, markup);
    }
    markup.space();
  }
  markup.append(']');
  return markup.toString();
}
private static boolean date(Text t) { final boolean result; // yyyy '/' MM '/' dd final int save = t.cursor(); if (t.consumeAscii(Text.ASCII_0_9) && t.consume('/') && t.consumeAscii(Text.ASCII_0_9) && t.consume('/') && t.consumeAscii(Text.ASCII_0_9)) { result = true; } else { result = false; t.setCursor(save); } return result; }
private static boolean any(Text t, List<Object> valueList) { final boolean result; // value // : Text // | Path // | Date | Time | DateTime // | 'null' // | 'true' | 'false' // | z | Z | f | F // | Identifier // | Cardinality // ; /* * Try and parse a value, we don't know the type so use the first * character as an indicator to jump to the right type. * * We know we're not EOF and "null" has been dealt with by the caller. */ final char c = t.peek(); if (c == '"') { // Text result = text(t, valueList); } else if (c == '`') { // Path result = path(t, valueList); } else if (c == '@') { // Temporal result = datetime(t, valueList) // Must be first || date(t, valueList) || time(t, valueList); } else if (c >= '0' && c <= '9' || c == '.' || c == '-') { result = cardinality(t, valueList) || number(t, DataType.any, valueList); } else { result = bool(t, valueList) // Must be first || identifier(t, valueList); } return result; }
private static List<Object> elementList(Text t, DataType type) { final List<Object> result = new ArrayList<>(); // elementList: ( WS? value ( WS? ',' WS? value )* WS? )? if (t.ws() && value(t, type, result)) { while (true) { final int save = t.cursor(); if (t.ws() && t.consume(',') && t.ws() && value(t, type, result)) { continue; } t.setCursor(save); break; } } return result; }
/** * Parse a string into a list of data values. White space is supported only in between the [square * brackets]. * * @param string A string representation of an Oak property array, e.g. [ true, null, false ] * @param type The type of the array, may be 'any'. * @return A potentially empty but not null list of values. */ @SuppressWarnings("unchecked") public static <T> List<T> parseArray(String string, DataType type) { final List<?> result; final Text t = new Text(); t.append(string); if (t.consume('[') && (result = elementList(t, type)) != null && (t.ws() && t.consume(']')) && t.isEof()) { // OK } else { throw new ParsingException("Invalid array: " + string); } return (List<T>) result; }
/**
 * Render a value of this data type as an Oak markup String.
 *
 * @param value The value to represent, may be null.
 * @return The markup, e.g. `/Some/Path`
 * @see #parseValue(String)
 */
public final String toString(Object value) {
  final Text markup = new Text();
  this.toMarkup(value, markup);
  return markup.toString();
}
/** * In Oak it is possible to embed a tab character as an ASCII 8 or as a \t but internally we store * this in ASCII. The same goes for newlines and other tables (see the Oak reference). Here we * convert external format to internal format. * * @param source The text to process, may be null. E.g. "Hello\tworld" (with the quotes) * @return Return an internal format string. */ @Nullable public static String textToInternalFormat(@Nullable String source) throws ParsingException { final String result; assert source == null || source.length() >= 2 && source.charAt(0) == '"' && source.charAt(source.length() - 1) == '"'; if (source == null) { result = null; } else { final Text t = new Text(); final char[] ca = source.toCharArray(); final int length = ca.length - 1; // 1 because remove quotes for (int i = 1; i < length; i++) { // 1 because remove quotes final char c = ca[i]; if (c == '\\') { if (i == length - 1) { error("Invalid text " + source + ", escape at end of line"); } else { i++; final char next = ca[i]; if (next == 't') { t.append('\t'); } else if (next == 'n') { t.append('\n'); } else if (next == '"') { t.append('"'); } else if (next == '\\') { t.append('\\'); } else if (next == 'u') { // Unicode, 1-4 hex characters... i++; final Text hex = new Text(); hex.append(source); hex.setCursor(i); final int start = i; if (hex.consumeAscii(Text.ASCII_0_F)) { final int end = start + Math.min(4, hex.cursor() - start); final String string = hex.getString(start, end); final char u = (char) Integer.parseInt(string, 16); t.append(u); i = end - 1; } else { error("Invalid text " + source + ", incorrect unicode"); } } else { error("Invalid text: \\" + next); } } } else { t.append(c); } } result = t.toString(); } return result; }