private Map<String, Object> consumeMap(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException { int buf; while ((buf = in.read()) != '[') { if (buf == -1) { throw new IOException("Unexpect end of map"); } } HashMap<String, Object> m = new HashMap<String, Object>(); ByteArrayOutputStream mOut = new ByteArrayOutputStream(BUFFER_SIZE); while (true) { // Read key (assume key can not contains special character such as #, (, [, {, }, ], ) while ((buf = in.read()) != '#') { if (buf == -1) { throw new IOException("Unexpect end of map"); } mOut.write(buf); } String key = bytesToCharArray(mOut.toByteArray()); if (key.length() == 0) throw new IOException("Map key can not be null"); // Read value mOut.reset(); Deque<Character> level = new LinkedList< Character>(); // keep track of nested tuple/bag/map. We do not interpret, save them as // bytearray while (true) { buf = in.read(); if (buf == -1) { throw new IOException("Unexpect end of map"); } if (buf == '[' || buf == '{' || buf == '(') { level.push((char) buf); } else if (buf == ']' && level.isEmpty()) // End of map break; else if (buf == ']' || buf == '}' || buf == ')') { if (level.isEmpty()) throw new IOException("Malformed map"); if (level.peek() == findStartChar((char) buf)) level.pop(); } else if (buf == ',' && level.isEmpty()) { // Current map item complete break; } mOut.write(buf); } Object value = null; if (fieldSchema != null && fieldSchema.getSchema() != null && mOut.size() > 0) { value = bytesToObject(mOut.toByteArray(), fieldSchema.getSchema().getFields()[0]); } else if (mOut.size() > 0) { // untyped map value = new DataByteArray(mOut.toByteArray()); } m.put(key, value); mOut.reset(); if (buf == ']') break; } return m; }
/**
 * Reads one bag literal of the form {@code {(...),(...),...}} from the stream.
 *
 * <p>Input is skipped up to and including the first {@code '{'}. Each element is then
 * read with {@code consumeTuple} using the single field of the bag's inner schema;
 * a {@code null} result from {@code consumeTuple} is not added to the bag. After each
 * element, input is skipped to the next {@code ','} (another element follows) or
 * {@code '}'} (end of bag).
 *
 * @param in          stream positioned at or before the opening {@code '{'}
 * @param fieldSchema schema of the bag field; its inner schema must have exactly one field
 * @return the bag holding every tuple that was read
 * @throws IOException if {@code fieldSchema} is {@code null}, the inner schema does not
 *                     have exactly one field, or the stream ends before the bag is closed
 */
private DataBag consumeBag(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
    if (fieldSchema == null) {
        throw new IOException("Schema is null");
    }
    final ResourceFieldSchema[] innerFields = fieldSchema.getSchema().getFields();

    // Advance past the opening brace.
    int ch;
    do {
        ch = in.read();
        if (ch == -1) {
            throw new IOException("Unexpect end of bag");
        }
    } while (ch != '{');

    if (innerFields.length != 1) {
        throw new IOException("Only tuple is allowed inside bag schema");
    }
    final ResourceFieldSchema tupleSchema = innerFields[0];
    final DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();

    for (;;) {
        final Tuple element = consumeTuple(in, tupleSchema);
        if (element != null) {
            bag.add(element);
        }

        // Advance to the separator after this element or to the end of the bag.
        do {
            ch = in.read();
            if (ch == -1) {
                throw new IOException("Unexpect end of bag");
            }
        } while (ch != '}' && ch != ',');

        if (ch == '}') {
            return bag;
        }
    }
}
private Tuple consumeTuple(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException { if (fieldSchema == null) { throw new IOException("Schema is null"); } int buf; ByteArrayOutputStream mOut; while ((buf = in.read()) != '(' || buf == '}') { if (buf == -1) { throw new IOException("Unexpect end of tuple"); } if (buf == '}') { in.unread(buf); return null; } } Tuple t = TupleFactory.getInstance().newTuple(); if (fieldSchema.getSchema() != null && fieldSchema.getSchema().getFields().length != 0) { ResourceFieldSchema[] fss = fieldSchema.getSchema().getFields(); // Interpret item inside tuple one by one based on the inner schema for (int i = 0; i < fss.length; i++) { Object field; ResourceFieldSchema fs = fss[i]; int delimit = ','; if (i == fss.length - 1) delimit = ')'; if (DataType.isComplex(fs.getType())) { field = consumeComplexType(in, fs); while ((buf = in.read()) != delimit) { if (buf == -1) { throw new IOException("Unexpect end of tuple"); } } } else { mOut = new ByteArrayOutputStream(BUFFER_SIZE); while ((buf = in.read()) != delimit) { if (buf == -1) { throw new IOException("Unexpect end of tuple"); } if (buf == delimit) break; mOut.write(buf); } field = parseSimpleType(mOut.toByteArray(), fs); } t.append(field); } } else { // No inner schema, treat everything inside tuple as bytearray Deque<Character> level = new LinkedList< Character>(); // keep track of nested tuple/bag/map. 
We do not interpret, save them as // bytearray mOut = new ByteArrayOutputStream(BUFFER_SIZE); while (true) { buf = in.read(); if (buf == -1) { throw new IOException("Unexpect end of tuple"); } if (buf == '[' || buf == '{' || buf == '(') { level.push((char) buf); mOut.write(buf); } else if (buf == ')' && level.isEmpty()) // End of tuple { DataByteArray value = new DataByteArray(mOut.toByteArray()); t.append(value); break; } else if (buf == ',' && level.isEmpty()) { DataByteArray value = new DataByteArray(mOut.toByteArray()); t.append(value); mOut.reset(); } else if (buf == ']' || buf == '}' || buf == ')') { if (level.peek() == findStartChar((char) buf)) level.pop(); else throw new IOException("Malformed tuple"); mOut.write(buf); } else mOut.write(buf); } } return t; }
public boolean write( Object object, ResourceFieldSchema field, Generator generator, boolean writeFieldName) { byte type = (field != null ? field.getType() : DataType.findType(object)); if (writeFieldName) { generator.writeFieldName(alias.toES(field.getName())); } if (object == null) { generator.writeNull(); return true; } switch (type) { case DataType.ERROR: case DataType.UNKNOWN: return handleUnknown(object, field, generator); case DataType.NULL: generator.writeNull(); break; case DataType.BOOLEAN: generator.writeBoolean((Boolean) object); break; case DataType.INTEGER: generator.writeNumber(((Number) object).intValue()); break; case DataType.LONG: generator.writeNumber(((Number) object).longValue()); break; case DataType.FLOAT: generator.writeNumber(((Number) object).floatValue()); break; case DataType.DOUBLE: generator.writeNumber(((Number) object).doubleValue()); break; case DataType.BYTE: generator.writeNumber((Byte) object); break; case DataType.CHARARRAY: generator.writeString(object.toString()); break; case DataType.BYTEARRAY: generator.writeBinary(((DataByteArray) object).get()); break; // DateTime introduced in Pig 11 case 30: // DataType.DATETIME generator.writeString(PigUtils.convertDateToES(object)); break; // DateTime introduced in Pig 12 case 65: // DataType.BIGINTEGER throw new SerializationException( "Big integers are not supported by Elasticsearch - consider using a different type (such as string)"); // DateTime introduced in Pig 12 case 70: // DataType.BIGDECIMAL throw new SerializationException( "Big decimals are not supported by Elasticsearch - consider using a different type (such as string)"); case DataType.MAP: ResourceSchema nestedSchema = field.getSchema(); // empty tuple shortcut if (nestedSchema == null) { generator.writeBeginObject(); generator.writeEndObject(); break; } ResourceFieldSchema[] nestedFields = nestedSchema.getFields(); generator.writeBeginObject(); // Pig maps are actually String -> Object association so we can save the 
key right away for (Map.Entry<?, ?> entry : ((Map<?, ?>) object).entrySet()) { generator.writeFieldName(alias.toES(entry.getKey().toString())); write(entry.getValue(), nestedFields[0], generator, false); } generator.writeEndObject(); break; case DataType.TUPLE: nestedSchema = field.getSchema(); // empty tuple shortcut if (nestedSchema == null) { generator.writeBeginObject(); generator.writeEndObject(); break; } nestedFields = nestedSchema.getFields(); // use getAll instead of get(int) to avoid having to handle Exception... List<Object> tuples = ((Tuple) object).getAll(); generator.writeBeginObject(); for (int i = 0; i < nestedFields.length; i++) { String name = nestedFields[i].getName(); // handle schemas without names name = (StringUtils.hasText(name) ? alias.toES(name) : Integer.toString(i)); generator.writeFieldName(name); write(tuples.get(i), nestedFields[i], generator, false); } generator.writeEndObject(); break; case DataType.BAG: nestedSchema = field.getSchema(); // empty tuple shortcut if (nestedSchema == null) { generator.writeBeginArray(); generator.writeEndArray(); break; } ResourceFieldSchema bagType = nestedSchema.getFields()[0]; generator.writeBeginArray(); for (Tuple tuple : (DataBag) object) { write(tuple, bagType, generator, false); } generator.writeEndArray(); break; default: if (writeUnknownTypes) { return handleUnknown(object, field, generator); } return false; } return true; }