private void writeContainerless(Record src, OutputStream dst) { try { GenericDatumWriter datumWriter = new GenericDatumWriter(); Encoder encoder = null; Schema schema = null; for (Object attachment : src.get(Fields.ATTACHMENT_BODY)) { Preconditions.checkNotNull(attachment); GenericContainer datum = (GenericContainer) attachment; schema = getSchema(datum, schema); assert schema != null; datumWriter.setSchema(schema); if (encoder == null) { // init if (format == Format.containerlessJSON) { encoder = EncoderFactory.get().jsonEncoder(schema, dst); } else { encoder = EncoderFactory.get().binaryEncoder(dst, null); } assert encoder != null; } datumWriter.write(datum, encoder); } encoder.flush(); } catch (IOException e) { throw new MorphlineRuntimeException(e); } }
/**
 * Encodes {@code object} via {@code encodeNode} and writes the result to {@code stream} in Avro
 * binary form against {@code Schemas.NODE}.
 *
 * @param stream destination of the encoded bytes
 * @param object value to encode and write
 * @throws IOException if writing or flushing the encoder fails
 */
public void toStream(final OutputStream stream, final Object object) throws IOException {
  final Object encodedNode = encodeNode(object);
  // Direct (unbuffered) encoder; flushed explicitly once the datum is written.
  final Encoder binaryOut = EncoderFactory.get().directBinaryEncoder(stream, null);
  final DatumWriter<Object> nodeWriter = new GenericDatumWriter<Object>(Schemas.NODE);
  nodeWriter.write(encodedNode, binaryOut);
  binaryOut.flush();
}
/**
 * Converts {@code status} into a generic Avro record with {@code buildTweet} and appends its
 * binary encoding to {@code out}.
 *
 * @param out buffer receiving the encoded record
 * @param status source object handed to {@code buildTweet}
 * @param schema schema used both to build and to write the record
 * @throws IOException if writing or flushing the encoder fails
 */
public static void tweetToAvro(ByteArrayOutputStream out, Status status, Schema schema)
    throws IOException {
  GenericDatumWriter<GenericRecord> recordWriter = new GenericDatumWriter<GenericRecord>(schema);
  Encoder binaryOut = EncoderFactory.get().binaryEncoder(out, null);
  GenericRecord tweetRecord = buildTweet(schema, status);
  recordWriter.write(tweetRecord, binaryOut);
  // The binary encoder buffers; flush so the bytes reach 'out'.
  binaryOut.flush();
}
/**
 * Encodes {@code object} via {@code encodeRecord} (passing along {@code propertiesToSerialize})
 * and writes the result to {@code stream} in Avro binary form against {@code Schemas.NODE}.
 *
 * @param stream destination of the encoded bytes
 * @param object record to encode and write
 * @param propertiesToSerialize forwarded unchanged to {@code encodeRecord}; may be null
 * @throws IOException if writing or flushing the encoder fails
 */
public void toStream(
    final OutputStream stream,
    final Record object,
    @Nullable final Set<URI> propertiesToSerialize)
    throws IOException {
  final Object encodedRecord = encodeRecord(object, propertiesToSerialize);
  // Direct (unbuffered) encoder; flushed explicitly once the datum is written.
  final Encoder binaryOut = EncoderFactory.get().directBinaryEncoder(stream, null);
  final DatumWriter<Object> nodeWriter = new GenericDatumWriter<Object>(Schemas.NODE);
  nodeWriter.write(encodedRecord, binaryOut);
  binaryOut.flush();
}
@Override @SuppressWarnings({"rawtypes", "unchecked"}) protected void writeMap(Schema schema, Object datum, Encoder out) throws IOException { if (writeDirtyBits) { // write extra state information for maps StatefulMap<Utf8, ?> map = (StatefulMap) datum; out.writeInt(map.states().size()); for (Entry<Utf8, State> e2 : map.states().entrySet()) { out.writeString(e2.getKey()); out.writeInt(e2.getValue().ordinal()); } } super.writeMap(schema, datum, out); }
/**
 * Round-trips {@code datum} through the blocking binary encoder and a binary decoder and asserts
 * that the decoded value equals the original.
 *
 * @param schema schema set on both the writer and the reader
 * @param datum value to encode
 * @param writer writer used for encoding
 * @param reader reader used for decoding
 * @throws IOException if encoding or decoding fails
 */
public static void checkBlockingBinary(
    Schema schema, Object datum, DatumWriter<Object> writer, DatumReader<Object> reader)
    throws IOException {
  // Encode.
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  writer.setSchema(schema);
  Encoder blockingOut = EncoderFactory.get().blockingBinaryEncoder(sink, null);
  writer.write(datum, blockingOut);
  blockingOut.flush();
  byte[] encodedBytes = sink.toByteArray();

  // Decode and compare.
  reader.setSchema(schema);
  Object roundTripped =
      reader.read(null, DecoderFactory.get().binaryDecoder(encodedBytes, null));
  assertEquals("Decoded data does not match.", datum, roundTripped);
}
/**
 * Writes {@code entity} through {@code writer} to {@code encoder} and flushes the encoder.
 *
 * @param entity The entity to encode.
 * @param encoder The Avro Encoder that receives the encoded entity and is flushed afterwards.
 * @param writer The DatumWriter used to encode the entity.
 * @throws SerializationException wrapping any {@link IOException} raised while writing or
 *     flushing
 */
public static <T> void writeAvroEntity(T entity, Encoder encoder, DatumWriter<T> writer) {
  try {
    writer.write(entity, encoder);
    encoder.flush();
  } catch (IOException ioFailure) {
    throw new SerializationException("Could not serialize Avro entity", ioFailure);
  }
}
/**
 * Parses {@code json}, round-trips the parsed node through validating binary encoding and
 * decoding against {@code Json.SCHEMA}, and asserts that the string forms match.
 *
 * @param json JSON text to parse and round-trip
 * @throws Exception if parsing, encoding or decoding fails
 */
public static void checkBinaryJson(String json) throws Exception {
  Object parsedNode = Json.parseJson(json);

  // Encode through a validating wrapper around a plain binary encoder.
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  DatumWriter<Object> nodeWriter = new Json.ObjectWriter();
  Encoder rawOut = EncoderFactory.get().binaryEncoder(sink, null);
  Encoder validatingOut = EncoderFactory.get().validatingEncoder(Json.SCHEMA, rawOut);
  nodeWriter.write(parsedNode, validatingOut);
  validatingOut.flush();
  byte[] encodedBytes = sink.toByteArray();

  // Decode through the matching validating decoder.
  DatumReader<Object> nodeReader = new Json.ObjectReader();
  Decoder rawIn = DecoderFactory.get().binaryDecoder(encodedBytes, null);
  Decoder validatingIn = DecoderFactory.get().validatingDecoder(Json.SCHEMA, rawIn);
  Object roundTripped = nodeReader.read(null, validatingIn);

  assertEquals(
      "Decoded json does not match.", Json.toString(parsedNode), Json.toString(roundTripped));
}
/**
 * Encodes {@code datum} with the JSON encoder, asserts the UTF-8 text equals {@code json}, then
 * decodes that text again and asserts the result equals {@code datum}.
 *
 * @param schema schema used for encoding and decoding
 * @param datum value to encode
 * @param json expected JSON encoding of {@code datum}
 * @throws Exception if encoding or decoding fails
 */
private static void checkJson(Schema schema, Object datum, String json) throws Exception {
  // Encode and compare against the expected text.
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  Encoder jsonOut = EncoderFactory.get().jsonEncoder(schema, sink);
  DatumWriter<Object> datumWriter = new GenericDatumWriter<Object>();
  datumWriter.setSchema(schema);
  datumWriter.write(datum, jsonOut);
  jsonOut.flush();
  byte[] encodedBytes = sink.toByteArray();
  String encodedText = new String(encodedBytes, "UTF-8");
  assertEquals("Encoded data does not match.", json, encodedText);

  // Decode the same bytes and compare against the input datum.
  DatumReader<Object> datumReader = new GenericDatumReader<Object>();
  datumReader.setSchema(schema);
  Object roundTripped =
      datumReader.read(
          null, DecoderFactory.get().jsonDecoder(schema, new ByteArrayInputStream(encodedBytes)));
  assertEquals("Decoded data does not match.", datum, roundTripped);
}
/**
 * Writes {@code datum} twice through one JSON encoder, then reads two values back through one
 * JSON decoder and asserts each equals {@code datum}. The second read passes the first decoded
 * value as the reuse argument.
 *
 * @param schema schema set on the writer, the reader, the encoder and the decoder
 * @param datum value to encode
 * @param writer writer used for encoding
 * @param reader reader used for decoding
 * @throws IOException if encoding or decoding fails
 */
private static void checkJson(
    Schema schema, Object datum, DatumWriter<Object> writer, DatumReader<Object> reader)
    throws IOException {
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  Encoder jsonOut = EncoderFactory.get().jsonEncoder(schema, sink);
  writer.setSchema(schema);
  // Two consecutive copies of the same datum go into the same stream.
  for (int copy = 0; copy < 2; copy++) {
    writer.write(datum, jsonOut);
  }
  jsonOut.flush();
  byte[] encodedBytes = sink.toByteArray();

  reader.setSchema(schema);
  Decoder jsonIn = DecoderFactory.get().jsonDecoder(schema, new ByteArrayInputStream(encodedBytes));

  Object firstDecoded = reader.read(null, jsonIn);
  assertEquals("Decoded data does not match.", datum, firstDecoded);

  Object secondDecoded = reader.read(firstDecoded, jsonIn);
  assertEquals("Decoded data does not match.", datum, secondDecoded);
}
@Test public void testEnumMismatch() throws Exception { Schema actual = Schema.parse("{\"type\":\"enum\",\"name\":\"E\",\"symbols\":[\"X\",\"Y\"]}"); Schema expected = Schema.parse("{\"type\":\"enum\",\"name\":\"E\",\"symbols\":[\"Y\",\"Z\"]}"); ByteArrayOutputStream out = new ByteArrayOutputStream(); DatumWriter<Object> writer = new GenericDatumWriter<Object>(actual); Encoder encoder = EncoderFactory.get().directBinaryEncoder(out, null); writer.write(new GenericData.EnumSymbol(actual, "Y"), encoder); writer.write(new GenericData.EnumSymbol(actual, "X"), encoder); encoder.flush(); byte[] data = out.toByteArray(); Decoder decoder = DecoderFactory.get().binaryDecoder(data, null); DatumReader<String> in = new GenericDatumReader<String>(actual, expected); assertEquals("Wrong value", new GenericData.EnumSymbol(expected, "Y"), in.read(null, decoder)); try { in.read(null, decoder); fail("Should have thrown exception."); } catch (AvroTypeException e) { // expected } }
public void serializeGeneric() throws IOException { // Create a datum to serialize. Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("/MyPair.avsc")); GenericRecord datum = new GenericData.Record(schema); datum.put("left", new Utf8("dog")); datum.put("right", new Utf8("cat")); // Serialize it. ByteArrayOutputStream out = new ByteArrayOutputStream(); DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema); Encoder encoder = EncoderFactory.get().binaryEncoder(out, null); writer.write(datum, encoder); encoder.flush(); out.close(); System.out.println("Serialization: " + out); // Deserialize it. DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema); BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null); GenericRecord result = reader.read(null, decoder); System.out.printf("Left: %s, Right: %s\n", result.get("left"), result.get("right")); }
/** * Gives the output message * * @param outputType output data type * @param result mapping result * @return the output as a String * @throws IOException */ public String getOutputMessage(String outputType, GenericRecord result) throws SynapseException, IOException { DatumWriter<GenericRecord> writer = null; ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); Encoder encoder = new DummyEncoder(byteArrayOutputStream); // OMElement outMessage = null; String outMessage = null; try { writer = WriterRegistry.getInstance().get(outputType).newInstance(); writer.setSchema(result.getSchema()); writer.write(result, encoder); if (log.isDebugEnabled()) { log.debug("Output received from datum writer.." + byteArrayOutputStream.toString()); } } catch (Exception e) { handleException("Data coversion Failed at JSONWriter..", e); } finally { encoder.flush(); } /*try { // Converts the result into an OMElement outMessage = getOutputResult(byteArrayOutputStream.toString()); } catch (XMLStreamException e) { handleException( "Failed at generating the OMElement for the JSON output received...", e); }*/ outMessage = byteArrayOutputStream.toString(); return outMessage; }
/**
 * End-to-end experiment with the Avro message format: builds a "Message" record holding five
 * operations (PurgeAll, OptimizeAll, Flush, Delete, Add), serializes it with a direct binary
 * encoder, decodes it again with a generic reader, and asserts on the decoded structure.
 *
 * <p>NOTE(review): {@code parseSchema}, {@code parseProtocol}, {@code createNumeric},
 * {@code createField} and the {@code assert*Field} helpers are defined outside this method;
 * their behavior is not visible here.
 */
@Test
public void experimentWithAvro() throws Exception {
  String root = "org/hibernate/search/remote/codex/avro/v1_1/";
  // Parse every individual schema resource; the return values are ignored here.
  parseSchema(root + "attribute/TokenTrackingAttribute.avro", "attribute/TokenTrackingAttribute");
  parseSchema(root + "attribute/CharTermAttribute.avro", "attribute/CharTermAttribute");
  parseSchema(root + "attribute/PayloadAttribute.avro", "attribute/PayloadAttribute");
  parseSchema(root + "attribute/KeywordAttribute.avro", "attribute/KeywordAttribute");
  parseSchema(
      root + "attribute/PositionIncrementAttribute.avro", "attribute/PositionIncrementAttribute");
  parseSchema(root + "attribute/FlagsAttribute.avro", "attribute/FlagsAttribute");
  parseSchema(root + "attribute/TypeAttribute.avro", "attribute/TypeAttribute");
  parseSchema(root + "attribute/OffsetAttribute.avro", "attribute/OffsetAttribute");
  parseSchema(root + "field/TermVector.avro", "field/TermVector");
  parseSchema(root + "field/Index.avro", "field/Index");
  parseSchema(root + "field/Store.avro", "field/Store");
  parseSchema(root + "field/TokenStreamField.avro", "field/TokenStreamField");
  parseSchema(root + "field/ReaderField.avro", "field/ReaderField");
  parseSchema(root + "field/StringField.avro", "field/StringField");
  parseSchema(root + "field/BinaryField.avro", "field/BinaryField");
  parseSchema(root + "field/NumericIntField.avro", "field/NumericIntField");
  parseSchema(root + "field/NumericLongField.avro", "field/NumericLongField");
  parseSchema(root + "field/NumericFloatField.avro", "field/NumericFloatField");
  parseSchema(root + "field/NumericDoubleField.avro", "field/NumericDoubleField");
  parseSchema(root + "field/CustomFieldable.avro", "field/CustomFieldable");
  parseSchema(root + "Document.avro", "Document");
  parseSchema(root + "operation/Id.avro", "operation/Id");
  parseSchema(root + "operation/OptimizeAll.avro", "operation/OptimizeAll");
  parseSchema(root + "operation/PurgeAll.avro", "operation/PurgeAll");
  parseSchema(root + "operation/Flush.avro", "operation/Flush");
  parseSchema(root + "operation/Delete.avro", "operation/Delete");
  parseSchema(root + "operation/Add.avro", "operation/Add");
  parseSchema(root + "operation/Update.avro", "operation/Update");
  parseSchema(root + "Message.avro", "Message");

  // Load the protocol and look up the named types used to build the sample message.
  String filename = root + "Works.avpr";
  Protocol protocol = parseProtocol(filename, "Works");
  // termVectorSchema, indexSchema, storeSchema and updateSchema are looked up but not
  // referenced again in this method.
  final Schema termVectorSchema = protocol.getType("TermVector");
  final Schema indexSchema = protocol.getType("Index");
  final Schema storeSchema = protocol.getType("Store");
  final Schema tokenTrackingAttribute = protocol.getType("TokenTrackingAttribute");
  final Schema tokenStreamSchema = protocol.getType("TokenStreamField");
  final Schema readerSchema = protocol.getType("ReaderField");
  final Schema stringSchema = protocol.getType("StringField");
  final Schema binarySchema = protocol.getType("BinaryField");
  final Schema intFieldSchema = protocol.getType("NumericIntField");
  final Schema longFieldSchema = protocol.getType("NumericLongField");
  final Schema floatFieldSchema = protocol.getType("NumericFloatField");
  final Schema doubleFieldSchema = protocol.getType("NumericDoubleField");
  final Schema custonFieldableSchema = protocol.getType("CustomFieldable");
  final Schema documentSchema = protocol.getType("Document");
  final Schema idSchema = protocol.getType("Id");
  final Schema optimizeAllSchema = protocol.getType("OptimizeAll");
  final Schema purgeAllSchema = protocol.getType("PurgeAll");
  final Schema flushSchema = protocol.getType("Flush");
  final Schema deleteSchema = protocol.getType("Delete");
  final Schema addSchema = protocol.getType("Add");
  final Schema updateSchema = protocol.getType("Update");
  Schema messageSchema = protocol.getType("Message");

  // Writer and encoder targeting an in-memory buffer.
  final ByteArrayOutputStream out = new ByteArrayOutputStream();
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(messageSchema);
  Encoder encoder = EncoderFactory.get().directBinaryEncoder(out, null);

  // Ten-byte sample payload (values 0..9) reused for every binary value below.
  byte[] serializableSample = new byte[10];
  for (int i = 0; i < 10; i++) {
    serializableSample[i] = (byte) i;
  }

  // Single-entry class reference list; operations refer to the class by its index in this list.
  List<String> classReferences = new ArrayList<String>();
  classReferences.add(AvroTest.class.getName());

  // The argument 1 is only an initial capacity; nine fieldables are added in total.
  List<GenericRecord> fieldables = new ArrayList<GenericRecord>(1);
  // custom fieldable
  GenericRecord customFieldable = new GenericData.Record(custonFieldableSchema);
  customFieldable.put("instance", ByteBuffer.wrap(serializableSample));
  fieldables.add(customFieldable);
  // numeric fields
  GenericRecord numericField = createNumeric(intFieldSchema);
  numericField.put("value", 3);
  fieldables.add(numericField);
  numericField = createNumeric(longFieldSchema);
  numericField.put("value", 3l);
  fieldables.add(numericField);
  numericField = createNumeric(floatFieldSchema);
  numericField.put("value", 2.3f);
  fieldables.add(numericField);
  numericField = createNumeric(doubleFieldSchema);
  numericField.put("value", 2.3d);
  fieldables.add(numericField);
  // fields
  GenericRecord field = createField(binarySchema);
  field.put("offset", 0);
  field.put("length", 10);
  field.put("value", ByteBuffer.wrap(serializableSample));
  fieldables.add(field);
  field = createField(stringSchema);
  field.put("value", stringSchema.getName());
  field.put("store", "YES");
  field.put("index", "ANALYZED");
  field.put("termVector", "WITH_OFFSETS");
  fieldables.add(field);
  field = createField(tokenStreamSchema);
  // Token stream value: one token whose attribute list holds a TokenTrackingAttribute record
  // followed by a raw byte buffer.
  List<List<Object>> tokens = new ArrayList<List<Object>>();
  List<Object> attrs = new ArrayList<Object>();
  tokens.add(attrs);
  GenericData.Record attr = new GenericData.Record(tokenTrackingAttribute);
  List<Integer> positions = new ArrayList<Integer>();
  positions.add(1);
  positions.add(2);
  positions.add(3);
  positions.add(4);
  attr.put("positions", positions);
  attrs.add(attr);
  attrs.add(ByteBuffer.wrap(serializableSample));
  field.put("value", tokens);
  field.put("termVector", "WITH_OFFSETS");
  fieldables.add(field);
  field = createField(readerSchema);
  field.put("value", ByteBuffer.wrap(serializableSample));
  field.put("termVector", "WITH_OFFSETS");
  fieldables.add(field);

  // Document wrapping all fieldables.
  GenericRecord doc = new GenericData.Record(documentSchema);
  doc.put("boost", 2.3f);
  doc.put("fieldables", fieldables);

  // Add operation: binary id, the document, and a two-entry analyzer map.
  GenericRecord add = new GenericData.Record(addSchema);
  add.put("class", classReferences.indexOf(AvroTest.class.getName()));
  GenericRecord id = new GenericData.Record(idSchema);
  id.put("value", ByteBuffer.wrap(serializableSample));
  add.put("id", id);
  add.put("document", doc);
  Map<String, String> analyzers = new HashMap<String, String>();
  analyzers.put("name", "ngram");
  analyzers.put("description", "porter");
  add.put("fieldToAnalyzerMap", analyzers);

  // Delete operation: uses a Long id, unlike the byte-buffer id of the Add operation.
  GenericRecord delete = new GenericData.Record(deleteSchema);
  delete.put("class", classReferences.indexOf(AvroTest.class.getName()));
  id = new GenericData.Record(idSchema);
  id.put("value", new Long(30));
  delete.put("id", id);

  GenericRecord purgeAll = new GenericData.Record(purgeAllSchema);
  purgeAll.put("class", classReferences.indexOf(AvroTest.class.getName()));
  GenericRecord optimizeAll = new GenericData.Record(optimizeAllSchema);
  GenericRecord flush = new GenericData.Record(flushSchema);

  // Operation order matters: the assertions below address operations by index.
  List<GenericRecord> operations = new ArrayList<GenericRecord>(1);
  operations.add(purgeAll);
  operations.add(optimizeAll);
  operations.add(flush);
  operations.add(delete);
  operations.add(add);

  GenericRecord message = new GenericData.Record(messageSchema);
  message.put("classReferences", classReferences);
  message.put("operations", operations);

  // Serialize the single message.
  writer.write(message, encoder);
  encoder.flush();

  // Decode from the written bytes.
  ByteArrayInputStream inputStream = new ByteArrayInputStream(out.toByteArray());
  Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(messageSchema);
  // Keep reading records until the decoder signals end of input with EOFException.
  while (true) {
    try {
      GenericRecord result = reader.read(null, decoder);
      System.out.println(result);
      assertThat(result).isNotNull();
      // operations
      assertThat(result.get("operations")).isNotNull().isInstanceOf(List.class);
      List<?> ops = (List<?>) result.get("operations");
      assertThat(ops).hasSize(5);
      // Flush
      assertThat(ops.get(2)).isInstanceOf(GenericRecord.class);
      GenericRecord flushOp = (GenericRecord) ops.get(2);
      assertThat(flushOp.getSchema().getName()).isEqualTo("Flush");
      // Delete
      assertThat(ops.get(3)).isInstanceOf(GenericRecord.class);
      GenericRecord deleteOp = (GenericRecord) ops.get(3);
      assertThat(deleteOp.getSchema().getName()).isEqualTo("Delete");
      Object actual = ((GenericRecord) deleteOp.get("id")).get("value");
      assertThat(actual).isInstanceOf(Long.class);
      assertThat(actual).isEqualTo(Long.valueOf(30));
      // Add
      assertThat(ops.get(4)).isInstanceOf(GenericRecord.class);
      GenericRecord addOp = (GenericRecord) ops.get(4);
      assertThat(addOp.getSchema().getName()).isEqualTo("Add");
      actual = ((GenericRecord) addOp.get("id")).get("value");
      assertThat(actual).isInstanceOf(ByteBuffer.class);
      ByteBuffer bb = (ByteBuffer) actual;
      assertThat(bb.hasArray()).isTrue();
      // Copy the remaining bytes out of the buffer to compare them with the sample payload.
      byte[] copy = new byte[bb.remaining()];
      bb.get(copy);
      assertThat(serializableSample).isEqualTo(copy);
      // fieldToAnalyzerMap
      assertThat(addOp.get("fieldToAnalyzerMap")).isInstanceOf(Map.class);
      assertThat((Map) addOp.get("fieldToAnalyzerMap")).hasSize(2);
      // document
      assertThat(addOp.get("document")).isNotNull();
      GenericRecord document = (GenericRecord) addOp.get("document");
      assertThat(document.get("boost")).isEqualTo(2.3f);
      // numeric fields
      assertThat(document.get("fieldables")).isNotNull().isInstanceOf(List.class);
      List<?> fields = (List<?>) document.get("fieldables");
      assertThat(fields).hasSize(9); // custom + 4 numerics + 4 fields
      // From here on the 'field' local declared during construction is reused for decoded
      // records.
      field = (GenericRecord) fields.get(0);
      assertThat(field.getSchema().getName()).isEqualTo("CustomFieldable");
      field = (GenericRecord) fields.get(1);
      assertThat(field.getSchema().getName()).isEqualTo("NumericIntField");
      assertThat(field.get("value")).isEqualTo(3);
      assertNumericField(field);
      field = (GenericRecord) fields.get(2);
      assertThat(field.getSchema().getName()).isEqualTo("NumericLongField");
      assertThat(field.get("value")).isEqualTo(3l);
      assertNumericField(field);
      field = (GenericRecord) fields.get(3);
      assertThat(field.getSchema().getName()).isEqualTo("NumericFloatField");
      assertThat(field.get("value")).isEqualTo(2.3f);
      assertNumericField(field);
      field = (GenericRecord) fields.get(4);
      assertThat(field.getSchema().getName()).isEqualTo("NumericDoubleField");
      assertThat(field.get("value")).isEqualTo(2.3d);
      assertNumericField(field);
      // fields
      field = (GenericRecord) fields.get(5);
      assertThat(field.getSchema().getName()).isEqualTo("BinaryField");
      assertThat(field.get("value")).isInstanceOf(ByteBuffer.class);
      assertField(field);
      field = (GenericRecord) fields.get(6);
      assertThat(field.getSchema().getName()).isEqualTo("StringField");
      assertThat(field.get("value")).isInstanceOf(Utf8.class);
      assertTermVector(field);
      assertIndexAndStore(field);
      assertField(field);
      field = (GenericRecord) fields.get(7);
      assertThat(field.getSchema().getName()).isEqualTo("TokenStreamField");
      assertThat(field.get("value")).isInstanceOf(List.class);
      List<List<Object>> l1 = (List<List<Object>>) field.get("value");
      assertThat(l1.get(0)).as("Wrong attribute impl list").hasSize(2);
      Object object = l1.get(0).get(0);
      assertThat(object).isNotNull();
      assertTermVector(field);
      assertField(field);
      field = (GenericRecord) fields.get(8);
      assertThat(field.getSchema().getName()).isEqualTo("ReaderField");
      assertThat(field.get("value")).isInstanceOf(ByteBuffer.class);
      assertTermVector(field);
      assertField(field);
    } catch (EOFException eof) {
      // End of input: leave the read loop.
      break;
    } catch (Exception ex) {
      // Any other failure is printed and rethrown so the test fails.
      ex.printStackTrace();
      throw ex;
    }
  }
}