private CommonRecord createCommonRecord(GenericRecord avroRecord) {
  GenericFixed uuidFixed = (GenericFixed) avroRecord.get(UUID);
  if (uuidFixed != null) {
    UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed);
    CommonRecord newRecord = commonFactory.createCommonRecord(uuid, avroRecord.getSchema());
    records.put(uuid, newRecord);
    return newRecord;
  } else {
    return commonFactory.createCommonRecord(avroRecord.getSchema());
  }
}
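A minimal sketch of the AvroGenericUtils.createUuidFromFixed helper used above, assuming the GenericFixed value is the usual 16-byte encoding of a UUID's two longs; the project's actual utility may differ.

import java.nio.ByteBuffer;
import java.util.UUID;

import org.apache.avro.generic.GenericFixed;

public final class AvroUuidSketch {
  // Assumption: the fixed value holds 16 bytes, most-significant long first.
  public static UUID createUuidFromFixed(GenericFixed uuidFixed) {
    ByteBuffer buffer = ByteBuffer.wrap(uuidFixed.bytes());
    long mostSigBits = buffer.getLong();
    long leastSigBits = buffer.getLong();
    return new UUID(mostSigBits, leastSigBits);
  }
}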
private void processRecordField(CommonRecord record, GenericRecord deltaRecord, String fieldName) {
  CommonRecord nextRecord = null;
  CommonValue nextValue = record.getField(fieldName);
  if (nextValue != null
      && nextValue.isRecord()
      && nextValue.getRecord().getSchema().getFullName()
          .equals(deltaRecord.getSchema().getFullName())) {
    nextRecord = nextValue.getRecord();
    GenericFixed uuidFixed = (GenericFixed) deltaRecord.get(UUID);
    if (uuidFixed != null) {
      UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed);
      // Checking if the uuid was changed
      if (!uuid.equals(nextRecord.getUuid())) {
        records.remove(nextRecord.getUuid());
        records.put(uuid, nextRecord);
        nextRecord.setUuid(uuid);
      }
    }
  } else {
    nextRecord = createCommonRecord(deltaRecord);
    record.setField(fieldName, commonFactory.createCommonValue(nextRecord));
  }
  updateRecord(nextRecord, deltaRecord);
}
private void assertField(GenericRecord field) {
  assertThat(field.get("name")).isInstanceOf(Utf8.class);
  assertThat(field.get("name").toString()).isEqualTo(field.getSchema().getName());
  assertThat(field.get("boost")).isEqualTo(2.3f);
  assertThat(field.get("omitNorms")).isEqualTo(true);
  assertThat(field.get("omitTermFreqAndPositions")).isEqualTo(true);
}
private void buildAttribute(Object element, LuceneWorksBuilder hydrator) {
  if (element instanceof GenericRecord) {
    GenericRecord record = (GenericRecord) element;
    String name = record.getSchema().getName();
    if ("TokenTrackingAttribute".equals(name)) {
      @SuppressWarnings("unchecked")
      List<Integer> positionList = (List<Integer>) record.get("positions");
      hydrator.addTokenTrackingAttribute(positionList);
    } else if ("CharTermAttribute".equals(name)) {
      hydrator.addCharTermAttribute((CharSequence) record.get("sequence"));
    } else if ("PayloadAttribute".equals(name)) {
      hydrator.addPayloadAttribute(asByteArray(record, "payload"));
    } else if ("KeywordAttribute".equals(name)) {
      hydrator.addKeywordAttribute(asBoolean(record, "isKeyword"));
    } else if ("PositionIncrementAttribute".equals(name)) {
      hydrator.addPositionIncrementAttribute(asInt(record, "positionIncrement"));
    } else if ("FlagsAttribute".equals(name)) {
      hydrator.addFlagsAttribute(asInt(record, "flags"));
    } else if ("TypeAttribute".equals(name)) {
      hydrator.addTypeAttribute(asString(record, "type"));
    } else if ("OffsetAttribute".equals(name)) {
      hydrator.addOffsetAttribute(asInt(record, "startOffset"), asInt(record, "endOffset"));
    } else {
      throw log.unknownAttributeSerializedRepresentation(name);
    }
  } else if (element instanceof ByteBuffer) {
    hydrator.addSerializedAttribute(asByteArray((ByteBuffer) element));
  } else {
    throw log.unknownAttributeSerializedRepresentation(element.getClass().getName());
  }
}
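The asInt/asBoolean/asString/asByteArray accessors are not shown in this listing; the following is a plausible sketch, assuming Avro's usual generic runtime types (Integer, Boolean, Utf8, ByteBuffer), not the project's actual implementation.

// Sketch of the field-access helpers used above; bodies are assumptions based on
// Avro's generic runtime types.
private int asInt(GenericRecord record, String field) {
  return (Integer) record.get(field);
}

private boolean asBoolean(GenericRecord record, String field) {
  return (Boolean) record.get(field);
}

private String asString(GenericRecord record, String field) {
  Object value = record.get(field); // Avro strings arrive as org.apache.avro.util.Utf8
  return value == null ? null : value.toString();
}

private byte[] asByteArray(ByteBuffer buffer) {
  byte[] copy = new byte[buffer.remaining()];
  buffer.duplicate().get(copy); // copy without moving the caller's position
  return copy;
}

private byte[] asByteArray(GenericRecord record, String field) {
  return asByteArray((ByteBuffer) record.get(field));
}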
@Override
public void deserialize(byte[] data, LuceneWorksBuilder hydrator) {
  final ByteArrayInputStream inputStream = new ByteArrayInputStream(data);
  final int majorVersion = inputStream.read();
  final int minorVersion = inputStream.read();
  final Protocol protocol = protocols.getProtocol(majorVersion, minorVersion);
  Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(protocol.getType("Message"));
  GenericRecord result;
  try {
    result = reader.read(null, decoder);
  } catch (IOException e) {
    throw log.unableToDeserializeAvroStream(e);
  }
  classReferences = asListOfString(result, "classReferences");
  final List<GenericRecord> operations = asListOfGenericRecords(result, "operations");
  final ConversionContext conversionContext = new ContextualExceptionBridgeHelper();
  for (GenericRecord operation : operations) {
    String schema = operation.getSchema().getName();
    if ("OptimizeAll".equals(schema)) {
      hydrator.addOptimizeAll();
    } else if ("PurgeAll".equals(schema)) {
      hydrator.addPurgeAllLuceneWork(asClass(operation, "class"));
    } else if ("Flush".equals(schema)) {
      hydrator.addFlush();
    } else if ("Delete".equals(schema)) {
      processId(operation, hydrator);
      hydrator.addDeleteLuceneWork(asClass(operation, "class"), conversionContext);
    } else if ("DeleteByQuery".equals(schema)) {
      String entityClassName = asClass(operation, "class");
      int queryKey = asInt(operation, "key");
      DeleteByQuerySupport.StringToQueryMapper mapper =
          DeleteByQuerySupport.getStringToQueryMapper(queryKey);
      List<Utf8> stringList = asListOfString(operation, "query");
      String[] query = new String[stringList.size()];
      for (int i = 0; i < stringList.size(); ++i) {
        query[i] = stringList.get(i).toString();
      }
      hydrator.addDeleteByQueryLuceneWork(entityClassName, mapper.fromString(query));
    } else if ("Add".equals(schema)) {
      buildLuceneDocument(asGenericRecord(operation, "document"), hydrator);
      Map<String, String> analyzers = getAnalyzers(operation);
      processId(operation, hydrator);
      hydrator.addAddLuceneWork(asClass(operation, "class"), analyzers, conversionContext);
    } else if ("Update".equals(schema)) {
      buildLuceneDocument(asGenericRecord(operation, "document"), hydrator);
      Map<String, String> analyzers = getAnalyzers(operation);
      processId(operation, hydrator);
      hydrator.addUpdateLuceneWork(asClass(operation, "class"), analyzers, conversionContext);
    } else {
      throw log.cannotDeserializeOperation(schema);
    }
  }
}
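For context, a hedged sketch of the producing side that deserialize() expects: two raw version bytes followed by the binary-encoded Message record. The method name and version arguments are illustrative, not the library's actual API.

// Illustrative counterpart to deserialize(): writes the version header it reads back,
// then the Message record in Avro binary form. Names here are assumptions.
private byte[] serializeMessage(GenericRecord message, int majorVersion, int minorVersion)
    throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  out.write(majorVersion);
  out.write(minorVersion);
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(message.getSchema());
  Encoder encoder = EncoderFactory.get().directBinaryEncoder(out, null);
  writer.write(message, encoder);
  encoder.flush();
  return out.toByteArray();
}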
private Value decodeValue(final Object generic) {
  if (generic instanceof GenericRecord) {
    final GenericRecord record = (GenericRecord) generic;
    final Schema schema = record.getSchema();
    if (schema.equals(Schemas.COMPRESSED_IDENTIFIER) || schema.equals(Schemas.PLAIN_IDENTIFIER)) {
      return decodeIdentifier(record);
    }
  }
  return decodeLiteral(generic);
}
private List<Map.Entry> previewAvroBatch(FileStatus fileStatus, int batchSize)
    throws IOException, InterruptedException {
  SeekableInput input = new FsInput(fileStatus.getPath(), hadoopConf);
  DatumReader<GenericRecord> reader = new GenericDatumReader<>();
  FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);
  List<Map.Entry> batch = new ArrayList<>();
  int count = 0;
  while (fileReader.hasNext() && batch.size() < batchSize) {
    GenericRecord datum = fileReader.next();
    // Re-serialize each record as a standalone Avro container file
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    DataFileWriter<GenericRecord> dataFileWriter =
        new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(datum.getSchema()));
    dataFileWriter.create(datum.getSchema(), out);
    dataFileWriter.append(datum);
    dataFileWriter.close();
    out.close();
    batch.add(new Pair(fileStatus.getPath().toUri().getPath() + "::" + count, out.toByteArray()));
    count++;
  }
  fileReader.close(); // avoid leaking the underlying input stream
  return batch;
}
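Each batch value above is a complete, self-describing Avro container, so it can be decoded on its own. A small sketch under that assumption (the helper name is illustrative):

// Reads the single record back out of one previewAvroBatch() entry value.
private GenericRecord readPreviewEntry(byte[] containerBytes) throws IOException {
  try (DataFileStream<GenericRecord> stream = new DataFileStream<>(
      new ByteArrayInputStream(containerBytes), new GenericDatumReader<GenericRecord>())) {
    return stream.hasNext() ? stream.next() : null;
  }
}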
private Object decodeNode(final Object generic) {
  if (generic instanceof GenericRecord) {
    final GenericRecord record = (GenericRecord) generic;
    final Schema schema = record.getSchema();
    if (schema.equals(Schemas.RECORD)) {
      return decodeRecord(record, null);
    } else if (schema.equals(Schemas.PLAIN_IDENTIFIER) || schema.equals(Schemas.COMPRESSED_IDENTIFIER)) {
      return decodeIdentifier(record);
    } else if (schema.equals(Schemas.STATEMENT)) {
      return decodeStatement(record);
    }
  }
  return decodeLiteral(generic);
}
public void testWrite() throws IOException {
  URL url = this.getClass().getClassLoader().getResource("input/Company.avsc");
  assertNotNull(url);
  Schema schema = new Schema.Parser().parse(new File(url.getFile()));
  assertNotNull(schema);
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
  // Another way of loading a file
  File file = new File("src/test/resources/input/companies.avro");
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader);
  File fileOut = new File("target/companies2.avro");
  Schema schemaOut = new Schema.Parser().parse(new File("src/test/resources/input/Company2.avsc"));
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schemaOut);
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
  GenericRecord company = null;
  int count = 0;
  while (dataFileReader.hasNext()) {
    company = dataFileReader.next(company);
    if (company.get("name").toString().equals("aol")) {
      // create() may only be called once, so the input is expected to contain a single "aol" record
      dataFileWriter.create(schemaOut, fileOut);
      GenericRecord recordOut = new GenericData.Record(schemaOut);
      recordOut.put("id", company.get("id"));
      recordOut.put("name", company.get("name"));
      assertTrue(recordOut.getSchema().getField("address") != null);
      assertTrue(recordOut.getSchema().getField("employeeCount") == null);
      // address is of complex type
      GenericRecord address =
          new GenericData.Record((GenericData.Record) company.get("address"), true);
      recordOut.put("address", address);
      dataFileWriter.append(recordOut);
      count++;
    }
  }
  assertTrue(count > 0);
  dataFileWriter.close();
  dataFileReader.close(); // release the handle on the input container file
}
private Resource decodeIdentifier(final GenericRecord record) {
  final Schema schema = record.getSchema();
  if (schema.equals(Schemas.COMPRESSED_IDENTIFIER)) {
    try {
      return this.dictionary.objectFor((Integer) record.get(0));
    } catch (final IOException ex) {
      throw new IllegalStateException("Cannot access dictionary: " + ex.getMessage(), ex);
    }
  } else if (schema.equals(Schemas.PLAIN_IDENTIFIER)) {
    final String string = record.get(0).toString();
    if (string.startsWith("_:")) {
      return this.factory.createBNode(string.substring(2));
    } else {
      return this.factory.createURI(string);
    }
  }
  throw new IllegalArgumentException("Unsupported encoded identifier: " + record);
}
private Literal decodeLiteral(final Object generic) {
  if (generic instanceof GenericRecord) {
    final GenericRecord record = (GenericRecord) generic;
    final Schema schema = record.getSchema();
    if (schema.equals(Schemas.STRING_LANG)) {
      final String label = record.get(0).toString(); // Utf8 class used
      final Object language = record.get(1);
      return this.factory.createLiteral(label, language.toString());
    } else if (schema.equals(Schemas.SHORT)) {
      return this.factory.createLiteral(((Integer) record.get(0)).shortValue());
    } else if (schema.equals(Schemas.BYTE)) {
      return this.factory.createLiteral(((Integer) record.get(0)).byteValue());
    } else if (schema.equals(Schemas.BIGINTEGER)) {
      return this.factory.createLiteral(record.get(0).toString(), XMLSchema.INTEGER);
    } else if (schema.equals(Schemas.BIGDECIMAL)) {
      return this.factory.createLiteral(record.get(0).toString(), XMLSchema.DECIMAL);
    } else if (schema.equals(Schemas.CALENDAR)) {
      // tz is the offset from GMT in minutes, e.g. -330 becomes "GMT-05:30"
      final int tz = (Integer) record.get(0);
      final GregorianCalendar calendar = new GregorianCalendar();
      calendar.setTimeInMillis((Long) record.get(1));
      calendar.setTimeZone(TimeZone.getTimeZone(String.format(
          "GMT%s%02d:%02d", tz >= 0 ? "+" : "-", Math.abs(tz) / 60, Math.abs(tz) % 60)));
      return this.factory.createLiteral(this.datatypeFactory.newXMLGregorianCalendar(calendar));
    }
  } else if (generic instanceof CharSequence) {
    return this.factory.createLiteral(generic.toString()); // Utf8 class used
  } else if (generic instanceof Boolean) {
    return this.factory.createLiteral((Boolean) generic);
  } else if (generic instanceof Long) {
    return this.factory.createLiteral((Long) generic);
  } else if (generic instanceof Integer) {
    return this.factory.createLiteral((Integer) generic);
  } else if (generic instanceof Double) {
    return this.factory.createLiteral((Double) generic);
  } else if (generic instanceof Float) {
    return this.factory.createLiteral((Float) generic);
  }
  Preconditions.checkNotNull(generic);
  throw new IllegalArgumentException("Unsupported generic data: " + generic);
}
/**
 * Gives the output message.
 *
 * @param outputType output data type
 * @param result mapping result
 * @return the output as a String
 * @throws IOException if flushing the encoder fails
 */
public String getOutputMessage(String outputType, GenericRecord result)
    throws SynapseException, IOException {
  DatumWriter<GenericRecord> writer = null;
  ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
  Encoder encoder = new DummyEncoder(byteArrayOutputStream);
  // OMElement outMessage = null;
  String outMessage = null;
  try {
    writer = WriterRegistry.getInstance().get(outputType).newInstance();
    writer.setSchema(result.getSchema());
    writer.write(result, encoder);
    if (log.isDebugEnabled()) {
      log.debug("Output received from datum writer.." + byteArrayOutputStream.toString());
    }
  } catch (Exception e) {
    handleException("Data conversion failed at JSONWriter..", e);
  } finally {
    encoder.flush();
  }
  /*try {
    // Converts the result into an OMElement
    outMessage = getOutputResult(byteArrayOutputStream.toString());
  } catch (XMLStreamException e) {
    handleException("Failed at generating the OMElement for the JSON output received...", e);
  }*/
  outMessage = byteArrayOutputStream.toString();
  return outMessage;
}
/**
 * Merges the values grouped under <code>key</code> into a single output record and emits it
 * when all required fields are present.
 *
 * @param key a <code>Text</code> value
 * @param values an <code>Iterable</code> of <code>AvroValue&lt;GenericRecord&gt;</code> values
 * @param context a <code>Reducer.Context</code> value
 * @exception IOException if an error occurs
 * @exception InterruptedException if an error occurs
 */
@Override
public final void reduce(Text key, Iterable<AvroValue<GenericRecord>> values, Context context)
    throws IOException, InterruptedException {
  GenericRecord output = new GenericData.Record(outputSchema);
  // Merge all non-null fields from the grouped records into a single output record
  for (AvroValue<GenericRecord> value : values) {
    GenericRecord datum = value.datum();
    for (Schema.Field field : datum.getSchema().getFields()) {
      String fieldName = field.name();
      Object fieldValue = datum.get(fieldName);
      if (fieldValue != null) {
        output.put(fieldName, fieldValue);
      }
    }
  }
  CharSequence psam = (CharSequence) output.get(PSAM);
  CharSequence longitude = (CharSequence) output.get(LONGITUDE);
  CharSequence latitude = (CharSequence) output.get(LATITUDE);
  // Only emit records that carry all three required fields
  if (psam != null && longitude != null && latitude != null) {
    context.write(new AvroKey<GenericRecord>(output), NullWritable.get());
  }
}
private void updateRecord(CommonRecord record, GenericRecord delta) {
  List<Field> deltaFields = delta.getSchema().getFields();
  for (Field deltaField : deltaFields) {
    String fieldName = deltaField.name();
    Object rawDeltaField = delta.get(fieldName);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Processing field \"{}\", current value: {}", fieldName,
          record.getField(fieldName) != null ? record.getField(fieldName).toString() : null);
    }
    if (AvroGenericUtils.isRecord(rawDeltaField)) {
      processRecordField(record, (GenericRecord) rawDeltaField, fieldName);
    } else if (AvroGenericUtils.isArray(rawDeltaField)) {
      processArrayField(record, (GenericArray) rawDeltaField, fieldName);
    } else if (AvroGenericUtils.isEnum(rawDeltaField)) {
      processEnumField(record, (GenericEnumSymbol) rawDeltaField, fieldName);
    } else if (AvroGenericUtils.isFixed(rawDeltaField)) {
      processFixedField(record, (GenericFixed) rawDeltaField, fieldName);
    } else {
      record.setField(fieldName, commonFactory.createCommonValue(rawDeltaField));
    }
  }
}
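The AvroGenericUtils type checks drive the dispatch above; instanceof tests against Avro's generic interfaces are a reasonable sketch of what they do, though the actual utility may be stricter.

// Hedged sketch of the type predicates used in updateRecord(); the real helper may
// also consult the schema, but instanceof checks match the casts performed above.
static boolean isRecord(Object value) {
  return value instanceof GenericRecord;
}

static boolean isArray(Object value) {
  return value instanceof GenericArray;
}

static boolean isEnum(Object value) {
  return value instanceof GenericEnumSymbol;
}

static boolean isFixed(Object value) {
  return value instanceof GenericFixed;
}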
/**
 * @param dbUpdates The dbUpdates present in the current transaction
 * @param ti The meta information about the transaction (see the TransactionInfo class for more details)
 * @throws DatabusException
 * @throws UnsupportedKeyException
 */
protected void addEventToBuffer(
    List<TransactionState.PerSourceTransactionalUpdate> dbUpdates, TransactionInfo ti)
    throws DatabusException, UnsupportedKeyException {
  if (dbUpdates.size() == 0) throw new DatabusException("Cannot handle empty dbUpdates");
  long scn = ti.getScn();
  long timestamp = ti.getTransactionTimeStampNs();
  EventSourceStatistics globalStats = getSource(GLOBAL_SOURCE_ID).getStatisticsBean();
  /*
   * We skip the start SCN of the relay; we have already added an EOP for this SCN in the buffer.
   * Why is this not a problem? There are two cases:
   * 1. When we use the earliest/latest SCN and there is no maxScn (we don't really have a start
   *    point), it is OK to miss the first event.
   * 2. If it is the maxScn, then the event was already seen by the relay.
   */
  if (scn == _startPrevScn.get()) {
    _log.info("Skipping this transaction, EOP already sent for this event");
    return;
  }
  getEventBuffer().startEvents();
  int eventsInTransactionCount = 0;
  List<EventReaderSummary> summaries = new ArrayList<EventReaderSummary>();
  for (int i = 0; i < dbUpdates.size(); ++i) {
    GenericRecord record = null;
    TransactionState.PerSourceTransactionalUpdate perSourceUpdate = dbUpdates.get(i);
    short sourceId = (short) perSourceUpdate.getSourceId();
    // prepare stats collection per source
    EventSourceStatistics perSourceStats = getSource(sourceId).getStatisticsBean();
    Iterator<DbUpdateState.DBUpdateImage> dbUpdateIterator =
        perSourceUpdate.getDbUpdatesSet().iterator();
    int eventsInDbUpdate = 0;
    long dbUpdatesEventsSize = 0;
    long startDbUpdatesMs = System.currentTimeMillis();
    while (dbUpdateIterator.hasNext()) { // TODO verify if there is any case where we need to rollback.
      DbUpdateState.DBUpdateImage dbUpdate = dbUpdateIterator.next();
      // Construct the Databus Event key, determine the key type and construct the key
      Object keyObj = obtainKey(dbUpdate);
      DbusEventKey eventKey = new DbusEventKey(keyObj);
      // Get the logical partition id
      PartitionFunction partitionFunction = _partitionFunctionHashMap.get((int) sourceId);
      short lPartitionId = partitionFunction.getPartition(eventKey);
      record = dbUpdate.getGenericRecord();
      // Write the event to the buffer
      if (record == null)
        throw new DatabusException("Cannot write event to buffer because record = " + record);
      if (record.getSchema() == null)
        throw new DatabusException("The record does not have a schema (null schema)");
      try {
        // Collect stats on number of dbUpdates for one source
        eventsInDbUpdate++;
        // Count of all the events in the current transaction
        eventsInTransactionCount++;
        // Serialize the row
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        Encoder encoder = new BinaryEncoder(bos);
        GenericDatumWriter<GenericRecord> writer =
            new GenericDatumWriter<GenericRecord>(record.getSchema());
        writer.write(record, encoder);
        byte[] serializedValue = bos.toByteArray();
        // Get the md5 for the schema
        SchemaId schemaId = SchemaId.createWithMd5(dbUpdate.getSchema());
        // Determine the operation type and convert to dbus opcode
        DbusOpcode opCode;
        if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.INSERT
            || dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.UPDATE) {
          opCode = DbusOpcode.UPSERT;
          if (_log.isDebugEnabled()) _log.debug("The event with scn " + scn + " is INSERT/UPDATE");
        } else if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.DELETE) {
          opCode = DbusOpcode.DELETE;
          if (_log.isDebugEnabled()) _log.debug("The event with scn " + scn + " is DELETE");
        } else {
          throw new DatabusException("Unknown opcode from dbUpdate for event with scn:" + scn);
        }
        // Construct the dbusEvent info
        DbusEventInfo dbusEventInfo = new DbusEventInfo(
            opCode,
            scn,
            (short) _pConfig.getId(),
            lPartitionId,
            timestamp,
            sourceId,
            schemaId.getByteArray(),
            serializedValue,
            false,
            false);
        dbusEventInfo.setReplicated(dbUpdate.isReplicated());
        perSourceStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
        globalStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
        long tsEnd = System.currentTimeMillis();
        perSourceStats.addTimeOfLastDBAccess(tsEnd);
        globalStats.addTimeOfLastDBAccess(tsEnd);
        // Append to the event buffer
        getEventBuffer().appendEvent(eventKey, dbusEventInfo, _statsCollector);
        _rc.incrementEventCount();
        dbUpdatesEventsSize += serializedValue.length;
      } catch (IOException io) {
        perSourceStats.addError();
        globalStats.addEmptyEventCycle();
        _log.error("Cannot create byte stream payload: " + dbUpdates.get(i).getSourceId());
      }
    }
    long endDbUpdatesMs = System.currentTimeMillis();
    long dbUpdatesElapsedTimeMs = endDbUpdatesMs - startDbUpdatesMs;
    // Log Event Summary at logical source level
    EventReaderSummary summary = new EventReaderSummary(
        sourceId,
        _monitoredSources.get(sourceId).getSourceName(),
        scn,
        eventsInDbUpdate,
        dbUpdatesEventsSize,
        -1L /* Not supported */,
        dbUpdatesElapsedTimeMs,
        timestamp,
        timestamp,
        -1L /* Not supported */);
    if (_eventsLog.isInfoEnabled()) {
      _eventsLog.info(summary.toString());
    }
    summaries.add(summary);
    if (_log.isDebugEnabled())
      _log.debug("There are " + eventsInDbUpdate + " events seen in the current dbUpdate");
  }
  // update stats
  _ggParserStats.addTransactionInfo(ti, eventsInTransactionCount);
  // Log Event Summary at physical source level
  ReadEventCycleSummary summary = new ReadEventCycleSummary(
      _pConfig.getName(),
      summaries,
      scn,
      -1 /* Overall time including query time not calculated */);
  if (_eventsLog.isInfoEnabled()) {
    _eventsLog.info(summary.toString());
  }
  _log.info("Writing " + eventsInTransactionCount + " events from transaction with scn: " + scn);
  if (scn <= 0)
    throw new DatabusException(
        "Unable to write events to buffer because of negative/zero scn: " + scn);
  getEventBuffer().endEvents(scn, _statsCollector);
  _scn.set(scn);
  if (getMaxScnReaderWriter() != null) {
    try {
      getMaxScnReaderWriter().saveMaxScn(_scn.get());
    } catch (DatabusException e) {
      _log.error("Cannot save scn = " + _scn + " for physical source = " + getName(), e);
    }
  }
}
@Test
public void experimentWithAvro() throws Exception {
  String root = "org/hibernate/search/remote/codex/avro/v1_1/";
  parseSchema(root + "attribute/TokenTrackingAttribute.avro", "attribute/TokenTrackingAttribute");
  parseSchema(root + "attribute/CharTermAttribute.avro", "attribute/CharTermAttribute");
  parseSchema(root + "attribute/PayloadAttribute.avro", "attribute/PayloadAttribute");
  parseSchema(root + "attribute/KeywordAttribute.avro", "attribute/KeywordAttribute");
  parseSchema(root + "attribute/PositionIncrementAttribute.avro", "attribute/PositionIncrementAttribute");
  parseSchema(root + "attribute/FlagsAttribute.avro", "attribute/FlagsAttribute");
  parseSchema(root + "attribute/TypeAttribute.avro", "attribute/TypeAttribute");
  parseSchema(root + "attribute/OffsetAttribute.avro", "attribute/OffsetAttribute");
  parseSchema(root + "field/TermVector.avro", "field/TermVector");
  parseSchema(root + "field/Index.avro", "field/Index");
  parseSchema(root + "field/Store.avro", "field/Store");
  parseSchema(root + "field/TokenStreamField.avro", "field/TokenStreamField");
  parseSchema(root + "field/ReaderField.avro", "field/ReaderField");
  parseSchema(root + "field/StringField.avro", "field/StringField");
  parseSchema(root + "field/BinaryField.avro", "field/BinaryField");
  parseSchema(root + "field/NumericIntField.avro", "field/NumericIntField");
  parseSchema(root + "field/NumericLongField.avro", "field/NumericLongField");
  parseSchema(root + "field/NumericFloatField.avro", "field/NumericFloatField");
  parseSchema(root + "field/NumericDoubleField.avro", "field/NumericDoubleField");
  parseSchema(root + "field/CustomFieldable.avro", "field/CustomFieldable");
  parseSchema(root + "Document.avro", "Document");
  parseSchema(root + "operation/Id.avro", "operation/Id");
  parseSchema(root + "operation/OptimizeAll.avro", "operation/OptimizeAll");
  parseSchema(root + "operation/PurgeAll.avro", "operation/PurgeAll");
  parseSchema(root + "operation/Flush.avro", "operation/Flush");
  parseSchema(root + "operation/Delete.avro", "operation/Delete");
  parseSchema(root + "operation/Add.avro", "operation/Add");
  parseSchema(root + "operation/Update.avro", "operation/Update");
  parseSchema(root + "Message.avro", "Message");

  String filename = root + "Works.avpr";
  Protocol protocol = parseProtocol(filename, "Works");
  final Schema termVectorSchema = protocol.getType("TermVector");
  final Schema indexSchema = protocol.getType("Index");
  final Schema storeSchema = protocol.getType("Store");
  final Schema tokenTrackingAttribute = protocol.getType("TokenTrackingAttribute");
  final Schema tokenStreamSchema = protocol.getType("TokenStreamField");
  final Schema readerSchema = protocol.getType("ReaderField");
  final Schema stringSchema = protocol.getType("StringField");
  final Schema binarySchema = protocol.getType("BinaryField");
  final Schema intFieldSchema = protocol.getType("NumericIntField");
  final Schema longFieldSchema = protocol.getType("NumericLongField");
  final Schema floatFieldSchema = protocol.getType("NumericFloatField");
  final Schema doubleFieldSchema = protocol.getType("NumericDoubleField");
  final Schema customFieldableSchema = protocol.getType("CustomFieldable");
  final Schema documentSchema = protocol.getType("Document");
  final Schema idSchema = protocol.getType("Id");
  final Schema optimizeAllSchema = protocol.getType("OptimizeAll");
  final Schema purgeAllSchema = protocol.getType("PurgeAll");
  final Schema flushSchema = protocol.getType("Flush");
  final Schema deleteSchema = protocol.getType("Delete");
  final Schema addSchema = protocol.getType("Add");
  final Schema updateSchema = protocol.getType("Update");
  Schema messageSchema = protocol.getType("Message");

  final ByteArrayOutputStream out = new ByteArrayOutputStream();
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(messageSchema);
  Encoder encoder = EncoderFactory.get().directBinaryEncoder(out, null);

  byte[] serializableSample = new byte[10];
  for (int i = 0; i < 10; i++) {
    serializableSample[i] = (byte) i;
  }

  List<String> classReferences = new ArrayList<String>();
  classReferences.add(AvroTest.class.getName());

  List<GenericRecord> fieldables = new ArrayList<GenericRecord>(1);
  // custom fieldable
  GenericRecord customFieldable = new GenericData.Record(customFieldableSchema);
  customFieldable.put("instance", ByteBuffer.wrap(serializableSample));
  fieldables.add(customFieldable);
  // numeric fields
  GenericRecord numericField = createNumeric(intFieldSchema);
  numericField.put("value", 3);
  fieldables.add(numericField);
  numericField = createNumeric(longFieldSchema);
  numericField.put("value", 3L);
  fieldables.add(numericField);
  numericField = createNumeric(floatFieldSchema);
  numericField.put("value", 2.3f);
  fieldables.add(numericField);
  numericField = createNumeric(doubleFieldSchema);
  numericField.put("value", 2.3d);
  fieldables.add(numericField);
  // fields
  GenericRecord field = createField(binarySchema);
  field.put("offset", 0);
  field.put("length", 10);
  field.put("value", ByteBuffer.wrap(serializableSample));
  fieldables.add(field);
  field = createField(stringSchema);
  field.put("value", stringSchema.getName());
  field.put("store", "YES");
  field.put("index", "ANALYZED");
  field.put("termVector", "WITH_OFFSETS");
  fieldables.add(field);
  field = createField(tokenStreamSchema);
  List<List<Object>> tokens = new ArrayList<List<Object>>();
  List<Object> attrs = new ArrayList<Object>();
  tokens.add(attrs);
  GenericData.Record attr = new GenericData.Record(tokenTrackingAttribute);
  List<Integer> positions = new ArrayList<Integer>();
  positions.add(1);
  positions.add(2);
  positions.add(3);
  positions.add(4);
  attr.put("positions", positions);
  attrs.add(attr);
  attrs.add(ByteBuffer.wrap(serializableSample));
  field.put("value", tokens);
  field.put("termVector", "WITH_OFFSETS");
  fieldables.add(field);
  field = createField(readerSchema);
  field.put("value", ByteBuffer.wrap(serializableSample));
  field.put("termVector", "WITH_OFFSETS");
  fieldables.add(field);

  GenericRecord doc = new GenericData.Record(documentSchema);
  doc.put("boost", 2.3f);
  doc.put("fieldables", fieldables);

  GenericRecord add = new GenericData.Record(addSchema);
  add.put("class", classReferences.indexOf(AvroTest.class.getName()));
  GenericRecord id = new GenericData.Record(idSchema);
  id.put("value", ByteBuffer.wrap(serializableSample));
  add.put("id", id);
  add.put("document", doc);
  Map<String, String> analyzers = new HashMap<String, String>();
  analyzers.put("name", "ngram");
  analyzers.put("description", "porter");
  add.put("fieldToAnalyzerMap", analyzers);

  GenericRecord delete = new GenericData.Record(deleteSchema);
  delete.put("class", classReferences.indexOf(AvroTest.class.getName()));
  id = new GenericData.Record(idSchema);
  id.put("value", Long.valueOf(30));
  delete.put("id", id);

  GenericRecord purgeAll = new GenericData.Record(purgeAllSchema);
  purgeAll.put("class", classReferences.indexOf(AvroTest.class.getName()));
  GenericRecord optimizeAll = new GenericData.Record(optimizeAllSchema);
  GenericRecord flush = new GenericData.Record(flushSchema);

  List<GenericRecord> operations = new ArrayList<GenericRecord>(1);
  operations.add(purgeAll);
  operations.add(optimizeAll);
  operations.add(flush);
  operations.add(delete);
  operations.add(add);

  GenericRecord message = new GenericData.Record(messageSchema);
  message.put("classReferences", classReferences);
  message.put("operations", operations);

  writer.write(message, encoder);
  encoder.flush();

  ByteArrayInputStream inputStream = new ByteArrayInputStream(out.toByteArray());
  Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(messageSchema);
  while (true) {
    try {
      GenericRecord result = reader.read(null, decoder);
      System.out.println(result);
      assertThat(result).isNotNull();
      // operations
      assertThat(result.get("operations")).isNotNull().isInstanceOf(List.class);
      List<?> ops = (List<?>) result.get("operations");
      assertThat(ops).hasSize(5);
      // Flush
      assertThat(ops.get(2)).isInstanceOf(GenericRecord.class);
      GenericRecord flushOp = (GenericRecord) ops.get(2);
      assertThat(flushOp.getSchema().getName()).isEqualTo("Flush");
      // Delete
      assertThat(ops.get(3)).isInstanceOf(GenericRecord.class);
      GenericRecord deleteOp = (GenericRecord) ops.get(3);
      assertThat(deleteOp.getSchema().getName()).isEqualTo("Delete");
      Object actual = ((GenericRecord) deleteOp.get("id")).get("value");
      assertThat(actual).isInstanceOf(Long.class);
      assertThat(actual).isEqualTo(Long.valueOf(30));
      // Add
      assertThat(ops.get(4)).isInstanceOf(GenericRecord.class);
      GenericRecord addOp = (GenericRecord) ops.get(4);
      assertThat(addOp.getSchema().getName()).isEqualTo("Add");
      actual = ((GenericRecord) addOp.get("id")).get("value");
      assertThat(actual).isInstanceOf(ByteBuffer.class);
      ByteBuffer bb = (ByteBuffer) actual;
      assertThat(bb.hasArray()).isTrue();
      byte[] copy = new byte[bb.remaining()];
      bb.get(copy);
      assertThat(serializableSample).isEqualTo(copy);
      // fieldToAnalyzerMap
      assertThat(addOp.get("fieldToAnalyzerMap")).isInstanceOf(Map.class);
      assertThat((Map) addOp.get("fieldToAnalyzerMap")).hasSize(2);
      // document
      assertThat(addOp.get("document")).isNotNull();
      GenericRecord document = (GenericRecord) addOp.get("document");
      assertThat(document.get("boost")).isEqualTo(2.3f);
      // numeric fields
      assertThat(document.get("fieldables")).isNotNull().isInstanceOf(List.class);
      List<?> fields = (List<?>) document.get("fieldables");
      assertThat(fields).hasSize(9); // custom + 4 numerics + 4 fields
      field = (GenericRecord) fields.get(0);
      assertThat(field.getSchema().getName()).isEqualTo("CustomFieldable");
      field = (GenericRecord) fields.get(1);
      assertThat(field.getSchema().getName()).isEqualTo("NumericIntField");
      assertThat(field.get("value")).isEqualTo(3);
      assertNumericField(field);
      field = (GenericRecord) fields.get(2);
      assertThat(field.getSchema().getName()).isEqualTo("NumericLongField");
      assertThat(field.get("value")).isEqualTo(3L);
      assertNumericField(field);
      field = (GenericRecord) fields.get(3);
      assertThat(field.getSchema().getName()).isEqualTo("NumericFloatField");
      assertThat(field.get("value")).isEqualTo(2.3f);
      assertNumericField(field);
      field = (GenericRecord) fields.get(4);
      assertThat(field.getSchema().getName()).isEqualTo("NumericDoubleField");
      assertThat(field.get("value")).isEqualTo(2.3d);
      assertNumericField(field);
      // fields
      field = (GenericRecord) fields.get(5);
      assertThat(field.getSchema().getName()).isEqualTo("BinaryField");
      assertThat(field.get("value")).isInstanceOf(ByteBuffer.class);
      assertField(field);
      field = (GenericRecord) fields.get(6);
      assertThat(field.getSchema().getName()).isEqualTo("StringField");
      assertThat(field.get("value")).isInstanceOf(Utf8.class);
      assertTermVector(field);
      assertIndexAndStore(field);
      assertField(field);
      field = (GenericRecord) fields.get(7);
      assertThat(field.getSchema().getName()).isEqualTo("TokenStreamField");
      assertThat(field.get("value")).isInstanceOf(List.class);
      List<List<Object>> l1 = (List<List<Object>>) field.get("value");
      assertThat(l1.get(0)).as("Wrong attribute impl list").hasSize(2);
      Object object = l1.get(0).get(0);
      assertThat(object).isNotNull();
      assertTermVector(field);
      assertField(field);
      field = (GenericRecord) fields.get(8);
      assertThat(field.getSchema().getName()).isEqualTo("ReaderField");
      assertThat(field.get("value")).isInstanceOf(ByteBuffer.class);
      assertTermVector(field);
      assertField(field);
    } catch (EOFException eof) {
      break;
    } catch (Exception ex) {
      ex.printStackTrace();
      throw ex;
    }
  }
}
private void buildLuceneDocument(GenericRecord document, LuceneWorksBuilder hydrator) {
  hydrator.defineDocument();
  List<GenericRecord> fieldables = asListOfGenericRecords(document, "fieldables");
  for (GenericRecord field : fieldables) {
    String schema = field.getSchema().getName();
    if ("CustomFieldable".equals(schema)) {
      hydrator.addFieldable(asByteArray(field, "instance"));
    } else if ("NumericIntField".equals(schema)) {
      hydrator.addIntNumericField(
          asInt(field, "value"),
          asString(field, "name"),
          asInt(field, "precisionStep"),
          asStore(field),
          asBoolean(field, "indexed"),
          asFloat(field, "boost"),
          asBoolean(field, "omitNorms"),
          asBoolean(field, "omitTermFreqAndPositions"));
    } else if ("NumericFloatField".equals(schema)) {
      hydrator.addFloatNumericField(
          asFloat(field, "value"),
          asString(field, "name"),
          asInt(field, "precisionStep"),
          asStore(field),
          asBoolean(field, "indexed"),
          asFloat(field, "boost"),
          asBoolean(field, "omitNorms"),
          asBoolean(field, "omitTermFreqAndPositions"));
    } else if ("NumericLongField".equals(schema)) {
      hydrator.addLongNumericField(
          asLong(field, "value"),
          asString(field, "name"),
          asInt(field, "precisionStep"),
          asStore(field),
          asBoolean(field, "indexed"),
          asFloat(field, "boost"),
          asBoolean(field, "omitNorms"),
          asBoolean(field, "omitTermFreqAndPositions"));
    } else if ("NumericDoubleField".equals(schema)) {
      hydrator.addDoubleNumericField(
          asDouble(field, "value"),
          asString(field, "name"),
          asInt(field, "precisionStep"),
          asStore(field),
          asBoolean(field, "indexed"),
          asFloat(field, "boost"),
          asBoolean(field, "omitNorms"),
          asBoolean(field, "omitTermFreqAndPositions"));
    } else if ("BinaryField".equals(schema)) {
      hydrator.addFieldWithBinaryData(
          asString(field, "name"),
          asByteArray(field, "value"),
          asInt(field, "offset"),
          asInt(field, "length"));
    } else if ("StringField".equals(schema)) {
      hydrator.addFieldWithStringData(
          asString(field, "name"),
          asString(field, "value"),
          asStore(field),
          asIndex(field),
          asTermVector(field),
          asFloat(field, "boost"),
          asBoolean(field, "omitNorms"),
          asBoolean(field, "omitTermFreqAndPositions"));
    } else if ("TokenStreamField".equals(schema)) {
      buildAttributes(field, "value", hydrator);
      hydrator.addFieldWithTokenStreamData(
          asString(field, "name"),
          asTermVector(field),
          asFloat(field, "boost"),
          asBoolean(field, "omitNorms"),
          asBoolean(field, "omitTermFreqAndPositions"));
    } else if ("ReaderField".equals(schema)) {
      hydrator.addFieldWithSerializableReaderData(
          asString(field, "name"),
          asByteArray(field, "value"),
          asTermVector(field),
          asFloat(field, "boost"),
          asBoolean(field, "omitNorms"),
          asBoolean(field, "omitTermFreqAndPositions"));
    } else if ("BinaryDocValuesField".equals(schema)) {
      hydrator.addDocValuesFieldWithBinaryData(
          asString(field, "name"),
          asString(field, "type"),
          asByteArray(field, "value"),
          asInt(field, "offset"),
          asInt(field, "length"));
    } else if ("NumericDocValuesField".equals(schema)) {
      hydrator.addDocValuesFieldWithNumericData(
          asString(field, "name"),
          asString(field, "type"),
          asLong(field, "value"));
    } else {
      throw log.cannotDeserializeField(schema);
    }
  }
}