private CommonRecord createCommonRecord(GenericRecord avroRecord) {
   GenericFixed uuidFixed = (GenericFixed) avroRecord.get(UUID);
   if (uuidFixed != null) {
     UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed);
     CommonRecord newRecord = commonFactory.createCommonRecord(uuid, avroRecord.getSchema());
     records.put(uuid, newRecord);
     return newRecord;
   } else {
     return commonFactory.createCommonRecord(avroRecord.getSchema());
   }
 }
 private void processRecordField(
     CommonRecord record, GenericRecord deltaRecord, String fieldName) {
   CommonRecord nextRecord = null;
   CommonValue nextValue = record.getField(fieldName);
   if (nextValue != null
       && nextValue.isRecord()
       && nextValue
           .getRecord()
           .getSchema()
           .getFullName()
           .equals(deltaRecord.getSchema().getFullName())) {
     nextRecord = nextValue.getRecord();
     GenericFixed uuidFixed = (GenericFixed) deltaRecord.get(UUID);
     if (uuidFixed != null) {
       UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed);
       // Checking if the uuid was changed
       if (!uuid.equals(nextRecord.getUuid())) {
         records.remove(nextRecord.getUuid());
         records.put(uuid, nextRecord);
         nextRecord.setUuid(uuid);
       }
     }
   } else {
     nextRecord = createCommonRecord(deltaRecord);
     record.setField(fieldName, commonFactory.createCommonValue(nextRecord));
   }
   updateRecord(nextRecord, deltaRecord);
 }
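Both methods above delegate UUID decoding to AvroGenericUtils.createUuidFromFixed, whose implementation is not shown in this listing. A minimal sketch of such a helper, assuming the GenericFixed payload holds 16 bytes with the most significant half first, might look like this:

import java.nio.ByteBuffer;
import java.util.UUID;
import org.apache.avro.generic.GenericFixed;

// Hypothetical sketch, not the actual utility: rebuild a java.util.UUID
// from a 16-byte Avro fixed value (most significant 8 bytes first).
public static UUID createUuidFromFixed(GenericFixed fixed) {
  ByteBuffer buffer = ByteBuffer.wrap(fixed.bytes());
  return new UUID(buffer.getLong(), buffer.getLong());
}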
Example #3
 private void assertField(GenericRecord field) {
   assertThat(field.get("name")).isInstanceOf(Utf8.class);
   assertThat(field.get("name").toString()).isEqualTo(field.getSchema().getName());
   assertThat(field.get("boost")).isEqualTo(2.3f);
   assertThat(field.get("omitNorms")).isEqualTo(true);
   assertThat(field.get("omitTermFreqAndPositions")).isEqualTo(true);
 }
Example #4
 private void buildAttribute(Object element, LuceneWorksBuilder hydrator) {
   if (element instanceof GenericRecord) {
     GenericRecord record = (GenericRecord) element;
     String name = record.getSchema().getName();
     if ("TokenTrackingAttribute".equals(name)) {
       @SuppressWarnings("unchecked")
       List<Integer> positionList = (List<Integer>) record.get("positions");
       hydrator.addTokenTrackingAttribute(positionList);
     } else if ("CharTermAttribute".equals(name)) {
       hydrator.addCharTermAttribute((CharSequence) record.get("sequence"));
     } else if ("PayloadAttribute".equals(name)) {
       hydrator.addPayloadAttribute(asByteArray(record, "payload"));
     } else if ("KeywordAttribute".equals(name)) {
       hydrator.addKeywordAttribute(asBoolean(record, "isKeyword"));
     } else if ("PositionIncrementAttribute".equals(name)) {
       hydrator.addPositionIncrementAttribute(asInt(record, "positionIncrement"));
     } else if ("FlagsAttribute".equals(name)) {
       hydrator.addFlagsAttribute(asInt(record, "flags"));
     } else if ("TypeAttribute".equals(name)) {
       hydrator.addTypeAttribute(asString(record, "type"));
     } else if ("OffsetAttribute".equals(name)) {
       hydrator.addOffsetAttribute(asInt(record, "startOffset"), asInt(record, "endOffset"));
     } else {
       throw log.unknownAttributeSerializedRepresentation(name);
     }
   } else if (element instanceof ByteBuffer) {
     hydrator.addSerializedAttribute(asByteArray((ByteBuffer) element));
   } else {
     throw log.unknownAttributeSerializedRepresentation(element.getClass().getName());
   }
 }
Example #5
  @Override
  public void deserialize(byte[] data, LuceneWorksBuilder hydrator) {
    final ByteArrayInputStream inputStream = new ByteArrayInputStream(data);
    final int majorVersion = inputStream.read();
    final int minorVersion = inputStream.read();
    final Protocol protocol = protocols.getProtocol(majorVersion, minorVersion);

    Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
    GenericDatumReader<GenericRecord> reader =
        new GenericDatumReader<>(protocol.getType("Message"));
    GenericRecord result;
    try {
      result = reader.read(null, decoder);
    } catch (IOException e) {
      throw log.unableToDeserializeAvroStream(e);
    }

    classReferences = asListOfString(result, "classReferences");
    final List<GenericRecord> operations = asListOfGenericRecords(result, "operations");
    final ConversionContext conversionContext = new ContextualExceptionBridgeHelper();
    for (GenericRecord operation : operations) {
      String schema = operation.getSchema().getName();
      if ("OptimizeAll".equals(schema)) {
        hydrator.addOptimizeAll();
      } else if ("PurgeAll".equals(schema)) {
        hydrator.addPurgeAllLuceneWork(asClass(operation, "class"));
      } else if ("Flush".equals(schema)) {
        hydrator.addFlush();
      } else if ("Delete".equals(schema)) {
        processId(operation, hydrator);
        hydrator.addDeleteLuceneWork(asClass(operation, "class"), conversionContext);
      } else if ("DeleteByQuery".equals(schema)) {
        String entityClassName = asClass(operation, "class");
        int queryKey = asInt(operation, "key");
        DeleteByQuerySupport.StringToQueryMapper mapper =
            DeleteByQuerySupport.getStringToQueryMapper(queryKey);
        List<Utf8> stringList = asListOfString(operation, "query");
        String[] query = new String[stringList.size()];
        for (int i = 0; i < stringList.size(); ++i) {
          query[i] = stringList.get(i).toString();
        }
        hydrator.addDeleteByQueryLuceneWork(entityClassName, mapper.fromString(query));
      } else if ("Add".equals(schema)) {
        buildLuceneDocument(asGenericRecord(operation, "document"), hydrator);
        Map<String, String> analyzers = getAnalyzers(operation);
        processId(operation, hydrator);
        hydrator.addAddLuceneWork(asClass(operation, "class"), analyzers, conversionContext);
      } else if ("Update".equals(schema)) {
        buildLuceneDocument(asGenericRecord(operation, "document"), hydrator);
        Map<String, String> analyzers = getAnalyzers(operation);
        processId(operation, hydrator);
        hydrator.addUpdateLuceneWork(asClass(operation, "class"), analyzers, conversionContext);
      } else {
        throw log.cannotDeserializeOperation(schema);
      }
    }
  }
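The deserializer above expects two unframed version bytes followed by a binary-encoded Message record. A minimal sketch of the matching writer side under that assumed layout (the helper name is illustrative):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Protocol;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;

// Illustrative counterpart of deserialize(): version header first, then the record.
static byte[] serializeMessage(Protocol protocol, int major, int minor, GenericRecord message)
    throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  out.write(major); // one unframed byte each, mirroring the two inputStream.read() calls above
  out.write(minor);
  Encoder encoder = EncoderFactory.get().directBinaryEncoder(out, null);
  new GenericDatumWriter<GenericRecord>(protocol.getType("Message")).write(message, encoder);
  encoder.flush();
  return out.toByteArray();
}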
Example #6
 private Value decodeValue(final Object generic) {
   if (generic instanceof GenericRecord) {
     final GenericRecord record = (GenericRecord) generic;
     final Schema schema = record.getSchema();
     if (schema.equals(Schemas.COMPRESSED_IDENTIFIER) || schema.equals(Schemas.PLAIN_IDENTIFIER)) {
       return decodeIdentifier(record);
     }
   }
   return decodeLiteral(generic);
 }
Example #7
  private List<Map.Entry> previewAvroBatch(FileStatus fileStatus, int batchSize)
      throws IOException, InterruptedException {
    SeekableInput input = new FsInput(fileStatus.getPath(), hadoopConf);
    DatumReader<GenericRecord> reader = new GenericDatumReader<>();
    List<Map.Entry> batch = new ArrayList<>();
    int count = 0;
    // Close the file reader even if a record fails to re-serialize.
    try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader)) {
      while (fileReader.hasNext() && batch.size() < batchSize) {
        GenericRecord datum = fileReader.next();
        // Wrap each record in its own in-memory Avro file so every batch entry is self-contained.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DataFileWriter<GenericRecord> dataFileWriter =
            new DataFileWriter<GenericRecord>(
                new GenericDatumWriter<GenericRecord>(datum.getSchema()));
        dataFileWriter.create(datum.getSchema(), out);
        dataFileWriter.append(datum);
        dataFileWriter.close();
        out.close();
        batch.add(
            new Pair(fileStatus.getPath().toUri().getPath() + "::" + count, out.toByteArray()));
        count++;
      }
    }
    return batch;
  }
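Since each batch value written above is a complete single-record Avro file, a consumer can decode an entry in isolation. A small sketch (the helper name is illustrative):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

// Illustrative: read back one preview entry produced by previewAvroBatch().
static GenericRecord readPreviewEntry(byte[] value) throws IOException {
  try (DataFileStream<GenericRecord> stream =
      new DataFileStream<>(
          new ByteArrayInputStream(value), new GenericDatumReader<GenericRecord>())) {
    return stream.next(); // the writer appended exactly one record
  }
}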
Example #8
 private Object decodeNode(final Object generic) {
   if (generic instanceof GenericRecord) {
     final GenericRecord record = (GenericRecord) generic;
     final Schema schema = record.getSchema();
     if (schema.equals(Schemas.RECORD)) {
       return decodeRecord(record, null);
     } else if (schema.equals(Schemas.PLAIN_IDENTIFIER)
         || schema.equals(Schemas.COMPRESSED_IDENTIFIER)) {
       return decodeIdentifier(record);
     } else if (schema.equals(Schemas.STATEMENT)) {
       return decodeStatement(record);
     }
   }
   return decodeLiteral(generic);
 }
Example #9
  public void testWrite() throws IOException {

    URL url = this.getClass().getClassLoader().getResource("input/Company.avsc");
    assertNotNull(url);
    Schema schema = new Schema.Parser().parse(new File(url.getFile()));
    assertNotNull(schema);

    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
    // Another way of loading a file
    File file = new File("src/test/resources/input/companies.avro");
    DataFileReader<GenericRecord> dataFileReader =
        new DataFileReader<GenericRecord>(file, datumReader);

    File fileOut = new File("target/companies2.avro");
    Schema schemaOut =
        new Schema.Parser().parse(new File("src/test/resources/input/Company2.avsc"));
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schemaOut);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);

    GenericRecord company = null;
    int count = 0;
    while (dataFileReader.hasNext()) {
      company = dataFileReader.next(company);
      if (company.get("name").toString().equals("aol")) {
        // Create the output file on the first match only; create() must not be called twice.
        if (count == 0) {
          dataFileWriter.create(schemaOut, fileOut);
        }

        GenericRecord recordOut = new GenericData.Record(schemaOut);
        recordOut.put("id", company.get("id"));
        recordOut.put("name", company.get("name"));
        assertTrue(recordOut.getSchema().getField("address") != null);
        assertTrue(recordOut.getSchema().getField("employeeCount") == null);

        // address is of complex type; deep-copy it from the source record
        GenericRecord address =
            new GenericData.Record((GenericData.Record) company.get("address"), true);
        recordOut.put("address", address);

        dataFileWriter.append(recordOut);

        count++;
      }
    }
    assertTrue(count > 0);

    dataFileReader.close();
    dataFileWriter.close();
  }
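The field-by-field copy above can also be expressed through Avro schema resolution: constructing the reader with both the writer and the reader schema makes it project each record onto the narrower output schema. A sketch, assuming Company2.avsc is a valid projection of Company.avsc (every field it declares exists in the source or carries a default):

// Illustrative alternative: Avro projects each record onto schemaOut while reading.
DatumReader<GenericRecord> projectingReader =
    new GenericDatumReader<GenericRecord>(schema, schemaOut);
DataFileReader<GenericRecord> projectingFileReader =
    new DataFileReader<GenericRecord>(file, projectingReader);
while (projectingFileReader.hasNext()) {
  GenericRecord projected = projectingFileReader.next(); // already matches schemaOut
}
projectingFileReader.close();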
Example #10
 private Resource decodeIdentifier(final GenericRecord record) {
   final Schema schema = record.getSchema();
   if (schema.equals(Schemas.COMPRESSED_IDENTIFIER)) {
     try {
       return this.dictionary.objectFor((Integer) record.get(0));
     } catch (final IOException ex) {
       throw new IllegalStateException("Cannot access dictionary: " + ex.getMessage(), ex);
     }
   } else if (schema.equals(Schemas.PLAIN_IDENTIFIER)) {
     final String string = record.get(0).toString();
     if (string.startsWith("_:")) {
       return this.factory.createBNode(string.substring(2));
     } else {
       return this.factory.createURI(string);
     }
   }
   throw new IllegalArgumentException("Unsupported encoded identifier: " + record);
 }
Example #11
 private Literal decodeLiteral(final Object generic) {
   if (generic instanceof GenericRecord) {
     final GenericRecord record = (GenericRecord) generic;
     final Schema schema = record.getSchema();
     if (schema.equals(Schemas.STRING_LANG)) {
        final String label = record.get(0).toString(); // Avro hands back a Utf8; convert it
       final Object language = record.get(1);
       return this.factory.createLiteral(label, language.toString());
     } else if (schema.equals(Schemas.SHORT)) {
       return this.factory.createLiteral(((Integer) record.get(0)).shortValue());
     } else if (schema.equals(Schemas.BYTE)) {
       return this.factory.createLiteral(((Integer) record.get(0)).byteValue());
     } else if (schema.equals(Schemas.BIGINTEGER)) {
       return this.factory.createLiteral(record.get(0).toString(), XMLSchema.INTEGER);
     } else if (schema.equals(Schemas.BIGDECIMAL)) {
       return this.factory.createLiteral(record.get(0).toString(), XMLSchema.DECIMAL);
     } else if (schema.equals(Schemas.CALENDAR)) {
       final int tz = (Integer) record.get(0);
       final GregorianCalendar calendar = new GregorianCalendar();
       calendar.setTimeInMillis((Long) record.get(1));
       calendar.setTimeZone(
           TimeZone.getTimeZone(
               String.format(
                   "GMT%s%02d:%02d", tz >= 0 ? "+" : "-", Math.abs(tz) / 60, Math.abs(tz) % 60)));
       return this.factory.createLiteral(this.datatypeFactory.newXMLGregorianCalendar(calendar));
     }
   } else if (generic instanceof CharSequence) {
      return this.factory.createLiteral(generic.toString()); // covers both String and Utf8
   } else if (generic instanceof Boolean) {
     return this.factory.createLiteral((Boolean) generic);
   } else if (generic instanceof Long) {
     return this.factory.createLiteral((Long) generic);
   } else if (generic instanceof Integer) {
     return this.factory.createLiteral((Integer) generic);
   } else if (generic instanceof Double) {
     return this.factory.createLiteral((Double) generic);
   } else if (generic instanceof Float) {
     return this.factory.createLiteral((Float) generic);
   }
   Preconditions.checkNotNull(generic);
   throw new IllegalArgumentException("Unsupported generic data: " + generic);
 }
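For the CALENDAR branch above, the first field is a signed timezone offset in minutes; tz = -330, for example, formats to "GMT-05:30" (sign from the comparison, hours from Math.abs(tz) / 60, minutes from Math.abs(tz) % 60).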
Example #12
  /**
   * Gives the output message.
   *
   * @param outputType output data type
   * @param result mapping result
   * @return the output as a String
   * @throws IOException if the encoder cannot be flushed
   */
  public String getOutputMessage(String outputType, GenericRecord result)
      throws SynapseException, IOException {

    DatumWriter<GenericRecord> writer = null;
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    Encoder encoder = new DummyEncoder(byteArrayOutputStream);

    try {
      writer = WriterRegistry.getInstance().get(outputType).newInstance();
      writer.setSchema(result.getSchema());
      writer.write(result, encoder);

      if (log.isDebugEnabled()) {
        log.debug("Output received from datum writer: " + byteArrayOutputStream.toString());
      }
    } catch (Exception e) {
      handleException("Data conversion failed at JSONWriter", e);
    } finally {
      encoder.flush();
    }

    return byteArrayOutputStream.toString();
  }
Example #13
    /**
     * Merges all records that share a key into a single output record, keeping the last
     * non-null value seen for each field, and emits the result only when the PSAM,
     * longitude and latitude fields are all present.
     *
     * @param key the grouping key
     * @param values the Avro records to merge
     * @param context the reducer context
     * @exception IOException if an error occurs
     * @exception InterruptedException if an error occurs
     */
    @Override
    public final void reduce(Text key, Iterable<AvroValue<GenericRecord>> values, Context context)
        throws IOException, InterruptedException {
      GenericRecord output = new GenericData.Record(outputSchema);

      for (AvroValue<GenericRecord> value : values) {
        GenericRecord datum = value.datum();
        for (Schema.Field field : datum.getSchema().getFields()) {
          String fieldName = field.name();
          Object fieldValue = datum.get(fieldName);
          if (fieldValue != null) {
            output.put(fieldName, fieldValue);
          }
        }
      }

      CharSequence psam = (CharSequence) output.get(PSAM);
      CharSequence longitude = (CharSequence) output.get(LONGITUDE);
      CharSequence latitude = (CharSequence) output.get(LATITUDE);
      if (psam != null && longitude != null && latitude != null) {
        context.write(new AvroKey<GenericRecord>(output), NullWritable.get());
      }
    }
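For context, a reducer like this is typically wired up with the avro-mapreduce helpers. A minimal driver sketch, in which the job name and the reuse of outputSchema for both map output values and reduce output keys are assumptions:

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

// Illustrative job wiring for the reduce() above.
static Job configureJob(Configuration conf, Schema outputSchema) throws IOException {
  Job job = Job.getInstance(conf, "psam-merge");
  job.setMapOutputKeyClass(Text.class);
  AvroJob.setMapOutputValueSchema(job, outputSchema); // AvroValue<GenericRecord> from the mapper
  AvroJob.setOutputKeySchema(job, outputSchema); // AvroKey<GenericRecord> from the reducer
  job.setOutputFormatClass(AvroKeyOutputFormat.class);
  return job;
}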
Example #14
 private void updateRecord(CommonRecord record, GenericRecord delta) {
   List<Field> deltaFields = delta.getSchema().getFields();
   for (Field deltaField : deltaFields) {
     String fieldName = deltaField.name();
     Object rawDeltaField = delta.get(fieldName);
     if (LOG.isDebugEnabled()) {
       LOG.debug(
           "Processing field \"{}\", current value: {}",
           fieldName,
           record.getField(fieldName) != null ? record.getField(fieldName).toString() : null);
     }
     if (AvroGenericUtils.isRecord(rawDeltaField)) {
       processRecordField(record, (GenericRecord) rawDeltaField, fieldName);
     } else if (AvroGenericUtils.isArray(rawDeltaField)) {
       processArrayField(record, (GenericArray) rawDeltaField, fieldName);
     } else if (AvroGenericUtils.isEnum(rawDeltaField)) {
       processEnumField(record, (GenericEnumSymbol) rawDeltaField, fieldName);
     } else if (AvroGenericUtils.isFixed(rawDeltaField)) {
       processFixedField(record, (GenericFixed) rawDeltaField, fieldName);
     } else {
       record.setField(fieldName, commonFactory.createCommonValue(rawDeltaField));
     }
   }
 }
Example #15
  /**
   * @param dbUpdates The dbUpdates present in the current transaction
   * @param ti The meta information about the transaction. (See TransactionInfo class for more
   *     details).
   * @throws DatabusException
   * @throws UnsupportedKeyException
   */
  protected void addEventToBuffer(
      List<TransactionState.PerSourceTransactionalUpdate> dbUpdates, TransactionInfo ti)
      throws DatabusException, UnsupportedKeyException {
    if (dbUpdates.size() == 0) throw new DatabusException("Cannot handle empty dbUpdates");

    long scn = ti.getScn();
    long timestamp = ti.getTransactionTimeStampNs();
    EventSourceStatistics globalStats = getSource(GLOBAL_SOURCE_ID).getStatisticsBean();

    /**
     * We skip the start SCN of the relay; we have already added an EOP for this SCN in the
     * buffer. Why is this not a problem? There are two cases: 1. When we use the earliest/latest
     * scn there is no maxScn (we don't really have a start point), so it's OK to miss the first
     * event. 2. If it's the maxScn, the event was already seen by the relay.
     */
    if (scn == _startPrevScn.get()) {
      _log.info("Skipping this transaction, EOP already sent for this event");
      return;
    }

    getEventBuffer().startEvents();

    int eventsInTransactionCount = 0;

    List<EventReaderSummary> summaries = new ArrayList<EventReaderSummary>();

    for (int i = 0; i < dbUpdates.size(); ++i) {
      GenericRecord record = null;
      TransactionState.PerSourceTransactionalUpdate perSourceUpdate = dbUpdates.get(i);
      short sourceId = (short) perSourceUpdate.getSourceId();
      // prepare stats collection per source
      EventSourceStatistics perSourceStats = getSource(sourceId).getStatisticsBean();

      Iterator<DbUpdateState.DBUpdateImage> dbUpdateIterator =
          perSourceUpdate.getDbUpdatesSet().iterator();
      int eventsInDbUpdate = 0;
      long dbUpdatesEventsSize = 0;
      long startDbUpdatesMs = System.currentTimeMillis();

      // TODO verify if there is any case where we need to roll back.
      while (dbUpdateIterator.hasNext()) {
        DbUpdateState.DBUpdateImage dbUpdate = dbUpdateIterator.next();

        // Construct the Databus Event key, determine the key type and construct the key
        Object keyObj = obtainKey(dbUpdate);
        DbusEventKey eventKey = new DbusEventKey(keyObj);

        // Get the logical partition id
        PartitionFunction partitionFunction = _partitionFunctionHashMap.get((int) sourceId);
        short lPartitionId = partitionFunction.getPartition(eventKey);

        record = dbUpdate.getGenericRecord();
        // Write the event to the buffer
        if (record == null)
          throw new DatabusException("Cannot write event to buffer because the record is null");

        if (record.getSchema() == null)
          throw new DatabusException("The record does not have a schema (null schema)");

        try {
          // Collect stats on number of dbUpdates for one source
          eventsInDbUpdate++;

          // Count of all the events in the current transaction
          eventsInTransactionCount++;
          // Serialize the row
          ByteArrayOutputStream bos = new ByteArrayOutputStream();
          Encoder encoder = new BinaryEncoder(bos);
          GenericDatumWriter<GenericRecord> writer =
              new GenericDatumWriter<GenericRecord>(record.getSchema());
          writer.write(record, encoder);
          byte[] serializedValue = bos.toByteArray();

          // Get the md5 for the schema
          SchemaId schemaId = SchemaId.createWithMd5(dbUpdate.getSchema());

          // Determine the operation type and convert to dbus opcode
          DbusOpcode opCode;
          if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.INSERT
              || dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.UPDATE) {
            opCode = DbusOpcode.UPSERT;
            if (_log.isDebugEnabled())
              _log.debug("The event with scn " + scn + " is INSERT/UPDATE");
          } else if (dbUpdate.getOpType() == DbUpdateState.DBUpdateImage.OpType.DELETE) {
            opCode = DbusOpcode.DELETE;
            if (_log.isDebugEnabled()) _log.debug("The event with scn " + scn + " is DELETE");
          } else {
            throw new DatabusException("Unknown opcode from dbUpdate for event with scn:" + scn);
          }

          // Construct the dbusEvent info
          DbusEventInfo dbusEventInfo =
              new DbusEventInfo(
                  opCode,
                  scn,
                  (short) _pConfig.getId(),
                  lPartitionId,
                  timestamp,
                  sourceId,
                  schemaId.getByteArray(),
                  serializedValue,
                  false,
                  false);
          dbusEventInfo.setReplicated(dbUpdate.isReplicated());

          perSourceStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);
          globalStats.addEventCycle(1, ti.getTransactionTimeRead(), serializedValue.length, scn);

          long tsEnd = System.currentTimeMillis();
          perSourceStats.addTimeOfLastDBAccess(tsEnd);
          globalStats.addTimeOfLastDBAccess(tsEnd);

          // Append to the event buffer
          getEventBuffer().appendEvent(eventKey, dbusEventInfo, _statsCollector);
          _rc.incrementEventCount();
          dbUpdatesEventsSize += serializedValue.length;
        } catch (IOException io) {
          perSourceStats.addError();
          globalStats.addEmptyEventCycle();
          _log.error(
              "Cannot create byte stream payload for source " + dbUpdates.get(i).getSourceId(),
              io);
        }
      }
      long endDbUpdatesMs = System.currentTimeMillis();
      long dbUpdatesElapsedTimeMs = endDbUpdatesMs - startDbUpdatesMs;

      // Log Event Summary at logical source level
      EventReaderSummary summary =
          new EventReaderSummary(
              sourceId,
              _monitoredSources.get(sourceId).getSourceName(),
              scn,
              eventsInDbUpdate,
              dbUpdatesEventsSize,
              -1L /* Not supported */,
              dbUpdatesElapsedTimeMs,
              timestamp,
              timestamp,
              -1L /* Not supported */);
      if (_eventsLog.isInfoEnabled()) {
        _eventsLog.info(summary.toString());
      }
      summaries.add(summary);

      if (_log.isDebugEnabled())
        _log.debug("There are " + eventsInDbUpdate + " events seen in the current dbUpdate");
    }

    // update stats
    _ggParserStats.addTransactionInfo(ti, eventsInTransactionCount);

    // Log Event Summary at Physical source level
    ReadEventCycleSummary summary =
        new ReadEventCycleSummary(
            _pConfig.getName(),
            summaries,
            scn,
            -1 /* Overall time including query time not calculated */);

    if (_eventsLog.isInfoEnabled()) {
      _eventsLog.info(summary.toString());
    }

    _log.info("Writing " + eventsInTransactionCount + " events from transaction with scn: " + scn);
    if (scn <= 0)
      throw new DatabusException(
          "Unable to write events to buffer because of negative/zero scn: " + scn);

    getEventBuffer().endEvents(scn, _statsCollector);
    _scn.set(scn);

    if (getMaxScnReaderWriter() != null) {
      try {
        getMaxScnReaderWriter().saveMaxScn(_scn.get());
      } catch (DatabusException e) {
        _log.error("Cannot save scn = " + _scn + " for physical source = " + getName(), e);
      }
    }
  }
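The serialization block above uses Avro's long-deprecated public BinaryEncoder constructor. On current Avro the same step goes through EncoderFactory; a sketch of the equivalent, noting that the buffered encoder must be flushed before the bytes are read:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

// Illustrative modern equivalent of the row-serialization step in addEventToBuffer().
static byte[] serializeRow(GenericRecord record) throws IOException {
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bos, null);
  new GenericDatumWriter<GenericRecord>(record.getSchema()).write(record, encoder);
  encoder.flush(); // required: binaryEncoder() returns a buffered encoder
  return bos.toByteArray();
}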
Example #16
  @Test
  public void experimentWithAvro() throws Exception {
    String root = "org/hibernate/search/remote/codex/avro/v1_1/";
    parseSchema(root + "attribute/TokenTrackingAttribute.avro", "attribute/TokenTrackingAttribute");
    parseSchema(root + "attribute/CharTermAttribute.avro", "attribute/CharTermAttribute");
    parseSchema(root + "attribute/PayloadAttribute.avro", "attribute/PayloadAttribute");
    parseSchema(root + "attribute/KeywordAttribute.avro", "attribute/KeywordAttribute");
    parseSchema(
        root + "attribute/PositionIncrementAttribute.avro", "attribute/PositionIncrementAttribute");
    parseSchema(root + "attribute/FlagsAttribute.avro", "attribute/FlagsAttribute");
    parseSchema(root + "attribute/TypeAttribute.avro", "attribute/TypeAttribute");
    parseSchema(root + "attribute/OffsetAttribute.avro", "attribute/OffsetAttribute");
    parseSchema(root + "field/TermVector.avro", "field/TermVector");
    parseSchema(root + "field/Index.avro", "field/Index");
    parseSchema(root + "field/Store.avro", "field/Store");
    parseSchema(root + "field/TokenStreamField.avro", "field/TokenStreamField");
    parseSchema(root + "field/ReaderField.avro", "field/ReaderField");
    parseSchema(root + "field/StringField.avro", "field/StringField");
    parseSchema(root + "field/BinaryField.avro", "field/BinaryField");
    parseSchema(root + "field/NumericIntField.avro", "field/NumericIntField");
    parseSchema(root + "field/NumericLongField.avro", "field/NumericLongField");
    parseSchema(root + "field/NumericFloatField.avro", "field/NumericFloatField");
    parseSchema(root + "field/NumericDoubleField.avro", "field/NumericDoubleField");
    parseSchema(root + "field/CustomFieldable.avro", "field/CustomFieldable");
    parseSchema(root + "Document.avro", "Document");
    parseSchema(root + "operation/Id.avro", "operation/Id");
    parseSchema(root + "operation/OptimizeAll.avro", "operation/OptimizeAll");
    parseSchema(root + "operation/PurgeAll.avro", "operation/PurgeAll");
    parseSchema(root + "operation/Flush.avro", "operation/Flush");
    parseSchema(root + "operation/Delete.avro", "operation/Delete");
    parseSchema(root + "operation/Add.avro", "operation/Add");
    parseSchema(root + "operation/Update.avro", "operation/Update");
    parseSchema(root + "Message.avro", "Message");

    String filename = root + "Works.avpr";
    Protocol protocol = parseProtocol(filename, "Works");
    final Schema termVectorSchema = protocol.getType("TermVector");
    final Schema indexSchema = protocol.getType("Index");
    final Schema storeSchema = protocol.getType("Store");
    final Schema tokenTrackingAttribute = protocol.getType("TokenTrackingAttribute");
    final Schema tokenStreamSchema = protocol.getType("TokenStreamField");
    final Schema readerSchema = protocol.getType("ReaderField");
    final Schema stringSchema = protocol.getType("StringField");
    final Schema binarySchema = protocol.getType("BinaryField");
    final Schema intFieldSchema = protocol.getType("NumericIntField");
    final Schema longFieldSchema = protocol.getType("NumericLongField");
    final Schema floatFieldSchema = protocol.getType("NumericFloatField");
    final Schema doubleFieldSchema = protocol.getType("NumericDoubleField");
    final Schema customFieldableSchema = protocol.getType("CustomFieldable");
    final Schema documentSchema = protocol.getType("Document");
    final Schema idSchema = protocol.getType("Id");
    final Schema optimizeAllSchema = protocol.getType("OptimizeAll");
    final Schema purgeAllSchema = protocol.getType("PurgeAll");
    final Schema flushSchema = protocol.getType("Flush");
    final Schema deleteSchema = protocol.getType("Delete");
    final Schema addSchema = protocol.getType("Add");
    final Schema updateSchema = protocol.getType("Update");
    Schema messageSchema = protocol.getType("Message");

    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(messageSchema);
    Encoder encoder = EncoderFactory.get().directBinaryEncoder(out, null);

    byte[] serializableSample = new byte[10];
    for (int i = 0; i < 10; i++) {
      serializableSample[i] = (byte) i;
    }

    List<String> classReferences = new ArrayList<String>();
    classReferences.add(AvroTest.class.getName());

    List<GenericRecord> fieldables = new ArrayList<GenericRecord>(1);
    // custom fieldable
    GenericRecord customFieldable = new GenericData.Record(customFieldableSchema);
    customFieldable.put("instance", ByteBuffer.wrap(serializableSample));
    fieldables.add(customFieldable);

    // numeric fields
    GenericRecord numericField = createNumeric(intFieldSchema);
    numericField.put("value", 3);
    fieldables.add(numericField);
    numericField = createNumeric(longFieldSchema);
    numericField.put("value", 3l);
    fieldables.add(numericField);
    numericField = createNumeric(floatFieldSchema);
    numericField.put("value", 2.3f);
    fieldables.add(numericField);
    numericField = createNumeric(doubleFieldSchema);
    numericField.put("value", 2.3d);
    fieldables.add(numericField);

    // fields
    GenericRecord field = createField(binarySchema);
    field.put("offset", 0);
    field.put("length", 10);
    field.put("value", ByteBuffer.wrap(serializableSample));
    fieldables.add(field);
    field = createField(stringSchema);
    field.put("value", stringSchema.getName());
    field.put("store", "YES");
    field.put("index", "ANALYZED");
    field.put("termVector", "WITH_OFFSETS");
    fieldables.add(field);
    field = createField(tokenStreamSchema);

    List<List<Object>> tokens = new ArrayList<List<Object>>();
    List<Object> attrs = new ArrayList<Object>();
    tokens.add(attrs);
    GenericData.Record attr = new GenericData.Record(tokenTrackingAttribute);
    List<Integer> positions = new ArrayList<Integer>();
    positions.add(1);
    positions.add(2);
    positions.add(3);
    positions.add(4);
    attr.put("positions", positions);
    attrs.add(attr);
    attrs.add(ByteBuffer.wrap(serializableSample));

    field.put("value", tokens);
    field.put("termVector", "WITH_OFFSETS");
    fieldables.add(field);
    field = createField(readerSchema);
    field.put("value", ByteBuffer.wrap(serializableSample));
    field.put("termVector", "WITH_OFFSETS");
    fieldables.add(field);

    GenericRecord doc = new GenericData.Record(documentSchema);
    doc.put("boost", 2.3f);
    doc.put("fieldables", fieldables);

    GenericRecord add = new GenericData.Record(addSchema);
    add.put("class", classReferences.indexOf(AvroTest.class.getName()));
    GenericRecord id = new GenericData.Record(idSchema);
    id.put("value", ByteBuffer.wrap(serializableSample));
    add.put("id", id);
    add.put("document", doc);
    Map<String, String> analyzers = new HashMap<String, String>();
    analyzers.put("name", "ngram");
    analyzers.put("description", "porter");
    add.put("fieldToAnalyzerMap", analyzers);

    GenericRecord delete = new GenericData.Record(deleteSchema);
    delete.put("class", classReferences.indexOf(AvroTest.class.getName()));
    id = new GenericData.Record(idSchema);
    id.put("value", new Long(30));
    delete.put("id", id);

    GenericRecord purgeAll = new GenericData.Record(purgeAllSchema);
    purgeAll.put("class", classReferences.indexOf(AvroTest.class.getName()));
    GenericRecord optimizeAll = new GenericData.Record(optimizeAllSchema);

    GenericRecord flush = new GenericData.Record(flushSchema);

    List<GenericRecord> operations = new ArrayList<GenericRecord>(1);
    operations.add(purgeAll);
    operations.add(optimizeAll);
    operations.add(flush);
    operations.add(delete);
    operations.add(add);

    GenericRecord message = new GenericData.Record(messageSchema);
    message.put("classReferences", classReferences);
    message.put("operations", operations);

    writer.write(message, encoder);
    encoder.flush();

    ByteArrayInputStream inputStream = new ByteArrayInputStream(out.toByteArray());
    Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(messageSchema);
    while (true) {
      try {
        GenericRecord result = reader.read(null, decoder);
        System.out.println(result);

        assertThat(result).isNotNull();
        // operations
        assertThat(result.get("operations")).isNotNull().isInstanceOf(List.class);
        List<?> ops = (List<?>) result.get("operations");
        assertThat(ops).hasSize(5);

        // Flush
        assertThat(ops.get(2)).isInstanceOf(GenericRecord.class);
        GenericRecord flushOp = (GenericRecord) ops.get(2);
        assertThat(flushOp.getSchema().getName()).isEqualTo("Flush");

        // Delete
        assertThat(ops.get(3)).isInstanceOf(GenericRecord.class);
        GenericRecord deleteOp = (GenericRecord) ops.get(3);
        assertThat(deleteOp.getSchema().getName()).isEqualTo("Delete");
        Object actual = ((GenericRecord) deleteOp.get("id")).get("value");
        assertThat(actual).isInstanceOf(Long.class);
        assertThat(actual).isEqualTo(Long.valueOf(30));

        // Add
        assertThat(ops.get(4)).isInstanceOf(GenericRecord.class);
        GenericRecord addOp = (GenericRecord) ops.get(4);
        assertThat(addOp.getSchema().getName()).isEqualTo("Add");
        actual = ((GenericRecord) addOp.get("id")).get("value");
        assertThat(actual).isInstanceOf(ByteBuffer.class);
        ByteBuffer bb = (ByteBuffer) actual;
        assertThat(bb.hasArray()).isTrue();
        byte[] copy = new byte[bb.remaining()];
        bb.get(copy);
        assertThat(serializableSample).isEqualTo(copy);

        // fieldToAnalyzerMap
        assertThat(addOp.get("fieldToAnalyzerMap")).isInstanceOf(Map.class);
        assertThat((Map) addOp.get("fieldToAnalyzerMap")).hasSize(2);

        // document
        assertThat(addOp.get("document")).isNotNull();
        GenericRecord document = (GenericRecord) addOp.get("document");
        assertThat(document.get("boost")).isEqualTo(2.3f);

        // numeric fields
        assertThat(document.get("fieldables")).isNotNull().isInstanceOf(List.class);
        List<?> fields = (List<?>) document.get("fieldables");

        assertThat(fields).hasSize(9); // custom + 4 numerics + 4 fields

        field = (GenericRecord) fields.get(0);
        assertThat(field.getSchema().getName()).isEqualTo("CustomFieldable");
        field = (GenericRecord) fields.get(1);
        assertThat(field.getSchema().getName()).isEqualTo("NumericIntField");
        assertThat(field.get("value")).isEqualTo(3);
        assertNumericField(field);
        field = (GenericRecord) fields.get(2);
        assertThat(field.getSchema().getName()).isEqualTo("NumericLongField");
        assertThat(field.get("value")).isEqualTo(3l);
        assertNumericField(field);
        field = (GenericRecord) fields.get(3);
        assertThat(field.getSchema().getName()).isEqualTo("NumericFloatField");
        assertThat(field.get("value")).isEqualTo(2.3f);
        assertNumericField(field);
        field = (GenericRecord) fields.get(4);
        assertThat(field.getSchema().getName()).isEqualTo("NumericDoubleField");
        assertThat(field.get("value")).isEqualTo(2.3d);
        assertNumericField(field);

        // fields
        field = (GenericRecord) fields.get(5);
        assertThat(field.getSchema().getName()).isEqualTo("BinaryField");
        assertThat(field.get("value")).isInstanceOf(ByteBuffer.class);
        assertField(field);

        field = (GenericRecord) fields.get(6);
        assertThat(field.getSchema().getName()).isEqualTo("StringField");
        assertThat(field.get("value")).isInstanceOf(Utf8.class);
        assertTermVector(field);
        assertIndexAndStore(field);
        assertField(field);

        field = (GenericRecord) fields.get(7);
        assertThat(field.getSchema().getName()).isEqualTo("TokenStreamField");
        assertThat(field.get("value")).isInstanceOf(List.class);
        List<List<Object>> l1 = (List<List<Object>>) field.get("value");
        assertThat(l1.get(0)).as("Wrong attribute impl list").hasSize(2);
        Object object = l1.get(0).get(0);
        assertThat(object).isNotNull();
        assertTermVector(field);
        assertField(field);

        field = (GenericRecord) fields.get(8);
        assertThat(field.getSchema().getName()).isEqualTo("ReaderField");
        assertThat(field.get("value")).isInstanceOf(ByteBuffer.class);
        assertTermVector(field);
        assertField(field);
      } catch (EOFException eof) {
        break;
      } catch (Exception ex) {
        ex.printStackTrace();
        throw ex;
      }
    }
  }
Example #17
 private void buildLuceneDocument(GenericRecord document, LuceneWorksBuilder hydrator) {
   hydrator.defineDocument();
   List<GenericRecord> fieldables = asListOfGenericRecords(document, "fieldables");
   for (GenericRecord field : fieldables) {
     String schema = field.getSchema().getName();
     if ("CustomFieldable".equals(schema)) {
       hydrator.addFieldable(asByteArray(field, "instance"));
     } else if ("NumericIntField".equals(schema)) {
       hydrator.addIntNumericField(
           asInt(field, "value"),
           asString(field, "name"),
           asInt(field, "precisionStep"),
           asStore(field),
           asBoolean(field, "indexed"),
           asFloat(field, "boost"),
           asBoolean(field, "omitNorms"),
           asBoolean(field, "omitTermFreqAndPositions"));
     } else if ("NumericFloatField".equals(schema)) {
       hydrator.addFloatNumericField(
           asFloat(field, "value"),
           asString(field, "name"),
           asInt(field, "precisionStep"),
           asStore(field),
           asBoolean(field, "indexed"),
           asFloat(field, "boost"),
           asBoolean(field, "omitNorms"),
           asBoolean(field, "omitTermFreqAndPositions"));
     } else if ("NumericLongField".equals(schema)) {
       hydrator.addLongNumericField(
           asLong(field, "value"),
           asString(field, "name"),
           asInt(field, "precisionStep"),
           asStore(field),
           asBoolean(field, "indexed"),
           asFloat(field, "boost"),
           asBoolean(field, "omitNorms"),
           asBoolean(field, "omitTermFreqAndPositions"));
     } else if ("NumericDoubleField".equals(schema)) {
       hydrator.addDoubleNumericField(
           asDouble(field, "value"),
           asString(field, "name"),
           asInt(field, "precisionStep"),
           asStore(field),
           asBoolean(field, "indexed"),
           asFloat(field, "boost"),
           asBoolean(field, "omitNorms"),
           asBoolean(field, "omitTermFreqAndPositions"));
     } else if ("BinaryField".equals(schema)) {
       hydrator.addFieldWithBinaryData(
           asString(field, "name"),
           asByteArray(field, "value"),
           asInt(field, "offset"),
           asInt(field, "length"));
     } else if ("StringField".equals(schema)) {
       hydrator.addFieldWithStringData(
           asString(field, "name"),
           asString(field, "value"),
           asStore(field),
           asIndex(field),
           asTermVector(field),
           asFloat(field, "boost"),
           asBoolean(field, "omitNorms"),
           asBoolean(field, "omitTermFreqAndPositions"));
     } else if ("TokenStreamField".equals(schema)) {
       buildAttributes(field, "value", hydrator);
       hydrator.addFieldWithTokenStreamData(
           asString(field, "name"),
           asTermVector(field),
           asFloat(field, "boost"),
           asBoolean(field, "omitNorms"),
           asBoolean(field, "omitTermFreqAndPositions"));
     } else if ("ReaderField".equals(schema)) {
       hydrator.addFieldWithSerializableReaderData(
           asString(field, "name"),
           asByteArray(field, "value"),
           asTermVector(field),
           asFloat(field, "boost"),
           asBoolean(field, "omitNorms"),
           asBoolean(field, "omitTermFreqAndPositions"));
     } else if ("BinaryDocValuesField".equals(schema)) {
       hydrator.addDocValuesFieldWithBinaryData(
           asString(field, "name"),
           asString(field, "type"),
           asByteArray(field, "value"),
           asInt(field, "offset"),
           asInt(field, "length"));
     } else if ("NumericDocValuesField".equals(schema)) {
       hydrator.addDocValuesFieldWithNumericData(
           asString(field, "name"), asString(field, "type"), asLong(field, "value"));
     } else {
       throw log.cannotDeserializeField(schema);
     }
   }
 }