/**
   * Recursive worker behind {@link #containsRecursiveRecord(Schema)}: walks the schema tree and
   * reports whether a record is nested, directly or indirectly, inside a record of the same name.
   *
   * @param s the schema node to inspect
   * @param definedRecordNames names of the records on the path currently being traversed; a
   *     record's name is added before its fields are examined and removed again only when none
   *     of its fields turned out to be recursive
   * @return {@code true} if a record name reappears below itself, {@code false} otherwise
   */
  protected static boolean containsRecursiveRecord(Schema s, Set<String> definedRecordNames) {
    switch (s.getType()) {
      case RECORD:
        {
          final String recordName = s.getName();
          if (definedRecordNames.contains(recordName)) {
            // the same record name is already open further up the path
            return true;
          }

          // mark this record as "open" while its fields are inspected
          definedRecordNames.add(recordName);
          for (Field member : s.getFields()) {
            if (containsRecursiveRecord(member.schema(), definedRecordNames)) {
              return true;
            }
          }

          // no recursion below this record: close it again
          definedRecordNames.remove(recordName);
          return false;
        }

      case ARRAY:
        // an array is recursive iff its element type is
        return containsRecursiveRecord(s.getElementType(), definedRecordNames);

      case MAP:
        // a map is recursive iff its value type is
        return containsRecursiveRecord(s.getValueType(), definedRecordNames);

      case UNION:
        // a union is recursive as soon as one of its branches is
        for (Schema branch : s.getTypes()) {
          if (containsRecursiveRecord(branch, definedRecordNames)) {
            return true;
          }
        }
        return false;

      default:
        // every other schema type cannot contain a record
        return false;
    }
  }
Beispiel #2
0
  /**
   * Reads the Avro file selected in the Create Datamapper Diagram wizard (its path is taken from
   * {@code DataMapperCreationWizardPage.avroFilePath}) and converts its schema into a tree view.
   *
   * <p>The root of the tree is named after the file's schema and every schema field is added to
   * it through {@code fetchToTree}. As a side effect {@code multipleData} is reset to
   * {@code false}. If the file cannot be read, the stack trace is printed and the tree built so
   * far (possibly just the empty root) is returned.
   *
   * @return the root of the generated tree; never {@code null}
   */
  public Tree generateInputTree() {
    GenericDatumReader<GenericData> genericReader = new GenericDatumReader<GenericData>();
    Tree root = new Tree(); // root tree for Tree data structure
    DataFileReader<GenericData> dataFileReader = null;
    try {
      // path for avro file selected in Create Datamapper Diagram wizard
      String path = DataMapperCreationWizardPage.avroFilePath;
      dataFileReader = new DataFileReader<GenericData>(new File(path), genericReader);

      Schema schm = dataFileReader.getSchema();
      multipleData = false;
      root.setName(schm.getName());

      for (Field field : schm.getFields()) {
        fetchToTree(field, root);
      }
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      // the reader holds the underlying file open; always release it
      if (dataFileReader != null) {
        try {
          dataFileReader.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }

    return root;
  }
Beispiel #3
0
 /**
  * Builds a generic record for the given schema and pre-populates the attributes shared by the
  * field records: {@code name} (the schema's name), {@code boost}, {@code omitNorms} and
  * {@code omitTermFreqAndPositions}.
  *
  * @param schema schema of the record to create
  * @return the newly created, partially filled record
  */
 private GenericRecord createField(Schema schema) {
   final GenericRecord record = new GenericData.Record(schema);
   final String schemaName = schema.getName();

   record.put("name", schemaName);
   record.put("boost", 2.3f);
   record.put("omitNorms", true);
   record.put("omitTermFreqAndPositions", true);

   return record;
 }
Beispiel #4
0
 /**
  * Visits a record schema and registers every field name that fails the
  * {@code isCompatibleName} check in {@code incompatible}, qualified as
  * {@code <recordName>.<fieldName>}.
  *
  * @param record the record schema being visited
  * @param names the names of the record's fields
  * @param fields results of visiting the fields (unused)
  * @return always {@code null}
  */
 @Override
 public Void record(Schema record, List<String> names, List<Void> fields) {
   final String owner = record.getName();
   for (String fieldName : names) {
     if (isCompatibleName(fieldName)) {
       continue;
     }
     final String qualifiedName = owner + "." + fieldName;
     incompatible.add(qualifiedName);
   }
   return null;
 }
  /**
   * Writes a Parquet file of cars, requests a projection that leaves out the
   * {@code optionalExtra} and {@code serviceHistory} fields, and checks that every record read
   * back has the projected fields populated and the two omitted fields {@code null}.
   *
   * @throws IOException if writing or reading the Parquet file fails
   */
  @Test
  public void testProjection() throws IOException {
    Path path = writeCarsToParquetFile(1, CompressionCodecName.UNCOMPRESSED, false);
    Configuration conf = new Configuration();

    Schema schema = Car.getClassSchema();

    // Copy every field of the full schema except the two the projection should drop.
    List<Schema.Field> projectedFields = new ArrayList<Schema.Field>();
    for (Schema.Field field : schema.getFields()) {
      String name = field.name();
      if ("optionalExtra".equals(name) || "serviceHistory".equals(name)) {
        continue;
      }

      // A new Field is created instead of reusing the original instance
      // (presumably because a Field cannot be attached to a second record -- confirm).
      Schema.Field fieldClone =
          new Schema.Field(name, field.schema(), field.doc(), field.defaultValue());
      projectedFields.add(fieldClone);
    }

    // The projected schema keeps the identity (name, doc, namespace) of the original record.
    Schema projectedSchema =
        Schema.createRecord(
            schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
    projectedSchema.setFields(projectedFields);
    AvroReadSupport.setRequestedProjection(conf, projectedSchema);

    ParquetReader<Car> reader = new AvroParquetReader<Car>(conf, path);
    try {
      for (Car car = reader.read(); car != null; car = reader.read()) {
        assertEquals(car.getDoors() != null, true);
        assertEquals(car.getEngine() != null, true);
        assertEquals(car.getMake() != null, true);
        assertEquals(car.getModel() != null, true);
        assertEquals(car.getYear() != null, true);
        assertEquals(car.getVin() != null, true);
        assertNull(car.getOptionalExtra());
        assertNull(car.getServiceHistory());
      }
    } finally {
      // release the underlying file even when an assertion fails
      reader.close();
    }
  }
 /**
  * Creates a CTL schema as tenant developer, downloads it by application token using the
  * {@code FLAT} export method, and checks that the downloaded content parses to an Avro schema
  * carrying the name the CTL schema was created with.
  *
  * @throws Exception if any step of the scenario fails
  */
 @Test
 public void downloadCtlSchemaTest() throws Exception {
   ApplicationDto application = createApplication(tenantAdminDto);
   loginTenantDeveloper(tenantDeveloperUser);

   // create the schema that will be downloaded afterwards
   String name = ctlRandomFieldType();
   CTLSchemaDto saved =
       createCTLSchema(
           name,
           CTL_DEFAULT_NAMESPACE,
           1,
           tenantDeveloperDto.getTenantId(),
           application.getApplicationToken(),
           null,
           null);

   // download it again through the client
   FileData fd =
       client.downloadCtlSchemaByAppToken(
           client.getCTLSchemaById(saved.getId()),
           CTLSchemaExportMethod.FLAT,
           application.getApplicationToken());
   Assert.assertNotNull(fd);

   // the downloaded file must be a parsable schema with the expected name
   Parser parser = new Parser();
   String schemaText = new String(fd.getFileData());
   Schema loaded = parser.parse(schemaText);
   Assert.assertEquals(name, loaded.getName());
 }
Beispiel #7
0
  /**
   * Round-trips a complete {@code Message} through Avro binary encoding.
   *
   * <p>The test parses every individual schema file and the {@code Works.avpr} protocol, builds
   * a message containing five operations (purge-all, optimize-all, flush, delete, add -- the add
   * carrying a document with nine fieldables), serializes it with a {@link GenericDatumWriter},
   * reads it back with a {@link GenericDatumReader} and asserts on the decoded content.
   *
   * @throws Exception if parsing, encoding or decoding fails
   */
  @Test
  public void experimentWithAvro() throws Exception {
    // Parse each schema file on its own first.
    String root = "org/hibernate/search/remote/codex/avro/v1_1/";
    parseSchema(root + "attribute/TokenTrackingAttribute.avro", "attribute/TokenTrackingAttribute");
    parseSchema(root + "attribute/CharTermAttribute.avro", "attribute/CharTermAttribute");
    parseSchema(root + "attribute/PayloadAttribute.avro", "attribute/PayloadAttribute");
    parseSchema(root + "attribute/KeywordAttribute.avro", "attribute/KeywordAttribute");
    parseSchema(
        root + "attribute/PositionIncrementAttribute.avro", "attribute/PositionIncrementAttribute");
    parseSchema(root + "attribute/FlagsAttribute.avro", "attribute/FlagsAttribute");
    parseSchema(root + "attribute/TypeAttribute.avro", "attribute/TypeAttribute");
    parseSchema(root + "attribute/OffsetAttribute.avro", "attribute/OffsetAttribute");
    parseSchema(root + "field/TermVector.avro", "field/TermVector");
    parseSchema(root + "field/Index.avro", "field/Index");
    parseSchema(root + "field/Store.avro", "field/Store");
    parseSchema(root + "field/TokenStreamField.avro", "field/TokenStreamField");
    parseSchema(root + "field/ReaderField.avro", "field/ReaderField");
    parseSchema(root + "field/StringField.avro", "field/StringField");
    parseSchema(root + "field/BinaryField.avro", "field/BinaryField");
    parseSchema(root + "field/NumericIntField.avro", "field/NumericIntField");
    parseSchema(root + "field/NumericLongField.avro", "field/NumericLongField");
    parseSchema(root + "field/NumericFloatField.avro", "field/NumericFloatField");
    parseSchema(root + "field/NumericDoubleField.avro", "field/NumericDoubleField");
    parseSchema(root + "field/CustomFieldable.avro", "field/CustomFieldable");
    parseSchema(root + "Document.avro", "Document");
    parseSchema(root + "operation/Id.avro", "operation/Id");
    parseSchema(root + "operation/OptimizeAll.avro", "operation/OptimizeAll");
    parseSchema(root + "operation/PurgeAll.avro", "operation/PurgeAll");
    parseSchema(root + "operation/Flush.avro", "operation/Flush");
    parseSchema(root + "operation/Delete.avro", "operation/Delete");
    parseSchema(root + "operation/Add.avro", "operation/Add");
    parseSchema(root + "operation/Update.avro", "operation/Update");
    parseSchema(root + "Message.avro", "Message");

    // Parse the protocol and look up the schemas needed to build the message.
    String filename = root + "Works.avpr";
    Protocol protocol = parseProtocol(filename, "Works");
    final Schema tokenTrackingAttribute = protocol.getType("TokenTrackingAttribute");
    final Schema tokenStreamSchema = protocol.getType("TokenStreamField");
    final Schema readerSchema = protocol.getType("ReaderField");
    final Schema stringSchema = protocol.getType("StringField");
    final Schema binarySchema = protocol.getType("BinaryField");
    final Schema intFieldSchema = protocol.getType("NumericIntField");
    final Schema longFieldSchema = protocol.getType("NumericLongField");
    final Schema floatFieldSchema = protocol.getType("NumericFloatField");
    final Schema doubleFieldSchema = protocol.getType("NumericDoubleField");
    final Schema customFieldableSchema = protocol.getType("CustomFieldable");
    final Schema documentSchema = protocol.getType("Document");
    final Schema idSchema = protocol.getType("Id");
    final Schema optimizeAllSchema = protocol.getType("OptimizeAll");
    final Schema purgeAllSchema = protocol.getType("PurgeAll");
    final Schema flushSchema = protocol.getType("Flush");
    final Schema deleteSchema = protocol.getType("Delete");
    final Schema addSchema = protocol.getType("Add");
    Schema messageSchema = protocol.getType("Message");

    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(messageSchema);
    Encoder encoder = EncoderFactory.get().directBinaryEncoder(out, null);

    // Ten bytes 0..9 used wherever a binary payload is needed.
    byte[] serializableSample = new byte[10];
    for (int i = 0; i < 10; i++) {
      serializableSample[i] = (byte) i;
    }

    List<String> classReferences = new ArrayList<String>();
    classReferences.add(AvroTest.class.getName());

    // The order in which fieldables are added is relied upon by the index-based assertions below.
    List<GenericRecord> fieldables = new ArrayList<GenericRecord>(1);
    // custom fieldable
    GenericRecord customFieldable = new GenericData.Record(customFieldableSchema);
    customFieldable.put("instance", ByteBuffer.wrap(serializableSample));
    fieldables.add(customFieldable);

    // numeric fields
    GenericRecord numericField = createNumeric(intFieldSchema);
    numericField.put("value", 3);
    fieldables.add(numericField);
    numericField = createNumeric(longFieldSchema);
    numericField.put("value", 3L);
    fieldables.add(numericField);
    numericField = createNumeric(floatFieldSchema);
    numericField.put("value", 2.3f);
    fieldables.add(numericField);
    numericField = createNumeric(doubleFieldSchema);
    numericField.put("value", 2.3d);
    fieldables.add(numericField);

    // fields
    GenericRecord field = createField(binarySchema);
    field.put("offset", 0);
    field.put("length", 10);
    field.put("value", ByteBuffer.wrap(serializableSample));
    fieldables.add(field);
    field = createField(stringSchema);
    field.put("value", stringSchema.getName());
    field.put("store", "YES");
    field.put("index", "ANALYZED");
    field.put("termVector", "WITH_OFFSETS");
    fieldables.add(field);
    field = createField(tokenStreamSchema);

    // token stream value: one token holding two attributes (a record and a raw byte buffer)
    List<List<Object>> tokens = new ArrayList<List<Object>>();
    List<Object> attrs = new ArrayList<Object>();
    tokens.add(attrs);
    GenericData.Record attr = new GenericData.Record(tokenTrackingAttribute);
    List<Integer> positions = new ArrayList<Integer>();
    positions.add(1);
    positions.add(2);
    positions.add(3);
    positions.add(4);
    attr.put("positions", positions);
    attrs.add(attr);
    attrs.add(ByteBuffer.wrap(serializableSample));

    field.put("value", tokens);
    field.put("termVector", "WITH_OFFSETS");
    fieldables.add(field);
    field = createField(readerSchema);
    field.put("value", ByteBuffer.wrap(serializableSample));
    field.put("termVector", "WITH_OFFSETS");
    fieldables.add(field);

    GenericRecord doc = new GenericData.Record(documentSchema);
    doc.put("boost", 2.3f);
    doc.put("fieldables", fieldables);

    // Add operation: id is a byte buffer
    GenericRecord add = new GenericData.Record(addSchema);
    add.put("class", classReferences.indexOf(AvroTest.class.getName()));
    GenericRecord id = new GenericData.Record(idSchema);
    id.put("value", ByteBuffer.wrap(serializableSample));
    add.put("id", id);
    add.put("document", doc);
    Map<String, String> analyzers = new HashMap<String, String>();
    analyzers.put("name", "ngram");
    analyzers.put("description", "porter");
    add.put("fieldToAnalyzerMap", analyzers);

    // Delete operation: id is a long
    GenericRecord delete = new GenericData.Record(deleteSchema);
    delete.put("class", classReferences.indexOf(AvroTest.class.getName()));
    id = new GenericData.Record(idSchema);
    id.put("value", Long.valueOf(30));
    delete.put("id", id);

    GenericRecord purgeAll = new GenericData.Record(purgeAllSchema);
    purgeAll.put("class", classReferences.indexOf(AvroTest.class.getName()));
    GenericRecord optimizeAll = new GenericData.Record(optimizeAllSchema);

    GenericRecord flush = new GenericData.Record(flushSchema);

    // The order of the operations is relied upon by the index-based assertions below.
    List<GenericRecord> operations = new ArrayList<GenericRecord>(1);
    operations.add(purgeAll);
    operations.add(optimizeAll);
    operations.add(flush);
    operations.add(delete);
    operations.add(add);

    GenericRecord message = new GenericData.Record(messageSchema);
    message.put("classReferences", classReferences);
    message.put("operations", operations);

    writer.write(message, encoder);
    encoder.flush();

    // Decode everything that was written; the loop ends when the decoder hits end of input.
    // Any other exception simply propagates and fails the test.
    ByteArrayInputStream inputStream = new ByteArrayInputStream(out.toByteArray());
    Decoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(messageSchema);
    while (true) {
      try {
        GenericRecord result = reader.read(null, decoder);
        System.out.println(result);

        assertThat(result).isNotNull();
        // operations
        assertThat(result.get("operations")).isNotNull().isInstanceOf(List.class);
        List<?> ops = (List<?>) result.get("operations");
        assertThat(ops).hasSize(5);

        // Flush
        assertThat(ops.get(2)).isInstanceOf(GenericRecord.class);
        GenericRecord flushOp = (GenericRecord) ops.get(2);
        assertThat(flushOp.getSchema().getName()).isEqualTo("Flush");

        // Delete
        assertThat(ops.get(3)).isInstanceOf(GenericRecord.class);
        GenericRecord deleteOp = (GenericRecord) ops.get(3);
        assertThat(deleteOp.getSchema().getName()).isEqualTo("Delete");
        Object actual = ((GenericRecord) deleteOp.get("id")).get("value");
        assertThat(actual).isInstanceOf(Long.class);
        assertThat(actual).isEqualTo(Long.valueOf(30));

        // Add
        assertThat(ops.get(4)).isInstanceOf(GenericRecord.class);
        GenericRecord addOp = (GenericRecord) ops.get(4);
        assertThat(addOp.getSchema().getName()).isEqualTo("Add");
        actual = ((GenericRecord) addOp.get("id")).get("value");
        assertThat(actual).isInstanceOf(ByteBuffer.class);
        ByteBuffer bb = (ByteBuffer) actual;
        assertThat(bb.hasArray()).isTrue();
        byte[] copy = new byte[bb.remaining()];
        bb.get(copy);
        assertThat(serializableSample).isEqualTo(copy);

        // fieldToAnalyzerMap
        assertThat(addOp.get("fieldToAnalyzerMap")).isInstanceOf(Map.class);
        assertThat((Map) addOp.get("fieldToAnalyzerMap")).hasSize(2);

        // document
        assertThat(addOp.get("document")).isNotNull();
        GenericRecord document = (GenericRecord) addOp.get("document");
        assertThat(document.get("boost")).isEqualTo(2.3f);

        // numeric fields
        assertThat(document.get("fieldables")).isNotNull().isInstanceOf(List.class);
        List<?> fields = (List<?>) document.get("fieldables");

        assertThat(fields).hasSize(9); // custom + 4 numerics + 4 fields

        field = (GenericRecord) fields.get(0);
        assertThat(field.getSchema().getName()).isEqualTo("CustomFieldable");
        field = (GenericRecord) fields.get(1);
        assertThat(field.getSchema().getName()).isEqualTo("NumericIntField");
        assertThat(field.get("value")).isEqualTo(3);
        assertNumericField(field);
        field = (GenericRecord) fields.get(2);
        assertThat(field.getSchema().getName()).isEqualTo("NumericLongField");
        assertThat(field.get("value")).isEqualTo(3L);
        assertNumericField(field);
        field = (GenericRecord) fields.get(3);
        assertThat(field.getSchema().getName()).isEqualTo("NumericFloatField");
        assertThat(field.get("value")).isEqualTo(2.3f);
        assertNumericField(field);
        field = (GenericRecord) fields.get(4);
        assertThat(field.getSchema().getName()).isEqualTo("NumericDoubleField");
        assertThat(field.get("value")).isEqualTo(2.3d);
        assertNumericField(field);

        // fields
        field = (GenericRecord) fields.get(5);
        assertThat(field.getSchema().getName()).isEqualTo("BinaryField");
        assertThat(field.get("value")).isInstanceOf(ByteBuffer.class);
        assertField(field);

        field = (GenericRecord) fields.get(6);
        assertThat(field.getSchema().getName()).isEqualTo("StringField");
        assertThat(field.get("value")).isInstanceOf(Utf8.class);
        assertTermVector(field);
        assertIndexAndStore(field);
        assertField(field);

        field = (GenericRecord) fields.get(7);
        assertThat(field.getSchema().getName()).isEqualTo("TokenStreamField");
        assertThat(field.get("value")).isInstanceOf(List.class);
        List<List<Object>> l1 = (List<List<Object>>) field.get("value");
        assertThat(l1.get(0)).as("Wrong attribute impl list").hasSize(2);
        Object object = l1.get(0).get(0);
        assertThat(object).isNotNull();
        assertTermVector(field);
        assertField(field);

        field = (GenericRecord) fields.get(8);
        assertThat(field.getSchema().getName()).isEqualTo("ReaderField");
        assertThat(field.get("value")).isInstanceOf(ByteBuffer.class);
        assertTermVector(field);
        assertField(field);
      } catch (EOFException eof) {
        // nothing left to decode
        break;
      }
    }
  }
 /**
  * Initialize this sensor: parse the schema text returned by {@code getScheme()} and derive the
  * URI this sensor stores to from the schema's namespace and name.
  */
 @Override
 public final void init() {
   schema = Schema.parse(getScheme());

   final String namespace = schema.getNamespace();
   final String name = schema.getName();
   uri = EntityUriBuilder.nativeUri(namespace, name);

   LOG.debug("Sensor storing to URI: {}", uri);
 }
 /**
  * Checks whether a schema is a placeholder, i.e. whether the schema's name equals
  * {@code NONAME}.
  *
  * @param s the schema to test; must not be {@code null}
  * @return {@code true} if the schema's name equals {@code NONAME}; {@code false} otherwise,
  *     including when the schema has no name
  */
 public static boolean isUDPartialRecordSchema(Schema s) {
   // Constant-first comparison so that a schema whose getName() is null yields false
   // instead of a NullPointerException (assumes NONAME is a non-null String constant).
   return NONAME.equals(s.getName());
 }