Пример #1
0
 /**
  * Checks the serialized form of a plain field: its name, its boost and the two "omit" flags.
  *
  * @param field generic record holding the serialized field
  */
 private void assertField(GenericRecord field) {
   final Object serializedName = field.get("name");
   assertThat(serializedName).isInstanceOf(Utf8.class);
   // the name stored in the record is compared against the record's own schema name
   final String expectedName = field.getSchema().getName();
   assertThat(serializedName.toString()).isEqualTo(expectedName);
   assertThat(field.get("boost")).isEqualTo(2.3f);
   for (String flag : new String[] {"omitNorms", "omitTermFreqAndPositions"}) {
     assertThat(field.get(flag)).isEqualTo(true);
   }
 }
Пример #2
0
 /**
  * Feeds one serialized token attribute to the hydrator.
  *
  * <p>An attribute is either a {@code GenericRecord}, dispatched on the name of its schema, or a
  * {@code ByteBuffer} carrying an opaque serialized attribute. Anything else is rejected.
  *
  * @param element the serialized attribute
  * @param hydrator builder receiving the decoded attribute
  */
 private void buildAttribute(Object element, LuceneWorksBuilder hydrator) {
   if (!(element instanceof GenericRecord)) {
     if (element instanceof ByteBuffer) {
       hydrator.addSerializedAttribute(asByteArray((ByteBuffer) element));
       return;
     }
     throw log.unknownAttributeSerializedRepresentation(element.getClass().getName());
   }

   final GenericRecord attribute = (GenericRecord) element;
   final String kind = attribute.getSchema().getName();
   if ("TokenTrackingAttribute".equals(kind)) {
     @SuppressWarnings("unchecked")
     List<Integer> positions = (List<Integer>) attribute.get("positions");
     hydrator.addTokenTrackingAttribute(positions);
   } else if ("CharTermAttribute".equals(kind)) {
     hydrator.addCharTermAttribute((CharSequence) attribute.get("sequence"));
   } else if ("PayloadAttribute".equals(kind)) {
     hydrator.addPayloadAttribute(asByteArray(attribute, "payload"));
   } else if ("KeywordAttribute".equals(kind)) {
     hydrator.addKeywordAttribute(asBoolean(attribute, "isKeyword"));
   } else if ("PositionIncrementAttribute".equals(kind)) {
     hydrator.addPositionIncrementAttribute(asInt(attribute, "positionIncrement"));
   } else if ("FlagsAttribute".equals(kind)) {
     hydrator.addFlagsAttribute(asInt(attribute, "flags"));
   } else if ("TypeAttribute".equals(kind)) {
     hydrator.addTypeAttribute(asString(attribute, "type"));
   } else if ("OffsetAttribute".equals(kind)) {
     hydrator.addOffsetAttribute(asInt(attribute, "startOffset"), asInt(attribute, "endOffset"));
   } else {
     throw log.unknownAttributeSerializedRepresentation(kind);
   }
 }
 /**
  * Prints the incoming record and forwards its SMS-received fields to the aggregator.
  *
  * @param record event record providing {@code userId}, {@code time}, {@code contactHash} and
  *     {@code msgLength}
  * @param eventAggregator receiver of the extracted values
  */
 @Override
 protected void processEvent(GenericRecord record, EventAggregator eventAggregator) {
   System.out.println(schemaName + "-Stream: " + record.toString());
   // Arguments are evaluated left to right, so the fields are read in the order listed here.
   eventAggregator.processSmsReceived(
       (long) record.get("userId"),
       (long) record.get("time"),
       record.get("contactHash").toString(),
       (int) record.get("msgLength"));
 }
Пример #4
0
 /**
  * Asserts that each generic record carries the same values as the {@code Bird} at the same
  * position of the expected list.
  *
  * @param expected the reference objects
  * @param actual the generic records to compare, in the same order
  */
 private void assertEqualsWithGeneric(List<Bird> expected, List<GenericRecord> actual) {
   assertEquals(expected.size(), actual.size());
   int position = 0;
   for (Bird bird : expected) {
     GenericRecord row = actual.get(position++);
     assertEquals(bird.number, row.get("number"));
     // string-valued fields come back as Avro Utf8, hence the toString() calls
     assertEquals(bird.quality, row.get("quality").toString());
     assertEquals(bird.quantity, row.get("quantity"));
     assertEquals(bird.species, row.get("species").toString());
   }
 }
Пример #5
0
 /**
  * Decodes a statement from its Avro record. Fields 0 to 3 are read as subject, predicate,
  * object and context, in that order.
  *
  * @param record the encoded statement
  * @return a statement with a context when field 3 is set, otherwise one without a context
  */
 private Statement decodeStatement(final GenericRecord record) {
   final Resource subj = decodeIdentifier((GenericRecord) record.get(0));
   final URI pred = (URI) decodeIdentifier((GenericRecord) record.get(1));
   final Value obj = decodeValue(record.get(2));
   // decodeIdentifier dereferences its argument, so an absent context has to be detected
   // before decoding; otherwise a context-less statement fails with a NullPointerException.
   final GenericRecord encodedCtx = (GenericRecord) record.get(3);
   final Resource ctx = encodedCtx == null ? null : decodeIdentifier(encodedCtx);
   if (ctx == null) {
     return this.factory.createStatement(subj, pred, obj);
   } else {
     return this.factory.createStatement(subj, pred, obj, ctx);
   }
 }
Пример #6
0
 /**
  * Reads the {@code value} of the operation's {@code id} record and passes it to the hydrator
  * in the form matching its runtime type.
  *
  * @param operation record containing an {@code id} sub-record
  * @param hydrator builder receiving the id
  */
 private void processId(GenericRecord operation, LuceneWorksBuilder hydrator) {
   final Object value = ((GenericRecord) operation.get("id")).get("value");
   if (value instanceof ByteBuffer) {
     hydrator.addIdAsJavaSerialized(asByteArray((ByteBuffer) value));
     return;
   }
   if (value instanceof Utf8) {
     hydrator.addId(value.toString());
     return;
   }
   // everything else is handed over as a serialized object
   hydrator.addId((Serializable) value);
 }
Пример #7
0
  /**
   * Builds a map from field name to default value for every field of an Avro record schema that
   * declares a default.
   *
   * <p>The values are obtained by serializing an empty record and reading it back with a reader
   * schema made only of the defaulted fields.
   *
   * @param avroRecordSchema the record schema whose defaults are wanted
   * @return field name to default value, for the fields that declare a default
   */
  public static Map<String, Object> getDefaultValueMap(Schema avroRecordSchema) {
    List<Field> fieldsWithDefault = new ArrayList<Field>();
    for (Field candidate : avroRecordSchema.getFields()) {
      if (candidate.defaultValue() == null) {
        continue;
      }
      // The Field is copied: reusing the instance would fail with
      // org.apache.avro.AvroRuntimeException: Field already used: schemaVersion
      fieldsWithDefault.add(
          new Field(
              candidate.name(),
              candidate.schema(),
              candidate.doc(),
              candidate.defaultValue(),
              candidate.order()));
    }

    Schema readerSchema = Schema.createRecord(fieldsWithDefault);
    Schema writerSchema = Schema.createRecord(new ArrayList<Field>());
    DatumWriter<GenericRecord> emptyWriter = new GenericDatumWriter<GenericRecord>(writerSchema);
    DatumReader<GenericRecord> defaultingReader =
        new GenericDatumReader<GenericRecord>(writerSchema, readerSchema);

    GenericRecord blank = new GenericData.Record(writerSchema);
    byte[] serializedBlank = AvroUtils.writeAvroEntity(blank, emptyWriter);
    GenericRecord resolved = AvroUtils.readAvroEntity(serializedBlank, defaultingReader);

    Map<String, Object> defaults = new HashMap<String, Object>();
    for (Field defaulted : fieldsWithDefault) {
      String fieldName = defaulted.name();
      defaults.put(fieldName, resolved.get(fieldName));
    }
    return defaults;
  }
 /**
  * Applies a record-valued delta to one field of {@code record}.
  *
  * <p>When the field already holds a record whose schema full name matches the delta, that
  * record is reused (and re-registered if the delta carries a different uuid). Otherwise a new
  * record is created and stored in the field. In both cases the delta is then applied to it.
  *
  * @param record the record owning the field
  * @param deltaRecord the delta to apply
  * @param fieldName the field to update
  */
 private void processRecordField(
     CommonRecord record, GenericRecord deltaRecord, String fieldName) {
   final CommonValue currentValue = record.getField(fieldName);
   final boolean reusable =
       currentValue != null
           && currentValue.isRecord()
           && currentValue
               .getRecord()
               .getSchema()
               .getFullName()
               .equals(deltaRecord.getSchema().getFullName());

   if (!reusable) {
     final CommonRecord created = createCommonRecord(deltaRecord);
     record.setField(fieldName, commonFactory.createCommonValue(created));
     updateRecord(created, deltaRecord);
     return;
   }

   final CommonRecord existing = currentValue.getRecord();
   final GenericFixed uuidFixed = (GenericFixed) deltaRecord.get(UUID);
   if (uuidFixed != null) {
     final UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed);
     // re-key the record when the delta carries a different uuid
     if (!uuid.equals(existing.getUuid())) {
       records.remove(existing.getUuid());
       records.put(uuid, existing);
       existing.setUuid(uuid);
     }
   }
   updateRecord(existing, deltaRecord);
 }
  /**
   * Writes a record whose map field uses {@code Utf8} keys and checks that the map read back
   * equals the one built with {@code str(...)} keys.
   *
   * <p>Writer and reader are closed in {@code finally} blocks so a failing write or assertion
   * does not leave them open.
   */
  @Test
  public void testMapWithUtf8Key() throws Exception {
    Schema schema = new Schema.Parser().parse(Resources.getResource("map.avsc").openStream());

    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
    tmp.deleteOnExit();
    tmp.delete();
    Path file = new Path(tmp.getPath());

    // Write a record with a map with Utf8 keys.
    GenericData.Record record =
        new GenericRecordBuilder(schema)
            .set("mymap", ImmutableMap.of(new Utf8("a"), 1, new Utf8("b"), 2))
            .build();

    AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema);
    try {
      writer.write(record);
    } finally {
      writer.close();
    }

    AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(testConf, file);
    try {
      GenericRecord nextRecord = reader.read();

      assertNotNull(nextRecord);
      assertEquals(ImmutableMap.of(str("a"), 1, str("b"), 2), nextRecord.get("mymap"));
    } finally {
      reader.close();
    }
  }
  /**
   * Writes a record whose map field contains a {@code null} value and checks that the map read
   * back equals the one written.
   *
   * <p>Writer and reader are closed in {@code finally} blocks so a failing write or assertion
   * does not leave them open.
   */
  @Test
  public void testMapWithNulls() throws Exception {
    Schema schema =
        new Schema.Parser().parse(Resources.getResource("map_with_nulls.avsc").openStream());

    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
    tmp.deleteOnExit();
    tmp.delete();
    Path file = new Path(tmp.getPath());

    // Write a record with a null value
    Map<CharSequence, Integer> map = new HashMap<CharSequence, Integer>();
    map.put(str("thirty-four"), 34);
    map.put(str("eleventy-one"), null);
    map.put(str("one-hundred"), 100);

    GenericData.Record record = new GenericRecordBuilder(schema).set("mymap", map).build();

    AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema);
    try {
      writer.write(record);
    } finally {
      writer.close();
    }

    AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(testConf, file);
    try {
      GenericRecord nextRecord = reader.read();

      assertNotNull(nextRecord);
      assertEquals(map, nextRecord.get("mymap"));
    } finally {
      reader.close();
    }
  }
Пример #11
0
 /**
  * Rebuilds a {@code Record} from its Avro form: field 0 is the optional encoded ID, field 1
  * the sequence of encoded properties (identifier at index 0, values at index 1).
  *
  * @param generic the encoded record
  * @param propertiesToDecode properties to keep, or {@code null} to keep all of them
  * @return the decoded record
  */
 @SuppressWarnings("unchecked")
 private Record decodeRecord(
     final GenericRecord generic, @Nullable final Set<URI> propertiesToDecode) {
   final Record decoded = Record.create();

   final GenericRecord encodedID = (GenericRecord) generic.get(0);
   if (encodedID != null) {
     decoded.setID((URI) decodeIdentifier(encodedID));
   }

   final Iterable<GenericRecord> encodedProperties = (Iterable<GenericRecord>) generic.get(1);
   for (final GenericRecord encodedProperty : encodedProperties) {
     final URI property = (URI) decodeIdentifier((GenericRecord) encodedProperty.get(0));
     // values are decoded even for properties that end up being filtered out
     final List<Object> values = decodeNodes(encodedProperty.get(1));
     final boolean excluded =
         propertiesToDecode != null && !propertiesToDecode.contains(property);
     if (excluded) {
       continue;
     }
     decoded.set(property, values);
   }
   return decoded;
 }
  /**
   * Copies every company named "aol" from {@code companies.avro} into
   * {@code target/companies2.avro} using the {@code Company2} schema, and checks that at least
   * one such company was found.
   *
   * <p>The output file is created on the first match only: calling
   * {@code DataFileWriter.create} again on an already open writer fails. Reader and writer are
   * closed even when reading, writing or an assertion fails.
   *
   * @throws IOException if a schema or data file cannot be read or written
   */
  public void testWrite() throws IOException {

    URL url = this.getClass().getClassLoader().getResource("input/Company.avsc");
    assertNotNull(url);
    Schema schema = new Schema.Parser().parse(new File(url.getFile()));
    assertNotNull(schema);

    File fileOut = new File("target/companies2.avro");
    Schema schemaOut =
        new Schema.Parser().parse(new File("src/test/resources/input/Company2.avsc"));
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schemaOut);

    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
    // Another way of loading a file
    File file = new File("src/test/resources/input/companies.avro");
    DataFileReader<GenericRecord> dataFileReader =
        new DataFileReader<GenericRecord>(file, datumReader);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);

    int count = 0;
    try {
      GenericRecord company = null;
      while (dataFileReader.hasNext()) {
        company = dataFileReader.next(company);
        if (company.get("name").toString().equals("aol")) {
          if (count == 0) {
            // open the output lazily, and only once
            dataFileWriter.create(schemaOut, fileOut);
          }

          GenericRecord recordOut = new GenericData.Record(schemaOut);
          recordOut.put("id", company.get("id"));
          recordOut.put("name", company.get("name"));
          assertTrue(recordOut.getSchema().getField("address") != null);
          assertTrue(recordOut.getSchema().getField("employeeCount") == null);

          // address is of complex type
          GenericRecord address =
              new GenericData.Record((GenericData.Record) company.get("address"), true);
          recordOut.put("address", address);

          dataFileWriter.append(recordOut);

          count++;
        }
      }
    } finally {
      try {
        dataFileWriter.close();
      } finally {
        dataFileReader.close();
      }
    }
    assertTrue(count > 0);
  }
Пример #13
0
 /**
  * Decodes an identifier record into a resource.
  *
  * <p>A compressed identifier is resolved through the dictionary using the integer in field 0.
  * A plain identifier is read as a string from field 0: a {@code "_:"} prefix produces a blank
  * node named by the remainder, anything else produces a URI.
  *
  * @param record the encoded identifier
  * @return the decoded resource
  * @throws IllegalStateException if the dictionary lookup fails with an I/O error
  * @throws IllegalArgumentException if the record has neither of the two supported schemas
  */
 private Resource decodeIdentifier(final GenericRecord record) {
   final Schema encoding = record.getSchema();

   if (encoding.equals(Schemas.COMPRESSED_IDENTIFIER)) {
     try {
       return this.dictionary.objectFor((Integer) record.get(0));
     } catch (final IOException ex) {
       throw new IllegalStateException("Cannot access dictionary: " + ex.getMessage(), ex);
     }
   }

   if (!encoding.equals(Schemas.PLAIN_IDENTIFIER)) {
     throw new IllegalArgumentException("Unsupported encoded identifier: " + record);
   }

   final String text = record.get(0).toString();
   if (text.startsWith("_:")) {
     return this.factory.createBNode(text.substring(2));
   }
   return this.factory.createURI(text);
 }
 /**
  * Emits a {@code (shape, 1)} pair for every input record whose {@code shape} field is set.
  * Records without a shape produce no output.
  *
  * @param in the input record
  * @param collector receives the emitted pair
  * @param reporter not used by this method
  * @throws IOException if the collector fails
  */
 @Override
 public void map(GenericRecord in, AvroCollector<Pair<Utf8, Long>> collector, Reporter reporter)
     throws IOException {
   final Pair<Utf8, Long> shapeCount = new Pair<Utf8, Long>(PAIR_SCHEMA);
   final Utf8 shape = (Utf8) in.get("shape");
   if (shape == null) {
     return;
   }
   shapeCount.set(shape, 1L);
   collector.collect(shapeCount);
 }
Пример #15
0
 /**
  * Replays the token stream stored in {@code field}: each token is a list of attributes that
  * are handed to {@code buildAttribute}, after which the token is closed on the hydrator.
  *
  * @param record record holding the token lists
  * @param field name of the field containing the list of tokens
  * @param hydrator builder receiving attributes and tokens
  */
 private void buildAttributes(GenericRecord record, String field, LuceneWorksBuilder hydrator) {
   @SuppressWarnings("unchecked")
   final List<List<?>> tokenStream = (List<List<?>>) record.get(field);
   for (final List<?> attributes : tokenStream) {
     for (final Object serializedAttribute : attributes) {
       buildAttribute(serializedAttribute, hydrator);
     }
     // all attributes of this token have been supplied
     hydrator.addToken();
   }
 }
  /**
   * Starts a local Spark streaming job that reads Avro-encoded messages from the Kafka topic
   * {@code mytopic}, decodes each one with {@code AvroVulabProducer.USER_SCHEMA} and prints its
   * {@code str1}, {@code str2} and {@code int1} fields.
   *
   * @param args not used
   */
  public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("kafka-sandbox").setMaster("local[*]");
    JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    JavaStreamingContext streamingContext =
        new JavaStreamingContext(sparkContext, new Duration(2000));

    Set<String> topics = Collections.singleton("mytopic");
    Map<String, String> kafkaParams = new HashMap<>();
    kafkaParams.put("metadata.broker.list", "sandbox.hortonworks.com:6667");

    JavaPairInputDStream<String, byte[]> messages =
        KafkaUtils.createDirectStream(
            streamingContext,
            String.class,
            byte[].class,
            StringDecoder.class,
            DefaultDecoder.class,
            kafkaParams,
            topics);

    messages.foreachRDD(
        rdd -> {
          rdd.foreach(
              message -> {
                // schema and codec are built inside the closure, once per message
                Schema userSchema = new Schema.Parser().parse(AvroVulabProducer.USER_SCHEMA);
                Injection<GenericRecord, byte[]> codec = GenericAvroCodecs.toBinary(userSchema);
                GenericRecord decoded = codec.invert(message._2).get();

                String line =
                    "str1= "
                        + decoded.get("str1")
                        + ", str2= "
                        + decoded.get("str2")
                        + ", int1="
                        + decoded.get("int1");
                System.out.println(line);
              });
        });

    streamingContext.start();
    streamingContext.awaitTermination();
  }
 /**
  * Creates a common record for the given Avro record. When the Avro record carries a uuid the
  * new record is created with it and registered in {@code records}; otherwise an unregistered
  * record is returned.
  *
  * @param avroRecord the source record
  * @return the newly created common record
  */
 private CommonRecord createCommonRecord(GenericRecord avroRecord) {
   final GenericFixed uuidFixed = (GenericFixed) avroRecord.get(UUID);
   if (uuidFixed == null) {
     return commonFactory.createCommonRecord(avroRecord.getSchema());
   }
   final UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed);
   final CommonRecord registered = commonFactory.createCommonRecord(uuid, avroRecord.getSchema());
   records.put(uuid, registered);
   return registered;
 }
Пример #18
0
 /**
  * Converts the operation's {@code fieldToAnalyzerMap} into a map of plain strings by calling
  * {@code toString()} on every key and value.
  *
  * @param operation record that may contain a {@code fieldToAnalyzerMap}
  * @return the converted map, or {@code null} when the field is not set
  */
 private Map<String, String> getAnalyzers(GenericRecord operation) {
   final Object raw = operation.get("fieldToAnalyzerMap");
   if (raw == null) {
     return null;
   }
   final Map<?, ?> source = (Map<?, ?>) raw;
   final Map<String, String> converted = new HashMap<>(source.size());
   for (final Map.Entry<?, ?> mapping : source.entrySet()) {
     final String fieldName = mapping.getKey().toString();
     final String analyzerName = mapping.getValue().toString();
     converted.put(fieldName, analyzerName);
   }
   return converted;
 }
Пример #19
0
  /**
   * Round-trips a generic {@code MyPair} record ("dog", "cat") through Avro binary encoding and
   * prints both the serialized bytes and the decoded fields.
   *
   * @throws IOException if encoding or decoding fails
   */
  public void serializeGeneric() throws IOException {
    // Build the record to encode.
    Schema pairSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/MyPair.avsc"));
    GenericRecord pair = new GenericData.Record(pairSchema);
    pair.put("left", new Utf8("dog"));
    pair.put("right", new Utf8("cat"));

    // Encode it into an in-memory buffer.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(pairSchema);
    Encoder binaryEncoder = EncoderFactory.get().binaryEncoder(buffer, null);
    datumWriter.write(pair, binaryEncoder);
    binaryEncoder.flush();
    buffer.close();
    System.out.println("Serialization: " + buffer);

    // Decode the bytes again.
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(pairSchema);
    BinaryDecoder binaryDecoder = DecoderFactory.get().binaryDecoder(buffer.toByteArray(), null);
    GenericRecord decoded = datumReader.read(null, binaryDecoder);
    System.out.printf("Left: %s, Right: %s\n", decoded.get("left"), decoded.get("right"));
  }
Пример #20
0
 /**
  * Checks the serialized form of the numeric field named {@code "int"}: name, precision step,
  * boost, boolean flags and store mode.
  *
  * @param field generic record holding the serialized numeric field
  */
 private void assertNumericField(GenericRecord field) {
   final Object serializedName = field.get("name");
   assertThat(serializedName).isInstanceOf(Utf8.class);
   assertThat(serializedName.toString()).isEqualTo("int");

   assertThat(field.get("precisionStep")).isEqualTo(3);
   assertThat(field.get("boost")).isEqualTo(2.3f);
   for (String flag : new String[] {"indexed", "omitNorms", "omitTermFreqAndPositions"}) {
     assertThat(field.get(flag)).isEqualTo(true);
   }

   // the store mode is an Avro enum symbol
   final Object store = field.get("store");
   assertThat(store).isInstanceOf(GenericData.EnumSymbol.class);
   assertThat(store.toString()).isEqualTo("YES");
 }
Пример #21
0
  /**
   * Writes all JSON test records, commits the writer, then reads the output file back and
   * checks the three users it contains.
   *
   * @throws IOException if writing or reading the output file fails
   */
  @Test
  public void testWrite() throws IOException {
    // Write all test records
    for (String json : TestConstants.JSON_RECORDS) {
      this.writer.write(convertRecord(json));
    }

    Assert.assertEquals(this.writer.recordsWritten(), 3);

    this.writer.close();
    this.writer.commit();

    String outputDir = TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath;
    String outputName =
        TestConstants.TEST_FILE_NAME
            + "."
            + TestConstants.TEST_WRITER_ID
            + "."
            + TestConstants.TEST_FILE_EXTENSION;
    File outputFile = new File(outputDir, outputName);
    DataFileReader<GenericRecord> reader =
        new DataFileReader<GenericRecord>(
            outputFile, new GenericDatumReader<GenericRecord>(this.schema));

    // Expected content, one column per user, in the order the records were written
    String[] names = {"Alyssa", "Ben", "Charlie"};
    int[] favoriteNumbers = {256, 7, 68};
    String[] favoriteColors = {"yellow", "red", "blue"};

    for (int i = 0; i < names.length; i++) {
      GenericRecord user = reader.next();
      // Strings are in UTF8, so toString() is needed before comparing
      Assert.assertEquals(user.get("name").toString(), names[i]);
      Assert.assertEquals(user.get("favorite_number"), favoriteNumbers[i]);
      Assert.assertEquals(user.get("favorite_color").toString(), favoriteColors[i]);
    }

    reader.close();
  }
Пример #22
0
    /**
     * Merges every non-null field of the incoming records into one output record and writes it
     * only when the PSAM, LONGITUDE and LATITUDE fields are all set. When several records set
     * the same field, the last one seen wins.
     *
     * @param key the grouping key; not read by this method
     * @param values the records grouped under {@code key}
     * @param context receives the merged record as key, paired with a {@code NullWritable}
     * @exception IOException if an error occurs
     * @exception InterruptedException if an error occurs
     */
    @Override
    public final void reduce(Text key, Iterable<AvroValue<GenericRecord>> values, Context context)
        throws IOException, InterruptedException {
      final GenericRecord merged = new GenericData.Record(outputSchema);

      for (AvroValue<GenericRecord> wrapped : values) {
        final GenericRecord part = wrapped.datum();
        for (Schema.Field field : part.getSchema().getFields()) {
          final String name = field.name();
          final Object partValue = part.get(name);
          if (partValue == null) {
            continue;
          }
          merged.put(name, partValue);
        }
      }

      final CharSequence psam = (CharSequence) merged.get(PSAM);
      final CharSequence longitude = (CharSequence) merged.get(LONGITUDE);
      final CharSequence latitude = (CharSequence) merged.get(LATITUDE);
      final boolean incomplete = psam == null || longitude == null || latitude == null;
      if (incomplete) {
        return;
      }
      context.write(new AvroKey<GenericRecord>(merged), NullWritable.get());
    }
 /**
  * Reads the given Avro data file with a generic reader, prints the {@code message} field of
  * every record and asserts that the file holds exactly three events.
  *
  * <p>The file reader is closed even when reading fails part-way through.
  *
  * @param file the Avro data file to validate
  * @throws IOException if the file cannot be opened or read
  */
 public void validateAvroFile(File file) throws IOException {
   // read the events back using GenericRecord
   DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
   DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(file, reader);
   int numEvents = 0;
   try {
     GenericRecord record = new GenericData.Record(fileReader.getSchema());
     while (fileReader.hasNext()) {
       // next(reuse) may hand back a different instance than the one passed in
       record = fileReader.next(record);
       String bodyStr = record.get("message").toString();
       System.out.println(bodyStr);
       numEvents++;
     }
   } finally {
     fileReader.close();
   }
   Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
 }
 /**
  * Applies a received delta. Unless a full resync is requested, a record already registered
  * under the delta's uuid is updated in place; otherwise all known records are dropped and a
  * new root record is created from the delta.
  *
  * @param index not read by this method
  * @param data the delta record; its uuid field is read
  * @param fullResync when {@code true}, existing records are discarded
  */
 @Override
 public synchronized void onDeltaReceived(int index, GenericRecord data, boolean fullResync) {
   final GenericFixed uuidFixed = (GenericFixed) data.get(UUID);
   final UUID uuid = AvroGenericUtils.createUuidFromFixed(uuidFixed);
   if (LOG.isDebugEnabled()) {
     LOG.debug("Processing delta with uuid {}", uuidFixed.toString());
   }

   final boolean updateInPlace = !fullResync && records.containsKey(uuid);
   final CommonRecord target;
   if (updateInPlace) {
     target = records.get(uuid);
   } else {
     // start over: forget everything and make the delta's record the new root
     records.clear();
     target = createCommonRecord(data);
     rootRecord = target;
   }
   updateRecord(target, data);
 }
Пример #25
0
  /**
   * Builds a segment from the Avro test data and collects, per column, the set of distinct
   * values found in the Avro file into {@code uniqueEntries}.
   *
   * <p>The Avro stream is closed once all records have been read, also when reading fails.
   *
   * @throws Exception if segment creation or reading the Avro file fails
   */
  @BeforeClass
  public static void before() throws Exception {
    final String filePath =
        TestUtils.getFileFromResourceUrl(
            DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
    if (INDEX_DIR.exists()) {
      FileUtils.deleteQuietly(INDEX_DIR);
    }

    final File avroFile = new File(filePath);

    final SegmentGeneratorConfig config =
        SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
            avroFile, INDEX_DIR, "time_day", TimeUnit.DAYS, "test");

    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();

    final Schema schema = AvroUtils.extractSchemaFromAvro(avroFile);

    final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(avroFile);
    try {
      final org.apache.avro.Schema avroSchema = avroReader.getSchema();
      final String[] columns = new String[avroSchema.getFields().size()];
      int i = 0;
      for (final Field f : avroSchema.getFields()) {
        columns[i] = f.name();
        i++;
      }

      uniqueEntries = new HashMap<String, Set<Object>>();
      for (final String column : columns) {
        uniqueEntries.put(column, new HashSet<Object>());
      }

      while (avroReader.hasNext()) {
        final GenericRecord rec = avroReader.next();
        for (final String column : columns) {
          Object val = rec.get(column);
          // Avro strings arrive as Utf8; convert them before type conversion
          if (val instanceof Utf8) {
            val = val.toString();
          }
          uniqueEntries
              .get(column)
              .add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val));
        }
      }
    } finally {
      avroReader.close();
    }
  }
Пример #26
0
 /**
  * Turns a decoded Avro datum into a literal.
  *
  * <p>Records are dispatched on their schema (language-tagged string, short, byte, big integer,
  * big decimal, calendar). Character sequences, booleans and the boxed numeric types
  * {@code Long}, {@code Integer}, {@code Double} and {@code Float} are mapped directly.
  *
  * @param generic the decoded datum
  * @return the corresponding literal
  * @throws NullPointerException if {@code generic} is {@code null}
  * @throws IllegalArgumentException if the datum has no supported representation
  */
 private Literal decodeLiteral(final Object generic) {
   if (generic instanceof GenericRecord) {
     final GenericRecord encoded = (GenericRecord) generic;
     final Schema encoding = encoded.getSchema();
     if (encoding.equals(Schemas.STRING_LANG)) {
       final String label = encoded.get(0).toString(); // Utf8 class used
       final String language = encoded.get(1).toString();
       return this.factory.createLiteral(label, language);
     }
     if (encoding.equals(Schemas.SHORT)) {
       final Integer widened = (Integer) encoded.get(0);
       return this.factory.createLiteral(widened.shortValue());
     }
     if (encoding.equals(Schemas.BYTE)) {
       final Integer widened = (Integer) encoded.get(0);
       return this.factory.createLiteral(widened.byteValue());
     }
     if (encoding.equals(Schemas.BIGINTEGER)) {
       return this.factory.createLiteral(encoded.get(0).toString(), XMLSchema.INTEGER);
     }
     if (encoding.equals(Schemas.BIGDECIMAL)) {
       return this.factory.createLiteral(encoded.get(0).toString(), XMLSchema.DECIMAL);
     }
     if (encoding.equals(Schemas.CALENDAR)) {
       // field 0 is the zone offset in minutes, field 1 the instant in milliseconds
       final int tz = (Integer) encoded.get(0);
       final GregorianCalendar calendar = new GregorianCalendar();
       calendar.setTimeInMillis((Long) encoded.get(1));
       final int magnitude = Math.abs(tz);
       final String zoneId =
           String.format("GMT%s%02d:%02d", tz >= 0 ? "+" : "-", magnitude / 60, magnitude % 60);
       calendar.setTimeZone(TimeZone.getTimeZone(zoneId));
       return this.factory.createLiteral(this.datatypeFactory.newXMLGregorianCalendar(calendar));
     }
     // a record with any other schema is rejected below
   } else if (generic instanceof CharSequence) {
     return this.factory.createLiteral(generic.toString()); // Utf8 class used
   } else if (generic instanceof Boolean) {
     return this.factory.createLiteral((Boolean) generic);
   } else if (generic instanceof Long) {
     return this.factory.createLiteral((Long) generic);
   } else if (generic instanceof Integer) {
     return this.factory.createLiteral((Integer) generic);
   } else if (generic instanceof Double) {
     return this.factory.createLiteral((Double) generic);
   } else if (generic instanceof Float) {
     return this.factory.createLiteral((Float) generic);
   }
   Preconditions.checkNotNull(generic);
   throw new IllegalArgumentException("Unsupported generic data: " + generic);
 }
  /**
   * Writes a record whose array field is empty and checks that an equal, empty list is read
   * back.
   *
   * <p>Writer and reader are closed in {@code finally} blocks so a failing write or assertion
   * does not leave them open.
   */
  @Test
  public void testEmptyArray() throws Exception {
    Schema schema = new Schema.Parser().parse(Resources.getResource("array.avsc").openStream());

    File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp");
    tmp.deleteOnExit();
    tmp.delete();
    Path file = new Path(tmp.getPath());

    // Write a record with an empty array.
    List<Integer> emptyArray = new ArrayList<Integer>();
    GenericData.Record record = new GenericRecordBuilder(schema).set("myarray", emptyArray).build();

    AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema);
    try {
      writer.write(record);
    } finally {
      writer.close();
    }

    AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(testConf, file);
    try {
      GenericRecord nextRecord = reader.read();

      assertNotNull(nextRecord);
      assertEquals(emptyArray, nextRecord.get("myarray"));
    } finally {
      reader.close();
    }
  }
 /**
  * Applies every field of the delta to {@code record}. Record, array, enum and fixed values
  * are delegated to their dedicated handlers; any other value is wrapped and stored directly.
  *
  * @param record the record to update
  * @param delta the delta whose schema fields are iterated
  */
 private void updateRecord(CommonRecord record, GenericRecord delta) {
   for (Field deltaField : delta.getSchema().getFields()) {
     final String name = deltaField.name();
     final Object rawValue = delta.get(name);

     if (LOG.isDebugEnabled()) {
       LOG.debug(
           "Processing field \"{}\", current value: {}",
           name,
           record.getField(name) != null ? record.getField(name).toString() : null);
     }

     if (AvroGenericUtils.isRecord(rawValue)) {
       processRecordField(record, (GenericRecord) rawValue, name);
     } else if (AvroGenericUtils.isArray(rawValue)) {
       processArrayField(record, (GenericArray) rawValue, name);
     } else if (AvroGenericUtils.isEnum(rawValue)) {
       processEnumField(record, (GenericEnumSymbol) rawValue, name);
     } else if (AvroGenericUtils.isFixed(rawValue)) {
       processFixedField(record, (GenericFixed) rawValue, name);
     } else {
       // plain value: store it as is
       record.setField(name, commonFactory.createCommonValue(rawValue));
     }
   }
 }
Пример #29
0
    /**
     * Translates a value taken from Avro generic data into the representation selected by
     * {@code dataSchema}: boxed primitives, {@code String}, {@code ByteString},
     * {@code DataMap}, {@code DataList} or {@code Data.NULL}.
     *
     * <p>If the data schema has an Avro override, the whole translation is delegated to its
     * custom translator. Otherwise the method switches on the dereferenced schema type and
     * recurses for map values, array elements, record fields and union members, pushing the
     * current key/index/field name onto {@code _path} around each recursive call.
     *
     * <p>Validation problems (non-null value for a null schema, unknown enum symbol, fixed size
     * mismatch, unresolvable union member, unknown schema type) are reported through
     * {@code appendMessage} where the code does so, and yield {@code BAD_RESULT}.
     *
     * @param value the Avro-side value; may be {@code null} for null schemas and unions
     * @param dataSchema the schema describing the target representation
     * @param avroSchema the Avro schema corresponding to {@code value}
     * @return the translated value, or {@code BAD_RESULT} when validation failed
     */
    private Object translate(Object value, DataSchema dataSchema, Schema avroSchema) {
      AvroOverride avroOverride = getAvroOverride(dataSchema);
      if (avroOverride != null) {
        return avroOverride
            .getCustomDataTranslator()
            .avroGenericToData(this, value, avroSchema, dataSchema);
      }

      DataSchema dereferencedDataSchema = dataSchema.getDereferencedDataSchema();
      DataSchema.Type type = dereferencedDataSchema.getType();
      Object result;
      switch (type) {
        case NULL:
          if (value != null) {
            appendMessage("value must be null for null schema");
            result = BAD_RESULT;
            break;
          }
          result = Data.NULL;
          break;
        case BOOLEAN:
          result = ((Boolean) value).booleanValue();
          break;
        case INT:
          result = ((Number) value).intValue();
          break;
        case LONG:
          result = ((Number) value).longValue();
          break;
        case FLOAT:
          result = ((Number) value).floatValue();
          break;
        case DOUBLE:
          result = ((Number) value).doubleValue();
          break;
        case STRING:
          result = value.toString();
          break;
        case BYTES:
          ByteBuffer byteBuffer = (ByteBuffer) value;
          ByteString byteString = ByteString.copy(byteBuffer);
          // NOTE(review): presumably the copy advances the buffer position and the rewind
          // restores it for later readers — confirm against ByteString.copy.
          byteBuffer.rewind();
          result = byteString;
          break;
        case ENUM:
          String enumValue = value.toString();
          EnumDataSchema enumDataSchema = (EnumDataSchema) dereferencedDataSchema;
          if (enumDataSchema.getSymbols().contains(enumValue) == false) {
            appendMessage(
                "enum value %1$s not one of %2$s", enumValue, enumDataSchema.getSymbols());
            result = BAD_RESULT;
            break;
          }
          result = enumValue;
          break;
        case FIXED:
          GenericFixed fixed = (GenericFixed) value;
          byte[] fixedBytes = fixed.bytes();
          FixedDataSchema fixedDataSchema = (FixedDataSchema) dereferencedDataSchema;
          if (fixedDataSchema.getSize() != fixedBytes.length) {
            appendMessage(
                "GenericFixed size %1$d != FixedDataSchema size %2$d",
                fixedBytes.length, fixedDataSchema.getSize());
            result = BAD_RESULT;
            break;
          }
          // byteString is the local declared in the BYTES case; the switch body is one scope
          byteString = ByteString.copy(fixedBytes);
          result = byteString;
          break;
        case MAP:
          @SuppressWarnings("unchecked")
          Map<?, Object> map = (Map<?, Object>) value;
          DataSchema valueDataSchema = ((MapDataSchema) dereferencedDataSchema).getValues();
          Schema valueAvroSchema = avroSchema.getValueType();
          DataMap dataMap = new DataMap(map.size());
          for (Map.Entry<?, Object> entry : map.entrySet()) {
            String key = entry.getKey().toString();
            _path.addLast(key);
            Object entryValue = translate(entry.getValue(), valueDataSchema, valueAvroSchema);
            _path.removeLast();
            dataMap.put(key, entryValue);
          }
          result = dataMap;
          break;
        case ARRAY:
          GenericArray<?> list = (GenericArray<?>) value;
          DataSchema elementDataSchema = ((ArrayDataSchema) dereferencedDataSchema).getItems();
          Schema elementAvroSchema = avroSchema.getElementType();
          DataList dataList = new DataList(list.size());
          for (int i = 0; i < list.size(); i++) {
            _path.addLast(i);
            Object entryValue = translate(list.get(i), elementDataSchema, elementAvroSchema);
            _path.removeLast();
            dataList.add(entryValue);
          }
          result = dataList;
          break;
        case RECORD:
          GenericRecord record = (GenericRecord) value;
          RecordDataSchema recordDataSchema = (RecordDataSchema) dereferencedDataSchema;
          // dataMap is the local declared in the MAP case, reused here
          dataMap = new DataMap(avroSchema.getFields().size());
          for (RecordDataSchema.Field field : recordDataSchema.getFields()) {
            String fieldName = field.getName();
            Object fieldValue = record.get(fieldName);
            // fieldValue could be null if the Avro schema does not contain the named field or
            // the field is present with a null value. In either case we do not add a value
            // to the translated DataMap. We do not consider optional/required/default here
            // either (i.e. it is not an error if a required field is missing); the user can
            // later call ValidateDataAgainstSchema with various
            // settings for RequiredMode to obtain the desired behaviour.
            if (fieldValue == null) {
              continue;
            }
            boolean isOptional = field.getOptional();
            DataSchema fieldDataSchema = field.getType();
            Schema fieldAvroSchema = avroSchema.getField(fieldName).schema();
            if (isOptional && (fieldDataSchema.getDereferencedType() != DataSchema.Type.UNION)) {
              // Avro schema should be union with 2 types: null and the field's type.
              Map.Entry<String, Schema> fieldAvroEntry =
                  findUnionMember(fieldDataSchema, fieldAvroSchema);
              // no matching union member: the field is skipped without setting BAD_RESULT here
              if (fieldAvroEntry == null) {
                continue;
              }
              fieldAvroSchema = fieldAvroEntry.getValue();
            }
            _path.addLast(fieldName);
            dataMap.put(fieldName, translate(fieldValue, fieldDataSchema, fieldAvroSchema));
            _path.removeLast();
          }
          result = dataMap;
          break;
        case UNION:
          UnionDataSchema unionDataSchema = (UnionDataSchema) dereferencedDataSchema;
          Map.Entry<DataSchema, Schema> memberSchemas =
              findUnionMemberSchema(value, unionDataSchema, avroSchema);
          if (memberSchemas == null) {
            result = BAD_RESULT;
            break;
          }
          if (value == null) {
            // schema must be "null" schema
            result = Data.NULL;
          } else {
            DataSchema memberDataSchema = memberSchemas.getKey();
            Schema memberAvroSchema = memberSchemas.getValue();
            String key = memberDataSchema.getUnionMemberKey();
            // a non-null union value becomes a single-entry map keyed by the member key
            dataMap = new DataMap(1);
            _path.addLast(key);
            dataMap.put(key, translate(value, memberDataSchema, memberAvroSchema));
            _path.removeLast();
            result = dataMap;
          }
          break;
        default:
          appendMessage("schema type unknown %1$s", dereferencedDataSchema.getType());
          result = BAD_RESULT;
          break;
      }
      return result;
    }
Пример #30
0
 /**
  * Reads {@code field} from the record as a {@code ByteBuffer} and converts it through the
  * {@code asByteArray(ByteBuffer)} overload.
  *
  * @param operation the record to read from
  * @param field name of the field holding the buffer
  * @return the bytes produced by the buffer overload
  */
 private byte[] asByteArray(GenericRecord operation, String field) {
   return asByteArray((ByteBuffer) operation.get(field));
 }