Exemplo n.º 1
0
  @BeforeClass
  public static void before() throws Exception {
    final String filePath =
        TestUtils.getFileFromResourceUrl(
            DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
    if (INDEX_DIR.exists()) {
      FileUtils.deleteQuietly(INDEX_DIR);
    }

    final SegmentGeneratorConfig config =
        SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
            new File(filePath), INDEX_DIR, "time_day", TimeUnit.DAYS, "test");

    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();

    final Schema schema = AvroUtils.extractSchemaFromAvro(new File(filePath));

    final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
    final org.apache.avro.Schema avroSchema = avroReader.getSchema();
    final String[] columns = new String[avroSchema.getFields().size()];
    int i = 0;
    for (final Field f : avroSchema.getFields()) {
      columns[i] = f.name();
      i++;
    }

    uniqueEntries = new HashMap<String, Set<Object>>();
    for (final String column : columns) {
      uniqueEntries.put(column, new HashSet<Object>());
    }

    while (avroReader.hasNext()) {
      final GenericRecord rec = avroReader.next();
      for (final String column : columns) {
        Object val = rec.get(column);
        if (val instanceof Utf8) {
          val = ((Utf8) val).toString();
        }
        uniqueEntries
            .get(column)
            .add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val));
      }
    }
  }
Exemplo n.º 2
0
 public void init() throws FileNotFoundException, IOException {
   dataStream =
       new DataFileStream<GenericRecord>(
           new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>());
   schema = dataStream.getSchema();
 }