Exemple #1
0
  /**
   * Collects the default value of every field in an Avro record schema that declares one.
   *
   * <p>An empty record is serialized and then read back using a reader schema made up of only the
   * defaulted fields; the values found in the resulting record are returned.
   *
   * @param avroRecordSchema The record schema whose field defaults are wanted.
   * @return A map from field name to default value. Fields without a default are left out.
   */
  public static Map<String, Object> getDefaultValueMap(Schema avroRecordSchema) {
    List<Field> fieldsWithDefaults = new ArrayList<Field>();
    for (Field original : avroRecordSchema.getFields()) {
      if (original.defaultValue() == null) {
        continue;
      }
      // The field has to be copied rather than reused, otherwise Avro fails with
      // org.apache.avro.AvroRuntimeException: Field already used: schemaVersion
      Field copy =
          new Field(
              original.name(),
              original.schema(),
              original.doc(),
              original.defaultValue(),
              original.order());
      fieldsWithDefaults.add(copy);
    }

    Schema readerSchema = Schema.createRecord(fieldsWithDefaults);
    Schema writerSchema = Schema.createRecord(new ArrayList<Field>());
    DatumWriter<GenericRecord> emptyWriter = new GenericDatumWriter<GenericRecord>(writerSchema);
    DatumReader<GenericRecord> defaultingReader =
        new GenericDatumReader<GenericRecord>(writerSchema, readerSchema);

    // Round-trip a record with no fields through the writer/reader pair.
    GenericRecord nothing = new GenericData.Record(writerSchema);
    GenericRecord withDefaults =
        AvroUtils.readAvroEntity(AvroUtils.writeAvroEntity(nothing, emptyWriter), defaultingReader);

    Map<String, Object> defaultsByName = new HashMap<String, Object>();
    for (Field defaulted : fieldsWithDefaults) {
      String fieldName = defaulted.name();
      defaultsByName.put(fieldName, withDefaults.get(fieldName));
    }
    return defaultsByName;
  }
Exemple #2
0
 /**
  * Computes a hash code for one datum by reading it from {@code data.decoder}, using the given
  * schema to decide what to read next.
  *
  * <p>The decoder is consumed as a side effect: every branch reads (or skips) the datum it
  * hashes, so the order of the reads below must follow the order of the encoded data.
  *
  * @param data holder of the decoder positioned at the start of the datum
  * @param schema schema describing the datum to hash
  * @return the hash code of the datum
  * @throws IOException propagated from the decoder reads
  * @throws AvroRuntimeException if the schema is a map or of an unexpected type
  */
 private static int hashCode(HashData data, Schema schema) throws IOException {
   Decoder decoder = data.decoder;
   switch (schema.getType()) {
     case RECORD:
       {
         // Combine the field hashes in declaration order with the usual 31-multiplier scheme.
         int hashCode = 1;
         for (Field field : schema.getFields()) {
           if (field.order() == Field.Order.IGNORE) {
             // Ignored fields do not contribute, but their bytes must still be consumed.
             GenericDatumReader.skip(field.schema(), decoder);
             continue;
           }
           hashCode = hashCode * 31 + hashCode(data, field.schema());
         }
         return hashCode;
       }
     case ENUM:
     case INT:
       // Both are read with readInt and the value itself is the hash.
       return decoder.readInt();
     case FLOAT:
       return Float.floatToIntBits(decoder.readFloat());
     case LONG:
       {
         // Fold the 64 bits into 32, the same formula as Long.hashCode.
         long l = decoder.readLong();
         return (int) (l ^ (l >>> 32));
       }
     case DOUBLE:
       {
         // Same fold as LONG, applied to the bit pattern of the double.
         long l = Double.doubleToLongBits(decoder.readDouble());
         return (int) (l ^ (l >>> 32));
       }
     case ARRAY:
       {
         Schema elementType = schema.getElementType();
         int hashCode = 1;
         // Items arrive in blocks; a block count of 0 ends the array.
         for (long l = decoder.readArrayStart(); l != 0; l = decoder.arrayNext())
           for (long i = 0; i < l; i++) hashCode = hashCode * 31 + hashCode(data, elementType);
         return hashCode;
       }
     case MAP:
       throw new AvroRuntimeException("Can't hashCode maps!");
     case UNION:
       // The int read here selects which branch schema the following datum uses.
       return hashCode(data, schema.getTypes().get(decoder.readInt()));
     case FIXED:
       // NOTE(review): hashBytes is defined elsewhere; the meaning of its first (seed-like) and
       // last (boolean) arguments cannot be confirmed from this block.
       return hashBytes(1, data, schema.getFixedSize(), false);
     case STRING:
       // The int read here is passed to hashBytes as the length.
       return hashBytes(0, data, decoder.readInt(), false);
     case BYTES:
       return hashBytes(1, data, decoder.readInt(), true);
     case BOOLEAN:
       // Same constants as Boolean.hashCode.
       return decoder.readBoolean() ? 1231 : 1237;
     case NULL:
       // Nothing is read for null.
       return 0;
     default:
       throw new AvroRuntimeException("Unexpected schema to hashCode!");
   }
 }
Exemple #3
0
  /**
   * Tells whether two Avro schemas describe the same types, comparing them structurally. Unlike
   * {@code Schema.equals}, things such as custom field properties are not taken into account
   * (except for enum and fixed schemas, which are compared with {@code Schema.equals}).
   *
   * @param schema1 The first schema to compare
   * @param schema2 The second schema to compare
   * @return True if the types are equal, otherwise false.
   */
  public static boolean avroSchemaTypesEqual(Schema schema1, Schema schema2) {
    final Schema.Type kind = schema1.getType();
    if (kind != schema2.getType()) {
      // Different kinds of schema can never match.
      return false;
    }

    switch (kind) {
      case ENUM:
      case FIXED:
        // Enum and fixed schemas are delegated to Schema.equals.
        return schema1.equals(schema2);

      case ARRAY:
        // Arrays match when their element schemas match.
        return avroSchemaTypesEqual(schema1.getElementType(), schema2.getElementType());

      case MAP:
        // Maps match when their value schemas match.
        return avroSchemaTypesEqual(schema1.getValueType(), schema2.getValueType());

      case UNION:
        {
          // Unions match when they have the same branches in the same positions.
          final int branchCount = schema1.getTypes().size();
          if (branchCount != schema2.getTypes().size()) {
            return false;
          }
          for (int branch = 0; branch < branchCount; branch++) {
            boolean sameBranch =
                avroSchemaTypesEqual(
                    schema1.getTypes().get(branch), schema2.getTypes().get(branch));
            if (!sameBranch) {
              return false;
            }
          }
          return true;
        }

      case RECORD:
        {
          // Records match when they have the same number of fields and every field of the
          // first has a same-named field of matching type in the second.
          if (schema1.getFields().size() != schema2.getFields().size()) {
            return false;
          }
          for (Field left : schema1.getFields()) {
            Field right = schema2.getField(left.name());
            if (right == null || !avroSchemaTypesEqual(left.schema(), right.schema())) {
              return false;
            }
          }
          return true;
        }

      default:
        // Everything else is primitive; having the same type is sufficient.
        return true;
    }
  }
  /**
   * Recursive worker behind {@link #containsRecursiveRecord(Schema)}: reports whether the schema
   * contains a record that (directly or indirectly) contains a record of the same name.
   *
   * <p>{@code definedRecordNames} holds the names of the records currently being descended into.
   * A record's name is removed again once all of its fields have been checked without a hit.
   */
  protected static boolean containsRecursiveRecord(Schema s, Set<String> definedRecordNames) {
    switch (s.getType()) {
      case RECORD:
        {
          String recordName = s.getName();
          // add() returns false when the name is already in the set, i.e. we are inside a
          // record with this name already.
          if (!definedRecordNames.add(recordName)) {
            return true;
          }
          for (Field member : s.getFields()) {
            if (containsRecursiveRecord(member.schema(), definedRecordNames)) {
              return true;
            }
          }
          // Done with this record: its name no longer belongs to the current path.
          definedRecordNames.remove(recordName);
          return false;
        }

      case ARRAY:
        // An array is recursive if its element type is.
        return containsRecursiveRecord(s.getElementType(), definedRecordNames);

      case MAP:
        // A map is recursive if its value type is.
        return containsRecursiveRecord(s.getValueType(), definedRecordNames);

      case UNION:
        // A union is recursive if any of its branches is.
        for (Schema branch : s.getTypes()) {
          if (containsRecursiveRecord(branch, definedRecordNames)) {
            return true;
          }
        }
        return false;

      default:
        // Nothing else can contain a record.
        return false;
    }
  }
Exemple #5
0
  /**
   * Builds a segment from the {@code AVRO_DATA} resource into {@code INDEX_DIR} and then scans the
   * same Avro file to record, for every column, the set of distinct values it contains (stored in
   * {@code uniqueEntries}).
   *
   * @throws Exception if the segment cannot be built or the Avro file cannot be read
   */
  @BeforeClass
  public static void before() throws Exception {
    final String filePath =
        TestUtils.getFileFromResourceUrl(
            DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
    // deleteQuietly is a no-op for a missing directory, so no existence check is needed.
    FileUtils.deleteQuietly(INDEX_DIR);

    final SegmentGeneratorConfig config =
        SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(
            new File(filePath), INDEX_DIR, "time_day", TimeUnit.DAYS, "test");

    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
    driver.init(config);
    driver.build();

    final Schema schema = AvroUtils.extractSchemaFromAvro(new File(filePath));

    final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
    try {
      // Column names are taken from the Avro schema of the file itself.
      final org.apache.avro.Schema avroSchema = avroReader.getSchema();
      final String[] columns = new String[avroSchema.getFields().size()];
      int i = 0;
      for (final Field f : avroSchema.getFields()) {
        columns[i] = f.name();
        i++;
      }

      uniqueEntries = new HashMap<String, Set<Object>>();
      for (final String column : columns) {
        uniqueEntries.put(column, new HashSet<Object>());
      }

      while (avroReader.hasNext()) {
        final GenericRecord rec = avroReader.next();
        for (final String column : columns) {
          Object val = rec.get(column);
          if (val instanceof Utf8) {
            // Avro strings are compared as java.lang.String.
            val = ((Utf8) val).toString();
          }
          uniqueEntries
              .get(column)
              .add(getAppropriateType(schema.getFieldSpecFor(column).getDataType(), val));
        }
      }
    } finally {
      // The reader holds the underlying file open; release it even when the scan fails.
      avroReader.close();
    }
  }
  /**
   * Tells whether the field holds a single value, i.e. whether its schema (after being passed
   * through {@code extractSchemaFromUnionIfNeeded}) is anything other than an Avro array.
   */
  private static boolean isSingleValueField(Field field) {
    final org.apache.avro.Schema effectiveSchema = extractSchemaFromUnionIfNeeded(field.schema());
    return effectiveSchema.getType() != Type.ARRAY;
  }
  /**
   * Writes a record. When dirty bits are enabled and {@code schema} is the schema of the
   * persistent object being written, the record is preceded by two boolean arrays (dirty flags,
   * then readable flags, one entry per field) and only the readable fields are written. In every
   * other case the write is delegated to the superclass.
   *
   * <p>The first datum seen is remembered as the persistent object.
   */
  @Override
  @SuppressWarnings("unchecked")
  protected void writeRecord(Schema schema, Object datum, Encoder out) throws IOException {
    if (persistent == null) {
      persistent = (T) datum;
    }

    // Plain serialization unless dirty bits are wanted AND this is the top-level schema.
    if (!writeDirtyBits || !schema.equals(persistent.getSchema())) {
      super.writeRecord(schema, datum, out);
      return;
    }

    final int fieldCount = schema.getFields().size();
    final boolean[] dirty = new boolean[fieldCount];
    final boolean[] readable = new boolean[fieldCount];
    final StateManager stateManager = persistent.getStateManager();
    for (int pos = 0; pos < fieldCount; pos++) {
      dirty[pos] = stateManager.isDirty(persistent, pos);
      readable[pos] = stateManager.isReadable(persistent, pos);
    }

    // Header: dirty flags first, readable flags second.
    IOUtils.writeBoolArray(out, dirty);
    IOUtils.writeBoolArray(out, readable);

    // Body: only the fields flagged readable.
    for (Field member : schema.getFields()) {
      if (!readable[member.pos()]) {
        continue;
      }
      Object memberValue = getData().getField(datum, member.name(), member.pos());
      write(member.schema(), memberValue, out);
    }
  }
  /**
   * Reports whether the schema, or anything nested inside it, is a union that {@code
   * isAcceptableUnion} rejects.
   */
  public static boolean containsGenericUnion(Schema s) {
    switch (s.getType()) {
      case RECORD:
        // A record contains one if any of its fields does.
        for (Field member : s.getFields()) {
          if (containsGenericUnion(member.schema())) {
            return true;
          }
        }
        return false;

      case ARRAY:
        // Look inside the element type.
        return containsGenericUnion(s.getElementType());

      case MAP:
        // Look inside the value type.
        return containsGenericUnion(s.getValueType());

      case UNION:
        // First look inside every branch ...
        for (Schema branch : s.getTypes()) {
          if (containsGenericUnion(branch)) {
            return true;
          }
        }
        // ... then judge the union itself.
        return !isAcceptableUnion(s);

      default:
        // No other type can hold a union.
        return false;
    }
  }
 /**
  * Applies every field of {@code delta} to {@code record}. Record, array, enum and fixed values
  * are handed to their dedicated {@code process*Field} methods; any other value is wrapped with
  * {@code commonFactory.createCommonValue} and set directly.
  */
 private void updateRecord(CommonRecord record, GenericRecord delta) {
   for (Field field : delta.getSchema().getFields()) {
     final String name = field.name();
     final Object deltaValue = delta.get(name);

     if (LOG.isDebugEnabled()) {
       LOG.debug(
           "Processing field \"{}\", current value: {}",
           name,
           record.getField(name) == null ? null : record.getField(name).toString());
     }

     if (AvroGenericUtils.isRecord(deltaValue)) {
       processRecordField(record, (GenericRecord) deltaValue, name);
       continue;
     }
     if (AvroGenericUtils.isArray(deltaValue)) {
       processArrayField(record, (GenericArray) deltaValue, name);
       continue;
     }
     if (AvroGenericUtils.isEnum(deltaValue)) {
       processEnumField(record, (GenericEnumSymbol) deltaValue, name);
       continue;
     }
     if (AvroGenericUtils.isFixed(deltaValue)) {
       processFixedField(record, (GenericFixed) deltaValue, name);
       continue;
     }
     // Anything else is stored as a plain value.
     record.setField(name, commonFactory.createCommonValue(deltaValue));
   }
 }
Exemple #10
0
  /**
   * Adds one Avro field to {@code parent}.
   *
   * <ul>
   *   <li>A record field becomes a child tree named after the field, filled recursively with the
   *       record's own fields.
   *   <li>An array-of-records field becomes a child tree named after the field, filled with the
   *       fields of the element record; {@code multipleData} is set as well.
   *   <li>Any other non-array field becomes a leaf element named after the field.
   * </ul>
   *
   * <p>NOTE(review): an array whose elements are not records adds nothing to the tree. That is
   * the pre-existing behaviour; confirm it is intended.
   *
   * @param field the Avro field to add
   * @param parent the tree that receives the new child tree or element
   */
  private static void fetchToTree(Field field, Tree parent) {
    final Schema fieldSchema = field.schema();
    // Compare the schema type enum directly instead of going through its string form.
    final Schema.Type fieldType = fieldSchema.getType();

    if (fieldType == Schema.Type.RECORD) {
      Tree child = new Tree(parent);
      child.setName(field.name());
      for (Field nested : fieldSchema.getFields()) {
        fetchToTree(nested, child);
      }
      parent.getTrees().add(child);
    } else if (fieldType == Schema.Type.ARRAY) {
      Schema elementSchema = fieldSchema.getElementType();
      if (elementSchema.getType() == Schema.Type.RECORD) {
        multipleData = true;

        // The child tree carries the field's name; the element record's fields go straight
        // under it, without an intermediate node for the element record itself.
        Tree child = new Tree(parent);
        child.setName(field.name());
        for (Field nested : elementSchema.getFields()) {
          fetchToTree(nested, child);
        }
        parent.getTrees().add(child);
      }
    } else {
      Element leaf = new Element(parent);
      leaf.setName(field.name());
      parent.getElements().add(leaf);
    }
  }
Exemple #11
0
  /**
   * Maps an Avro field to a column {@code DataType}.
   *
   * <p>Schemas are passed through {@code extractSchemaFromUnionIfNeeded} before being inspected.
   * For an array field the type of the element is used; when the element is a record it must
   * have exactly one field, and that field's type is used.
   *
   * @param field the Avro field
   * @return the data type for the field (or for its elements, if it is an array)
   * @throws RuntimeException if the field is an array of records with a field count other than one
   */
  public static DataType getColumnType(Field field) {
    final org.apache.avro.Schema columnSchema = extractSchemaFromUnionIfNeeded(field.schema());
    final Type columnType = columnSchema.getType();
    if (columnType != Type.ARRAY) {
      return DataType.valueOf(columnType);
    }

    org.apache.avro.Schema itemSchema =
        extractSchemaFromUnionIfNeeded(columnSchema.getElementType());
    if (itemSchema.getType() == Type.RECORD) {
      if (itemSchema.getFields().size() != 1) {
        throw new RuntimeException("More than one schema in Multi-value column!");
      }
      // Look through the single-field wrapper record to the field it wraps.
      itemSchema = extractSchemaFromUnionIfNeeded(itemSchema.getFields().get(0).schema());
    }
    return DataType.valueOf(itemSchema.getType());
  }
Exemple #12
0
 /**
  * Flattens the given Avro datum into the given record, walking it as described by the given
  * Avro schema. Leaf values are stored under {@code prefix}; record fields and map entries
  * extend the prefix with {@code "/" + name}, while array items and union branches keep the
  * prefix unchanged.
  */
 private void extractTree(Object datum, Schema schema, Record outputRecord, String prefix) {
   switch (schema.getType()) {
     case RECORD:
       {
         IndexedRecord source = (IndexedRecord) datum;
         String childBase = prefix + "/";
         for (Field member : schema.getFields()) {
           Object memberValue = source.get(member.pos());
           extractTree(memberValue, member.schema(), outputRecord, childBase + member.name());
         }
         return;
       }
     case ENUM:
       {
         GenericEnumSymbol symbol = (GenericEnumSymbol) datum;
         outputRecord.put(prefix, symbol.toString());
         return;
       }
     case ARRAY:
       {
         // Every item is stored under the same prefix as the array itself.
         for (Object item : (Collection) datum) {
           extractTree(item, schema.getElementType(), outputRecord, prefix);
         }
         return;
       }
     case MAP:
       {
         Map<CharSequence, ?> entries = (Map<CharSequence, ?>) datum;
         for (Map.Entry<CharSequence, ?> entry : entries.entrySet()) {
           String entryPath = prefix + "/" + entry.getKey().toString();
           extractTree(entry.getValue(), schema.getValueType(), outputRecord, entryPath);
         }
         return;
       }
     case UNION:
       {
         // Descend into whichever branch the datum actually is, without extending the prefix.
         int branch = GenericData.get().resolveUnion(schema, datum);
         extractTree(datum, schema.getTypes().get(branch), outputRecord, prefix);
         return;
       }
     case FIXED:
       {
         GenericFixed fixed = (GenericFixed) datum;
         outputRecord.put(prefix, fixed.bytes());
         return;
       }
     case BYTES:
       {
         // Read through a duplicate so the position of the caller's buffer is left untouched.
         ByteBuffer view = ((ByteBuffer) datum).duplicate();
         byte[] content = new byte[view.remaining()];
         view.get(content);
         outputRecord.put(prefix, content);
         return;
       }
     case STRING:
       outputRecord.put(prefix, datum.toString());
       return;
     case INT:
     case LONG:
     case FLOAT:
     case DOUBLE:
     case BOOLEAN:
       // Primitive values are stored as they are.
       outputRecord.put(prefix, datum);
       return;
     case NULL:
       // Nothing is stored for null.
       return;
     default:
       throw new MorphlineRuntimeException("Unknown Avro schema type: " + schema.getType());
   }
 }
  /**
   * Converts every row of a JDBC result set into a new instance of the Avro specific-record class
   * named by {@code collectProperties.getAvroSchemaClass()}.
   *
   * <p>Each column whose name matches a field of the record's schema is read with the JDBC
   * getter that matches the Java type of the record's same-named declared field, and stored in
   * the record. Columns without a matching schema field are ignored. A SQL NULL is stored as
   * {@code null}.
   *
   * <p>When {@code sql2KafkaProperties.getCollectorListResultsExit()} is {@code "true"}, every
   * field of every record is logged and the JVM is terminated with {@code System.exit(0)} once
   * all rows have been read.
   *
   * <p>SQL and reflection failures are logged, not thrown; the records collected up to that
   * point are returned.
   *
   * @param resultSet the rows to convert
   * @param collectProperties supplies the name of the record class to instantiate
   * @param sql2KafkaProperties supplies the "list results and exit" flag
   * @return the converted records, possibly empty, never {@code null}
   */
  public static List<SpecificRecordBase> parse(
      ResultSet resultSet,
      CollectProperties collectProperties,
      Sql2KafkaProperties sql2KafkaProperties) {
    List<SpecificRecordBase> metricsToSend = new ArrayList<SpecificRecordBase>();
    // Constant on the left so that an unset property reads as "false" instead of throwing.
    final boolean listResultsAndExit =
        "true".equals(sql2KafkaProperties.getCollectorListResultsExit());
    try {
      Class<?> schemaClass = Class.forName(collectProperties.getAvroSchemaClass());
      final int columnCount = resultSet.getMetaData().getColumnCount();

      while (resultSet.next()) {
        SpecificRecordBase specificRecordBase = (SpecificRecordBase) schemaClass.newInstance();

        for (int c = 1; c <= columnCount; c++) {
          String fieldName = resultSet.getMetaData().getColumnName(c);
          if (specificRecordBase.getSchema().getField(fieldName) == null) continue;

          Object value;
          switch (specificRecordBase
              .getClass()
              .getDeclaredField(fieldName)
              .getType()
              .getCanonicalName()) {
            case "java.lang.Boolean":
              value = resultSet.getBoolean(fieldName);
              break;
            case "java.lang.Integer":
              value = resultSet.getInt(fieldName);
              break;
            case "java.lang.Long":
              value = resultSet.getLong(fieldName);
              break;
            case "java.lang.Float":
              value = resultSet.getFloat(fieldName);
              break;
            case "java.lang.Double":
              value = resultSet.getDouble(fieldName);
              break;
            case "java.lang.Byte":
              value = resultSet.getByte(fieldName);
              break;
            case "java.lang.String":
            default:
              // Anything that is not a recognised wrapper type is read as text.
              value = resultSet.getString(fieldName);
              break;
          }

          // The primitive getters report SQL NULL as 0 / false. The target fields are wrapper
          // types, so keep the distinction and store a real null instead.
          if (resultSet.wasNull()) {
            value = null;
          }

          specificRecordBase.put(fieldName, value);
        }

        if (listResultsAndExit) {
          for (Field field : specificRecordBase.getSchema().getFields())
            LOG.info(field.name() + ": " + specificRecordBase.get(field.pos()));
        }

        metricsToSend.add(specificRecordBase);
      }

      if (listResultsAndExit) {
        // Diagnostic mode: the results have been listed, stop the process here.
        System.exit(0);
      }

    } catch (SQLException
        | InstantiationException
        | IllegalAccessException
        | ClassNotFoundException
        | SecurityException
        | NoSuchFieldException e) {
      LOG.error("Error parsing results.", e);
    }
    return metricsToSend;
  }
Exemple #14
0
 /**
  * Compares two binary-encoded data of the given schema by reading them from {@code d.d1} and
  * {@code d.d2}.
  *
  * <p>Both decoders are consumed as a side effect. When the data are equal, both have been read
  * completely; when they differ, reading stops at the first difference.
  *
  * @param d holder of the two decoders, each positioned at the start of its datum
  * @param schema schema describing both data
  * @return 0 if the data are equal, a positive value if the first is greater, a negative value
  *     if the first is less (not necessarily 1 or -1)
  * @throws IOException propagated from the decoder reads
  * @throws AvroRuntimeException if the schema is a map or of an unexpected type
  */
 private static int compare(Decoders d, Schema schema) throws IOException {
   Decoder d1 = d.d1;
   Decoder d2 = d.d2;
   switch (schema.getType()) {
     case RECORD:
       {
         for (Field field : schema.getFields()) {
           if (field.order() == Field.Order.IGNORE) {
             // Ignored fields do not take part, but their bytes must be consumed on both sides.
             GenericDatumReader.skip(field.schema(), d1);
             GenericDatumReader.skip(field.schema(), d2);
             continue;
           }
           int c = compare(d, field.schema());
           // The first differing field decides; a descending field flips the sign.
           if (c != 0) return (field.order() != Field.Order.DESCENDING) ? c : -c;
         }
         return 0;
       }
     case ENUM:
     case INT:
       {
         int i1 = d1.readInt();
         int i2 = d2.readInt();
         return i1 == i2 ? 0 : (i1 > i2 ? 1 : -1);
       }
     case LONG:
       {
         long l1 = d1.readLong();
         long l2 = d2.readLong();
         return l1 == l2 ? 0 : (l1 > l2 ? 1 : -1);
       }
     case ARRAY:
       {
         long i = 0; // position in array
         long r1 = 0, r2 = 0; // remaining in current block
         long l1 = 0, l2 = 0; // total array length
         while (true) {
           if (r1 == 0) { // refill blocks(s)
             r1 = d1.readLong();
             if (r1 < 0) {
               // A negative count is followed by one more long, which is read and discarded.
               r1 = -r1;
               d1.readLong();
             }
             l1 += r1;
           }
           if (r2 == 0) {
             r2 = d2.readLong();
             if (r2 < 0) {
               r2 = -r2;
               d2.readLong();
             }
             l2 += r2;
           }
           if (r1 == 0 || r2 == 0) { // empty block: done
             return (l1 == l2) ? 0 : ((l1 > l2) ? 1 : -1);
           }
           long l = Math.min(l1, l2);
           while (i < l) { // compare to end of block
             int c = compare(d, schema.getElementType());
             if (c != 0) return c;
             i++;
             r1--;
             r2--;
           }
         }
       }
     case MAP:
       throw new AvroRuntimeException("Can't compare maps!");
     case UNION:
       {
         // Data in different branches are ordered by branch index.
         int i1 = d1.readInt();
         int i2 = d2.readInt();
         if (i1 == i2) {
           return compare(d, schema.getTypes().get(i1));
         } else {
           return i1 - i2;
         }
       }
     case FIXED:
       {
         int size = schema.getFixedSize();
         int c =
             compareBytes(d.d1.getBuf(), d.d1.getPos(), size, d.d2.getBuf(), d.d2.getPos(), size);
         d.d1.skipFixed(size);
         d.d2.skipFixed(size);
         return c;
       }
     case STRING:
     case BYTES:
       {
         // The leading int is the length; the content is compared in place and then skipped.
         int l1 = d1.readInt();
         int l2 = d2.readInt();
         int c = compareBytes(d.d1.getBuf(), d.d1.getPos(), l1, d.d2.getBuf(), d.d2.getPos(), l2);
         d.d1.skipFixed(l1);
         d.d2.skipFixed(l2);
         return c;
       }
     case FLOAT:
       {
         // Float.compare is a total order. Comparing with == and > would report NaN as "less"
         // in both directions, which breaks the symmetry a comparison must have.
         float f1 = d1.readFloat();
         float f2 = d2.readFloat();
         return Float.compare(f1, f2);
       }
     case DOUBLE:
       {
         // Same reasoning as FLOAT.
         double f1 = d1.readDouble();
         double f2 = d2.readDouble();
         return Double.compare(f1, f2);
       }
     case BOOLEAN:
       {
         // false sorts before true.
         boolean b1 = d1.readBoolean();
         boolean b2 = d2.readBoolean();
         return (b1 == b2) ? 0 : (b1 ? 1 : -1);
       }
     case NULL:
       return 0;
     default:
       throw new AvroRuntimeException("Unexpected schema to compare!");
   }
 }
Exemple #15
0
  /**
   * Reads every record from {@code dataStream} and builds the expected-result fixtures {@code
   * aggregationQueries} and {@code groupByQueries}.
   *
   * <p>The columns considered are the schema fields listed in {@code dimensions} or equal to
   * {@code time}; fields listed in {@code metrics} are only used as values to aggregate. For
   * each considered column and each value seen in it, the method tracks how many records had
   * that value and one running figure per metric (maintained by {@code
   * getAppropriateNumberType}, which is defined elsewhere). It also tracks, per column value,
   * metric and other dimension, a figure per value of that other dimension.
   *
   * <p>From these it generates {@code count(*)} queries (skipped when {@code isRealtimeSegment}
   * is set), {@code sum} queries, and {@code sum ... group by} queries, each paired with the
   * figure computed here. Any query whose text contains {@code "null"} is dropped.
   *
   * <p>As side effects, {@code isSingleValueMap} and {@code dataTypeMap} are filled in and {@code
   * dataStream} is closed.
   *
   * @throws IOException declared by the method; presumably raised when closing the stream —
   *     TODO confirm
   */
  public void generateSimpleAggregationOnSingleColumnFilters() throws IOException {
    // column -> value -> number of records having that value
    final Map<String, Map<Object, Integer>> cardinalityCountsMap =
        new HashMap<String, Map<Object, Integer>>();
    // column -> value -> metric -> running figure for that metric
    final Map<String, Map<Object, Map<String, Double>>> sumMap =
        new HashMap<String, Map<Object, Map<String, Double>>>();
    // here string key is columnName:columnValue:MetricName:GroupColumnName:groupKey:metricValue

    // "column:value:metric:groupColumn" -> group value -> running figure
    final Map<String, Map<Object, Double>> sumGroupBy = new HashMap<String, Map<Object, Double>>();

    aggregationQueries = new ArrayList<AvroQueryGenerator.TestSimpleAggreationQuery>();
    groupByQueries = new ArrayList<AvroQueryGenerator.TestGroupByAggreationQuery>();
    for (final Field f : schema.getFields()) {
      final String fieldName = f.name();
      if (dimensions.contains(fieldName) || metrics.contains(fieldName) || time.equals(fieldName)) {
        isSingleValueMap.put(fieldName, isSingleValueField(f));
        dataTypeMap.put(fieldName, getColumnType(f));
        // Metrics are aggregated, not filtered on, so they get no cardinality entry.
        if (!metrics.contains(fieldName)) {
          cardinalityCountsMap.put(fieldName, new HashMap<Object, Integer>());
        }
      }
    }

    for (final String column : cardinalityCountsMap.keySet()) {
      sumMap.put(column, new HashMap<Object, Map<String, Double>>());
    }

    // here string key is columnName:columnValue:MetricName:GroupColumnName:groupKey:metricValue

    while (dataStream.hasNext()) {
      final GenericRecord record = dataStream.next();

      for (final String column : cardinalityCountsMap.keySet()) {
        Object value = record.get(column);

        // Replace a missing value by a per-type placeholder. The switch looks at the declared
        // field schema type and has no default branch, so for any other type (a union, for
        // instance) the value stays null.
        if (value == null) {
          switch (schema.getField(column).schema().getType()) {
            case INT:
              value = 0;
              break;
            case FLOAT:
              value = 0F;
              break;
            case LONG:
              value = 0L;
              break;
            case DOUBLE:
              value = 0D;
              break;
            case STRING:
            case BOOLEAN:
              value = "null";
              break;
          }
        }

        if (value instanceof Utf8) {
          value = ((Utf8) value).toString();
        }

        // Array-valued columns are left out of all the bookkeeping below.
        if (value instanceof Array) {
          continue;
        }

        // here string key is columnName:columnValue:MetricName:GroupColumnName:groupKey:metricValue

        for (final String metricName : metrics) {
          // NOTE(review): the key uses the raw record.get(column), not the normalised 'value'
          // computed above.
          final String groupbyKeyBase = column + ":" + record.get(column) + ":" + metricName;
          // Starts at 1 and stops the loop at 4, so at most three other dimensions are paired
          // with each column.
          int dimCounter = 1;
          for (final String dim : cardinalityCountsMap.keySet()) {
            if (!dim.equals(column)) {
              dimCounter++;
              final String groupbyKey = groupbyKeyBase + ":" + dim;
              if (sumGroupBy.containsKey(groupbyKey)) {
                if (sumGroupBy.get(groupbyKey).containsKey(record.get(dim))) {
                  // Group value seen before: fold this record's metric into the stored figure.
                  sumGroupBy
                      .get(groupbyKey)
                      .put(
                          record.get(dim),
                          getAppropriateNumberType(
                              metricName,
                              record.get(metricName),
                              sumGroupBy.get(groupbyKey).get(record.get(dim))));
                } else {
                  // First record for this group value: start from the metric itself.
                  sumGroupBy
                      .get(groupbyKey)
                      .put(record.get(dim), Double.parseDouble(record.get(metricName).toString()));
                }
              } else {
                sumGroupBy.put(groupbyKey, new HashMap<Object, Double>());
                sumGroupBy
                    .get(groupbyKey)
                    .put(record.get(dim), Double.parseDouble(record.get(metricName).toString()));
              }
            }
            if (dimCounter == 4) {
              break;
            }
          }
        }

        // Count this occurrence of the value.
        if (cardinalityCountsMap.get(column).containsKey(value)) {
          cardinalityCountsMap
              .get(column)
              .put(value, cardinalityCountsMap.get(column).get(value) + 1);
        } else {
          cardinalityCountsMap.get(column).put(value, 1);
        }

        if (!sumMap.get(column).containsKey(value)) {
          sumMap.get(column).put(value, new HashMap<String, Double>());
        }

        // Update the per-metric figure for this value, starting from 0 the first time.
        for (final String metric : metrics) {
          if (!sumMap.get(column).get(value).containsKey(metric)) {
            sumMap
                .get(column)
                .get(value)
                .put(metric, getAppropriateNumberType(metric, record.get(metric), 0D));
          } else {
            sumMap
                .get(column)
                .get(value)
                .put(
                    metric,
                    getAppropriateNumberType(
                        metric, record.get(metric), sumMap.get(column).get(value).get(metric)));
          }
        }
        // here string key is columnName:columnValue:MetricName:GroupColumnName:groupKey:metricValue
      }
    }

    dataStream.close();

    // count(*) queries, one per (column, value); not generated for realtime segments.
    if (!isRealtimeSegment) {
      for (final String column : cardinalityCountsMap.keySet()) {
        for (final Object entry : cardinalityCountsMap.get(column).keySet()) {
          final StringBuilder bld = new StringBuilder();
          bld.append("select count(*) from ");
          bld.append(resourceName);
          bld.append(" where ");
          bld.append(column);
          bld.append("=");
          bld.append("'");
          bld.append(entry);
          bld.append("'");
          bld.append(" ");
          bld.append("limit 0");
          String queryString = bld.toString();
          // Drops every query whose text contains "null" anywhere.
          if (!queryString.contains("null")) {
            aggregationQueries.add(
                new TestSimpleAggreationQuery(
                    queryString, new Double(cardinalityCountsMap.get(column).get(entry))));
          }
        }
      }
    }

    // sum queries, one per (column, value, metric).
    for (final String column : sumMap.keySet()) {
      for (final Object value : sumMap.get(column).keySet()) {
        for (final String metric : sumMap.get(column).get(value).keySet()) {
          final StringBuilder bld = new StringBuilder();
          bld.append("select sum('" + metric + "') from ");
          bld.append(resourceName);
          bld.append(" where ");
          bld.append(column);
          bld.append("=");
          bld.append("'");
          bld.append(value);
          bld.append("'");
          bld.append(" ");
          bld.append("limit 0");
          String queryString = bld.toString();
          if (!queryString.contains("null")) {
            aggregationQueries.add(
                new TestSimpleAggreationQuery(
                    bld.toString(), sumMap.get(column).get(value).get(metric)));
          }
        }
      }
    }

    // group-by queries; the key is split back into column, value, metric and group column.
    for (final String groupKey : sumGroupBy.keySet()) {
      final String columnName = groupKey.split(":")[0];
      final String columnValue = groupKey.split(":")[1];
      final String metricColumn = groupKey.split(":")[2];
      final String groupByColumnName = groupKey.split(":")[3];

      final StringBuilder bld = new StringBuilder();
      bld.append("select sum('" + metricColumn + "') from ");
      bld.append(resourceName);
      bld.append(" where ");
      bld.append(columnName);
      bld.append("=");
      bld.append("'");
      bld.append(columnValue);
      bld.append("'");
      bld.append(" ");
      bld.append(" group by ");
      bld.append(groupByColumnName);
      bld.append(" top 10 ");
      bld.append("limit 0");
      String queryString = bld.toString();
      if (!queryString.contains("null")) {
        groupByQueries.add(
            new TestGroupByAggreationQuery(bld.toString(), sumGroupBy.get(groupKey)));
      }
    }
  }
Exemple #16
0
  /**
   * Checks that deep-copying each field of a fully populated {@code Interop} record yields an
   * equivalent value and, except for enum, null and string fields, a different instance.
   */
  @Test
  public void testDeepCopy() {
    // Give every field of an Interop instance a non-default value.
    Interop.Builder builder = Interop.newBuilder();
    builder.setArrayField(Arrays.asList(1.1, 1.2, 1.3, 1.4));
    builder.setBoolField(true);
    builder.setBytesField(ByteBuffer.wrap(new byte[] {1, 2, 3, 4}));
    builder.setDoubleField(3.14d);
    builder.setEnumField(Kind.B);
    builder.setFixedField(new MD5(new byte[] {4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1}));
    builder.setFloatField(6.022f);
    builder.setIntField(32);
    builder.setLongField(64L);

    Map<String, Foo> fooByKey = new HashMap<String, Foo>(1);
    fooByKey.put("foo", Foo.newBuilder().setLabel("bar").build());
    builder.setMapField(fooByKey);

    builder.setNullField(null);

    // A two-level tree: "/" with a single, childless "home" node.
    Node.Builder home = Node.newBuilder().setLabel("home");
    home.setChildren(new ArrayList<Node>(0));
    Node.Builder root = Node.newBuilder().setLabel("/");
    root.setChildren(Arrays.asList(home.build()));
    builder.setRecordField(root.build());

    builder.setStringField("Hello");
    builder.setUnionField(true);

    Interop interop = builder.build();

    for (Field field : Interop.SCHEMA$.getFields()) {
      final int pos = field.pos();
      final Object original = interop.get(pos);

      // The copy must be equivalent to the original; byte buffers are compared by content.
      if (original instanceof ByteBuffer) {
        ByteBuffer copy = (ByteBuffer) GenericData.get().deepCopy(field.schema(), original);
        assertTrue(Arrays.equals(((ByteBuffer) original).array(), copy.array()));
      } else {
        assertEquals(original, SpecificData.get().deepCopy(field.schema(), original));
      }

      // The copy must be a different instance, except for enum, null and string fields.
      // Both sides read the field from the record separately, not through a shared local.
      final Type fieldType = field.schema().getType();
      final boolean exempt =
          fieldType == Type.ENUM || fieldType == Type.NULL || fieldType == Type.STRING;
      if (!exempt) {
        assertFalse(
            "Field " + field.name() + " is same instance in deep copy",
            interop.get(pos) == GenericData.get().deepCopy(field.schema(), interop.get(pos)));
      }
    }
  }
Exemple #17
0
 /**
  * Given an avro Schema.Field instance, make a clone of it.
  *
  * <p>The clone carries over the name, schema, doc, default value and sort order of the
  * original. NOTE(review): anything else a field may carry (aliases or custom properties, for
  * example) is not copied — confirm whether callers need it.
  *
  * @param field The field to clone.
  * @return The cloned field.
  */
 public static Field cloneField(Field field) {
   // Pass the order explicitly; the four-argument constructor would not carry it over.
   return new Field(
       field.name(), field.schema(), field.doc(), field.defaultValue(), field.order());
 }