/** Converts a raw row into its (possibly partial) dimension and complete metric values */ private StarTreeTableRow extractValues(GenericRow row) { List<Integer> dimensions = new ArrayList<Integer>(); for (String dimensionName : schema.getDimensionNames()) { Integer valueId; if (schema.getFieldSpecFor(dimensionName).isSingleValueField() && !starTreeIndexSpec.getExcludedDimensions().contains(dimensionName)) { Object value = row.getValue(dimensionName); valueId = dictionaryCreatorMap.get(dimensionName).indexOfSV(value); } else { // Multi-value fields are not supported - always ALL valueId = V1Constants.STARTREE_ALL_NUMBER.intValue(); } dimensions.add(valueId); } List<Number> metrics = new ArrayList<Number>(schema.getMetricNames().size()); for (MetricFieldSpec metricFieldSpec : schema.getMetricFieldSpecs()) { Object value = row.getValue(metricFieldSpec.getName()); switch (metricFieldSpec.getDataType()) { case INT: metrics.add((Integer) value); break; case LONG: metrics.add((Long) value); break; case DOUBLE: metrics.add((Double) value); break; case FLOAT: metrics.add((Float) value); break; default: throw new IllegalStateException("Unsupported data type " + metricFieldSpec.getDataType()); } } return new StarTreeTableRow(dimensions, metrics); }
/** Constructs the segment metadata file, and writes in outputDir */ private void writeMetadata(File outputDir, int totalDocs) throws ConfigurationException { final PropertiesConfiguration properties = new PropertiesConfiguration( new File(outputDir, V1Constants.MetadataKeys.METADATA_FILE_NAME)); properties.setProperty(SEGMENT_NAME, segmentName); properties.setProperty(TABLE_NAME, config.getTableName()); properties.setProperty(DIMENSIONS, config.getDimensions()); properties.setProperty(METRICS, config.getMetrics()); properties.setProperty(TIME_COLUMN_NAME, config.getTimeColumnName()); properties.setProperty(TIME_INTERVAL, "not_there"); properties.setProperty(SEGMENT_TOTAL_DOCS, String.valueOf(totalDocs)); // StarTree Joiner csv = Joiner.on(","); properties.setProperty(SPLIT_ORDER, csv.join(splitOrder)); properties.setProperty(SPLIT_EXCLUDES, csv.join(starTreeIndexSpec.getSplitExcludes())); properties.setProperty(MAX_LEAF_RECORDS, starTreeIndexSpec.getMaxLeafRecords()); properties.setProperty( EXCLUDED_DIMENSIONS, csv.join(starTreeIndexSpec.getExcludedDimensions())); String timeColumn = config.getTimeColumnName(); if (columnInfo.get(timeColumn) != null) { properties.setProperty(SEGMENT_START_TIME, columnInfo.get(timeColumn).getMin()); properties.setProperty(SEGMENT_END_TIME, columnInfo.get(timeColumn).getMax()); properties.setProperty(TIME_UNIT, config.getTimeUnitForSegment()); } if (config.containsKey(SEGMENT_START_TIME)) { properties.setProperty(SEGMENT_START_TIME, config.getStartTime()); } if (config.containsKey(SEGMENT_END_TIME)) { properties.setProperty(SEGMENT_END_TIME, config.getStartTime()); } if (config.containsKey(TIME_UNIT)) { properties.setProperty(TIME_UNIT, config.getTimeUnitForSegment()); } for (final String key : config.getAllCustomKeyValuePair().keySet()) { properties.setProperty(key, config.getAllCustomKeyValuePair().get(key)); } for (final String column : columnInfo.keySet()) { properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor(column, CARDINALITY), String.valueOf(columnInfo.get(column).getSortedUniqueElementsArray().length)); properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor(column, TOTAL_DOCS), String.valueOf(totalDocs)); properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor(column, DATA_TYPE), schema.getFieldSpecFor(column).getDataType().toString()); properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor(column, BITS_PER_ELEMENT), String.valueOf( SingleValueUnsortedForwardIndexCreator.getNumOfBits( columnInfo.get(column).getSortedUniqueElementsArray().length))); properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor(column, DICTIONARY_ELEMENT_SIZE), String.valueOf(dictionaryCreatorMap.get(column).getStringColumnMaxLength())); properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor(column, COLUMN_TYPE), String.valueOf(schema.getFieldSpecFor(column).getFieldType().toString())); properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor(column, IS_SORTED), String.valueOf(columnInfo.get(column).isSorted())); properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor(column, HAS_NULL_VALUE), String.valueOf(columnInfo.get(column).hasNulls())); properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor( column, V1Constants.MetadataKeys.Column.HAS_DICTIONARY), String.valueOf(columnInfo.get(column).isCreateDictionary())); properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor(column, HAS_INVERTED_INDEX), String.valueOf(true)); properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor(column, IS_SINGLE_VALUED), String.valueOf(schema.getFieldSpecFor(column).isSingleValueField())); properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor(column, MAX_MULTI_VALUE_ELEMTS), String.valueOf(columnInfo.get(column).getMaxNumberOfMutiValueElements())); properties.setProperty( V1Constants.MetadataKeys.Column.getKeyFor(column, TOTAL_NUMBER_OF_ENTRIES), String.valueOf(columnInfo.get(column).getTotalNumberOfEntries())); } properties.save(); }