Exemplo n.º 1
0
  /**
   * Creates and builds a dictionary for every field spec in the schema (dimensions and any other
   * field returned by {@code getAllFieldSpecs()}), then looks up each dimension's StarTree "all"
   * value in its dictionary.
   *
   * @param schema schema whose field specs drive dictionary creation
   * @param columnInfo per-column index creation info, keyed by column name
   * @param file location handed through to every {@code SegmentDictionaryCreator}
   * @throws RuntimeException if a column is configured without a dictionary (not implemented)
   * @throws Exception propagated from dictionary construction or building
   */
  private void initializeAndBuildDictionaries(
      Schema schema, Map<String, ColumnIndexCreationInfo> columnInfo, File file) throws Exception {
    for (final FieldSpec spec : schema.getAllFieldSpecs()) {
      final String column = spec.getName();
      final ColumnIndexCreationInfo info = columnInfo.get(column);

      // Only dictionary-encoded columns are supported; fail fast for anything else.
      if (!info.isCreateDictionary()) {
        throw new RuntimeException("Creation of indices without dictionaries is not implemented!");
      }

      final SegmentDictionaryCreator creator =
          new SegmentDictionaryCreator(
              info.hasNulls(), info.getSortedUniqueElementsArray(), spec, file);
      // Register before building so the map reflects the creator even if build() throws.
      dictionaryCreatorMap.put(column, creator);
      creator.build();
    }

    // Look up the "all" value of every dimension in its dictionary. The returned index is not
    // used here, so it is deliberately discarded.
    // NOTE(review): the previous comment read "Add __ALL__ to dimension dictionaries"; whether
    // the lookup has any registering side effect cannot be confirmed from this method alone.
    for (DimensionFieldSpec spec : schema.getDimensionFieldSpecs()) {
      final Object allValue = StarTreeIndexNode.getAllValue(spec);
      if (schema.getFieldSpecFor(spec.getName()).isSingleValueField()) {
        dictionaryCreatorMap.get(spec.getName()).indexOfSV(allValue);
      } else {
        dictionaryCreatorMap.get(spec.getName()).indexOfMV(allValue);
      }
    }
  }
Exemplo n.º 2
0
  /**
   * Rebuilds the dictionary of every metric column from the supplied unique values (which are
   * computed after aggregation), replacing the entry previously held in the dictionary creator
   * map.
   *
   * @param uniqueMetricValues unique values per metric column, keyed by column name
   * @throws Exception propagated from dictionary construction or building
   */
  private void resetMetricDictionaries(Map<String, Set<Object>> uniqueMetricValues)
      throws Exception {
    for (MetricFieldSpec metricSpec : schema.getMetricFieldSpecs()) {
      final String metricName = metricSpec.getName();

      // The dictionary creator is handed the values in sorted order.
      final Object[] sortedValues = uniqueMetricValues.get(metricName).toArray();
      Arrays.sort(sortedValues);

      // Swap in a fresh creator for this metric and build it right away.
      final boolean hasNulls = columnInfo.get(metricName).hasNulls();
      final SegmentDictionaryCreator rebuilt =
          new SegmentDictionaryCreator(hasNulls, sortedValues, metricSpec, outDir);
      dictionaryCreatorMap.put(metricName, rebuilt);
      rebuilt.build();
    }
  }
Exemplo n.º 3
0
  /**
   * Initializes this creator. In order, it:
   *
   * <ol>
   *   <li>builds the column dictionaries;
   *   <li>assigns positional ids to the schema's dimension and metric names;
   *   <li>computes the split order and initializes the StarTree builder;
   *   <li>appends every record from the record reader to the builder and builds the tree;
   *   <li>rebuilds the metric dictionaries from the post-aggregation unique values;
   *   <li>creates the StarTree directory under {@code outDir};
   *   <li>registers forward (and, if enabled in {@code config}, inverted) index creators for each
   *       dictionary column, once for {@code outDir} and once for the StarTree directory.
   * </ol>
   *
   * @param config segment generator configuration; read here for
   *     {@code createInvertedIndexEnabled()}
   * @param columnInfo per-column index creation info, keyed by column name
   * @param schema schema supplying dimension / metric names and field specs
   * @param totalDocs not used by this method; document counts come from the StarTree builder
   * @param outDir output directory for the raw segment; the StarTree directory is created in it
   * @throws RuntimeException if the StarTree directory cannot be created
   * @throws Exception propagated from the helpers and creators invoked here
   */
  @Override
  public void init(
      SegmentGeneratorConfig config,
      Map<String, ColumnIndexCreationInfo> columnInfo,
      Schema schema,
      int totalDocs,
      File outDir)
      throws Exception {
    // Member variables
    this.config = config;
    this.columnInfo = columnInfo;
    this.schema = schema;
    this.outDir = outDir;
    this.starTreeDimensionDictionary = new HashMap<String, Integer>();
    this.starTreeMetricDictionary = new HashMap<String, Integer>();

    // Dictionaries (will go in root segment)
    initializeAndBuildDictionaries(schema, columnInfo, outDir);

    // Compute dimension dictionary: dimension name -> position in the schema's dimension list
    for (int i = 0; i < schema.getDimensionNames().size(); i++) {
      starTreeDimensionDictionary.put(schema.getDimensionNames().get(i), i);
    }
    LOG.info("StarTree dimension dictionary: {}", starTreeDimensionDictionary);

    // Compute the metric dictionary: metric name -> position in the schema's metric list
    for (int i = 0; i < schema.getMetricNames().size(); i++) {
      starTreeMetricDictionary.put(schema.getMetricNames().get(i), i);
    }
    // Log the metric map (previously the dimension map was logged here by mistake).
    LOG.info("StarTree metric dictionary: {}", starTreeMetricDictionary);

    // Compute StarTree split order and translate it to dimension ids
    splitOrder = computeSplitOrder(columnInfo);
    LOG.info("Computed split order {}", splitOrder);
    List<Integer> splitOrderIndexes = new ArrayList<Integer>();
    for (String dimensionName : splitOrder) {
      Integer dimensionId = starTreeDimensionDictionary.get(dimensionName);
      splitOrderIndexes.add(dimensionId);
    }
    // The builder is given the ids in reverse of the computed order.
    Collections.reverse(splitOrderIndexes);

    // StarTree builder / table
    StarTreeTable table = new LinkedListStarTreeTable(); // TODO: ByteBuffer-based
    StarTreeDocumentIdMap documentIdMap =
        new HashMapStarTreeDocumentIdMap(); // TODO: ByteBuffer-based
    starTreeBuilder.init(
        splitOrderIndexes, starTreeIndexSpec.getMaxLeafRecords(), table, documentIdMap);

    // Build the StarTree structure and table
    LOG.info("Building StarTree table...");
    int count = 0;
    long startMillis = System.currentTimeMillis();
    recordReader.rewind();
    while (recordReader.hasNext()) {
      GenericRow row = recordReader.next();
      StarTreeTableRow starTreeTableRow = extractValues(row);
      starTreeBuilder.append(starTreeTableRow);
      count++;
    }
    long endMillis = System.currentTimeMillis();
    LOG.info(
        "Finished building StarTree table ({} documents, took {} ms)",
        count,
        endMillis - startMillis);

    LOG.info("Building StarTree (computing aggregates)...");
    startMillis = System.currentTimeMillis();
    starTreeBuilder.build();
    endMillis = System.currentTimeMillis();
    LOG.info("Finished building StarTree, took {} ms", endMillis - startMillis);

    // Re-compute the unique values for metrics including aggregates to allow for dictionary
    // encoding
    LOG.info("Re-computing unique metric values for dictionary encoding...");
    startMillis = System.currentTimeMillis();
    Map<String, Set<Object>> uniqueMetricValues = computeUniqueMetricValues();
    resetMetricDictionaries(uniqueMetricValues);
    endMillis = System.currentTimeMillis();
    LOG.info("Finished re-computing unique metric values (took {} ms)", endMillis - startMillis);

    // StarTree directory
    starTreeDir = new File(outDir, V1Constants.STARTREE_DIR);
    if (!starTreeDir.mkdir()) {
      throw new RuntimeException(
          "Could not create star tree directory " + starTreeDir.getAbsolutePath());
    }

    // For each column, initialize a forward and (optionally) an inverted index creator for the
    // raw segment (outDir) and for the aggregate segment (starTreeDir)
    int totalAggDocs = starTreeBuilder.getTotalAggregateDocumentCount();
    int totalRawDocs = starTreeBuilder.getTotalRawDocumentCount();
    for (final String column : dictionaryCreatorMap.keySet()) {
      ColumnIndexCreationInfo indexCreationInfo = columnInfo.get(column);

      Object[] uniqueValues = indexCreationInfo.getSortedUniqueElementsArray();
      if (schema.getMetricNames().contains(column)) {
        // Use the unique values including the new aggregate values
        uniqueValues = uniqueMetricValues.get(column).toArray();
      }

      // Single source of truth for the column's dictionary cardinality. Metric dictionaries were
      // rebuilt above with the aggregate values, so every index creator for the column must be
      // sized from this count rather than from the pre-aggregation unique values.
      final int cardinality = uniqueValues.length;

      if (schema.getFieldSpecFor(column).isSingleValueField()) {
        if (indexCreationInfo.isSorted()) {
          forwardIndexCreatorMap.put(
              column,
              new SingleValueSortedForwardIndexCreator(
                  outDir, cardinality, schema.getFieldSpecFor(column)));
          aggregateForwardIndexCreatorMap.put(
              column,
              new SingleValueSortedForwardIndexCreator(
                  starTreeDir, cardinality, schema.getFieldSpecFor(column)));
        } else {
          forwardIndexCreatorMap.put(
              column,
              new SingleValueUnsortedForwardIndexCreator(
                  schema.getFieldSpecFor(column),
                  outDir,
                  cardinality,
                  totalRawDocs,
                  indexCreationInfo.getTotalNumberOfEntries(),
                  indexCreationInfo.hasNulls()));
          aggregateForwardIndexCreatorMap.put(
              column,
              new SingleValueUnsortedForwardIndexCreator(
                  schema.getFieldSpecFor(column),
                  starTreeDir,
                  cardinality,
                  totalAggDocs,
                  indexCreationInfo.getTotalNumberOfEntries(),
                  indexCreationInfo.hasNulls()));
        }
      } else {
        forwardIndexCreatorMap.put(
            column,
            new MultiValueUnsortedForwardIndexCreator(
                schema.getFieldSpecFor(column),
                outDir,
                cardinality,
                totalRawDocs,
                indexCreationInfo.getTotalNumberOfEntries(),
                indexCreationInfo.hasNulls()));
        aggregateForwardIndexCreatorMap.put(
            column,
            new MultiValueUnsortedForwardIndexCreator(
                schema.getFieldSpecFor(column),
                starTreeDir,
                cardinality,
                totalAggDocs,
                indexCreationInfo.getTotalNumberOfEntries(),
                indexCreationInfo.hasNulls()));
      }

      if (config.createInvertedIndexEnabled()) {
        invertedIndexCreatorMap.put(
            column,
            new BitmapInvertedIndexCreator(
                outDir, cardinality, schema.getFieldSpecFor(column)));
        aggregateInvertedIndexCreatorMap.put(
            column,
            new BitmapInvertedIndexCreator(
                starTreeDir, cardinality, schema.getFieldSpecFor(column)));
      }
    }
  }