Пример #1
0
 /**
  * Returns the dimension values on the path from the root to this node, keyed by dimension ID.
  *
  * <p>The returned map iterates in tree-level order, i.e. the entry of the node closest to the
  * root comes first. A node without a parent (the root) contributes no entry.
  *
  * @return a new, mutable map from dimension ID to dimension value; empty when this node has no
  *     parent
  */
 public Map<Integer, Integer> getPathValues() {
   // Walk up to the root first, remembering the nodes root-first, so that the insertion-ordered
   // map below really reflects tree level (a plain HashMap gives no ordering guarantee).
   java.util.LinkedList<StarTreeIndexNode> path = new java.util.LinkedList<StarTreeIndexNode>();
   for (StarTreeIndexNode node = this; node.getParent() != null; node = node.getParent()) {
     path.addFirst(node);
   }

   Map<Integer, Integer> values = new java.util.LinkedHashMap<Integer, Integer>();
   for (StarTreeIndexNode node : path) {
     // Should a dimension ID ever repeat on a path, the value closest to the root is kept, which
     // is what the previous leaf-to-root overwrite produced as well.
     if (!values.containsKey(node.getDimensionName())) {
       values.put(node.getDimensionName(), node.getDimensionValue());
     }
   }
   return values;
 }
Пример #2
0
 /**
  * Descends from {@code node} towards a leaf. At every non-leaf node the child is chosen by
  * looking up, in {@code dimensions}, the value at the position given by the node's child
  * dimension, and using that value as the key into the node's children.
  *
  * @param node the node to start from; may be null
  * @param dimensions values indexed by the child dimension of the nodes being traversed
  * @return the leaf that was reached, or null when {@code node} is null or no child exists for a
  *     value along the way
  */
 public StarTreeIndexNode getMatchingNode(StarTreeIndexNode node, List<Integer> dimensions) {
   StarTreeIndexNode cursor = node;
   while (cursor != null && !cursor.isLeaf()) {
     Integer splitDimension = cursor.getChildDimensionName();
     Integer wantedValue = dimensions.get(splitDimension);
     cursor = cursor.getChildren().get(wantedValue);
   }
   return cursor;
 }
Пример #3
0
 /**
  * Lists the dimension IDs on the path to this node, root-most first.
  *
  * <p>A node without a parent contributes nothing, so the result is empty for the root.
  *
  * @return a new list of dimension IDs ordered by tree level
  */
 public List<Integer> getPathDimensions() {
   LinkedList<Integer> rootFirst = new LinkedList<Integer>();
   for (StarTreeIndexNode node = this;
       node != null && node.getParent() != null;
       node = node.getParent()) {
     // Prepending while climbing leaves the list ordered from the root downwards.
     rootFirst.addFirst(node.getDimensionName());
   }
   return rootFirst;
 }
Пример #4
0
 /**
  * Gathers every leaf reachable from {@code node} by following, at each level, both the child
  * keyed by the row's value for that level's dimension and the child keyed by
  * {@code StarTreeIndexNode.all()}.
  *
  * @param node the subtree root to search
  * @param values the dimension values, indexed by dimension ID
  * @param leaves receives the matching leaves
  */
 private void findMatchingLeaves(
     StarTreeIndexNode node, List<Integer> values, Set<StarTreeIndexNode> leaves) {
   if (node.isLeaf()) {
     leaves.add(node);
     return;
   }

   // Branch on the concrete value first, then on the "all" child.
   Integer rowValue = values.get(node.getChildDimensionName());
   StarTreeIndexNode specificChild = node.getChildren().get(rowValue);
   findMatchingLeaves(specificChild, values, leaves);

   StarTreeIndexNode starChild = node.getChildren().get(StarTreeIndexNode.all());
   findMatchingLeaves(starChild, values, leaves);
 }
Пример #5
0
  /**
   * Prints the subtree rooted at {@code node} to standard output, one node per line, indenting
   * each node by two spaces per level.
   *
   * @param node the subtree root to print
   * @param level the indentation depth of {@code node}
   */
  public static void printTree(StarTreeIndexNode node, int level) {
    StringBuilder indent = new StringBuilder();
    for (int depth = 0; depth < level; depth++) {
      indent.append("  ");
    }
    System.out.print(indent.toString());
    System.out.println(node);

    if (node.isLeaf()) {
      return;
    }
    int childLevel = level + 1;
    for (StarTreeIndexNode child : node.getChildren().values()) {
      printTree(child, childLevel);
    }
  }
Пример #6
0
  /**
   * Creates and builds a dictionary creator for every field of the schema, registering each one
   * in {@code dictionaryCreatorMap} under the column name, and then looks up the "all" value of
   * every dimension in its dictionary.
   *
   * @param schema supplies the field specs to create dictionaries for
   * @param columnInfo per-column index creation info, keyed by column name
   * @param file handed to every {@code SegmentDictionaryCreator}
   * @throws RuntimeException if a column is not configured to create a dictionary
   * @throws Exception declared because the dictionary creator calls may throw
   */
  private void initializeAndBuildDictionaries(
      Schema schema, Map<String, ColumnIndexCreationInfo> columnInfo, File file) throws Exception {
    for (final FieldSpec spec : schema.getAllFieldSpecs()) {
      final String column = spec.getName();
      final ColumnIndexCreationInfo info = columnInfo.get(column);
      if (!info.isCreateDictionary()) {
        throw new RuntimeException("Creation of indices without dictionaries is not implemented!");
      }

      SegmentDictionaryCreator creator =
          new SegmentDictionaryCreator(
              info.hasNulls(), info.getSortedUniqueElementsArray(), spec, file);
      // Register before building, so the map holds the creator even if build() fails.
      dictionaryCreatorMap.put(column, creator);
      creator.build();
    }

    // Look up the "all" value of every dimension in its dictionary. The returned index is not
    // needed here.
    // NOTE(review): presumably this verifies that the value is present in the dictionary; confirm
    // what indexOfSV / indexOfMV do for a missing value.
    for (DimensionFieldSpec spec : schema.getDimensionFieldSpecs()) {
      Object allValue = StarTreeIndexNode.getAllValue(spec);
      if (schema.getFieldSpecFor(spec.getName()).isSingleValueField()) {
        dictionaryCreatorMap.get(spec.getName()).indexOfSV(allValue);
      } else {
        dictionaryCreatorMap.get(spec.getName()).indexOfMV(allValue);
      }
    }
  }
Пример #7
0
  /**
   * Indexes one raw row: finds the star tree leaves matching the row and, for every matching leaf
   * whose path contains no "all" value, writes the row's dictionary-encoded column values to the
   * forward indexes (and to the inverted indexes when enabled) under the next document ID.
   *
   * @param row the raw row to index
   * @throws IllegalStateException if no document ID can be assigned for the row
   */
  @Override
  public void indexRow(GenericRow row) {
    // Find matching leaves in StarTree for row
    currentMatchingNodes.clear();
    StarTreeTableRow tableRow = extractValues(row);
    findMatchingLeaves(starTreeBuilder.getTree(), tableRow.getDimensions(), currentMatchingNodes);

    // Only write the raw value, maintaining sort order (we will write aggregates when sealing)
    for (StarTreeIndexNode node : currentMatchingNodes) {
      Map<Integer, Integer> pathValues = node.getPathValues();
      if (pathValues.containsValue(StarTreeIndexNode.all())) {
        // A path through an "all" child is not a raw-data leaf; nothing to write here.
        continue;
      }

      // NOTE(review): the range and the sub-table view are never read below. They are kept only
      // because their side effects cannot be ruled out from this method; remove if confirmed pure.
      StarTreeTableRange range = starTreeBuilder.getDocumentIdRange(node.getNodeId());
      StarTreeTable subTable =
          starTreeBuilder.getTable().view(range.getStartDocumentId(), range.getDocumentCount());

      Integer nextMatchingDocumentId = starTreeBuilder.getNextDocumentId(tableRow.getDimensions());
      if (nextMatchingDocumentId == null) {
        throw new IllegalStateException("Could not assign document ID for row " + tableRow);
      }

      // Write using that document ID to all columns
      for (final String column : dictionaryCreatorMap.keySet()) {
        Object columnValueToIndex = row.getValue(column);
        boolean singleValue = schema.getFieldSpecFor(column).isSingleValueField();
        if (singleValue) {
          int dictionaryIndex = dictionaryCreatorMap.get(column).indexOfSV(columnValueToIndex);
          ((SingleValueForwardIndexCreator) forwardIndexCreatorMap.get(column))
              .index(nextMatchingDocumentId, dictionaryIndex);
          if (config.createInvertedIndexEnabled()) {
            // The cast to Object is kept on purpose: it decides which add(...) overload is used.
            invertedIndexCreatorMap
                .get(column)
                .add(nextMatchingDocumentId, (Object) dictionaryIndex);
          }
        } else {
          int[] dictionaryIndex = dictionaryCreatorMap.get(column).indexOfMV(columnValueToIndex);
          ((MultiValueForwardIndexCreator) forwardIndexCreatorMap.get(column))
              .index(nextMatchingDocumentId, dictionaryIndex);
          if (config.createInvertedIndexEnabled()) {
            invertedIndexCreatorMap.get(column).add(nextMatchingDocumentId, dictionaryIndex);
          }
        }
      }
    }
  }
Пример #8
0
 /**
  * Compares this node with another {@code StarTreeIndexNode} field by field: node ID, level,
  * dimension name and value, child dimension name, children, start document ID and document
  * count.
  *
  * @param o the object to compare with
  * @return true only if {@code o} is a {@code StarTreeIndexNode} and all compared fields are equal
  */
 @Override
 public boolean equals(Object o) {
   if (!(o instanceof StarTreeIndexNode)) {
     return false;
   }
   StarTreeIndexNode other = (StarTreeIndexNode) o;

   if (!Objects.equal(nodeId, other.getNodeId())) {
     return false;
   }
   if (!Objects.equal(level, other.getLevel())) {
     return false;
   }
   if (!Objects.equal(dimensionName, other.getDimensionName())) {
     return false;
   }
   if (!Objects.equal(dimensionValue, other.getDimensionValue())) {
     return false;
   }
   if (!Objects.equal(childDimensionName, other.getChildDimensionName())) {
     return false;
   }
   if (!Objects.equal(children, other.getChildren())) {
     return false;
   }
   if (!Objects.equal(startDocumentId, other.getStartDocumentId())) {
     return false;
   }
   return Objects.equal(documentCount, other.getDocumentCount());
 }
Пример #9
0
    /**
     * Loads an index segment from {@code indexDir}.
     *
     * <p>If the version recorded in the segment metadata differs (ignoring case) from
     * {@code IndexSegmentImpl.EXPECTED_SEGMENT_VERSION}, the segment directory is first converted
     * with the converter obtained from {@code SegmentFormatConverterFactory}. A column index
     * container is then initialized for every column in the metadata and, if the metadata reports
     * a star tree, the tree is read from {@code V1Constants.STARTREE_FILE} inside the directory.
     *
     * @param indexDir the segment directory
     * @param readMode passed through to every column index container
     * @param indexLoadingConfigMetadata passed through to every column index container
     * @return the loaded segment; its star tree root is null when the segment has no star tree
     * @throws Exception declared because metadata loading, conversion, or index loading may throw
     */
    public static com.linkedin.pinot.core.indexsegment.IndexSegment load(
        File indexDir, ReadMode readMode, IndexLoadingConfigMetadata indexLoadingConfigMetadata)
        throws Exception {
      SegmentMetadataImpl metadata = new SegmentMetadataImpl(indexDir);
      if (!metadata
          .getVersion()
          .equalsIgnoreCase(IndexSegmentImpl.EXPECTED_SEGMENT_VERSION.toString())) {

        SegmentVersion from = SegmentVersion.valueOf(metadata.getVersion());
        SegmentVersion to =
            SegmentVersion.valueOf(IndexSegmentImpl.EXPECTED_SEGMENT_VERSION.toString());
        LOGGER.info(
            "segment:{} needs to be converted from :{} to {} version.",
            indexDir.getName(),
            from,
            to);
        SegmentFormatConverter converter = SegmentFormatConverterFactory.getConverter(from, to);
        LOGGER.info("Using converter:{} to up-convert the format", converter.getClass().getName());
        converter.convert(indexDir);
        LOGGER.info(
            "Successfully up-converted segment:{} from :{} to {} version.",
            indexDir.getName(),
            from,
            to);
      }

      Map<String, ColumnIndexContainer> indexContainerMap =
          new HashMap<String, ColumnIndexContainer>();

      for (String column : metadata.getColumnMetadataMap().keySet()) {
        indexContainerMap.put(
            column,
            ColumnIndexContainer.init(
                column,
                indexDir,
                metadata.getColumnMetadataFor(column),
                indexLoadingConfigMetadata,
                readMode));
      }

      // The star tree index (if available)
      StarTreeIndexNode starTreeRoot = null;
      if (metadata.hasStarTree()) {
        File starTreeFile = new File(indexDir, V1Constants.STARTREE_FILE);
        LOGGER.debug("Loading star tree index file {}", starTreeFile);
        FileInputStream starTreeInput = new FileInputStream(starTreeFile);
        try {
          starTreeRoot = StarTreeIndexNode.fromBytes(starTreeInput);
        } finally {
          // Release the file handle whether or not deserialization succeeded.
          starTreeInput.close();
        }
      }
      return new IndexSegmentImpl(indexDir, metadata, indexContainerMap, starTreeRoot);
    }
Пример #10
0
  /**
   * Finishes segment creation. In order, this method:
   *
   * <ol>
   *   <li>writes every table row that contains an "all" dimension value to the aggregate forward
   *       (and, when enabled, inverted) indexes,
   *   <li>closes the raw forward index creators and dictionary creators and seals the raw inverted
   *       index creators,
   *   <li>closes / seals the aggregate index creators,
   *   <li>writes the metadata for {@code outDir} using the total raw document count,
   *   <li>writes the star tree to {@code V1Constants.STARTREE_FILE} in {@code starTreeDir},
   *   <li>copies the dictionary files from {@code outDir} into {@code starTreeDir}, and
   *   <li>writes the metadata for {@code starTreeDir} using the total aggregate document count.
   * </ol>
   *
   * @throws ConfigurationException declared by this method; presumably raised while writing
   *     metadata
   * @throws IOException if the star tree file cannot be written, the dictionary files cannot be
   *     listed or copied, or another I/O step fails
   */
  @Override
  public void seal() throws ConfigurationException, IOException {
    // Write all the aggregate rows to the aggregate segment
    LOG.info("Writing aggregate segment...");
    long startMillis = System.currentTimeMillis();
    int currentAggregateDocumentId = 0;
    Iterator<StarTreeTableRow> itr = starTreeBuilder.getTable().getAllCombinations();
    while (itr.hasNext()) {
      StarTreeTableRow next = itr.next();
      if (!next.getDimensions().contains(StarTreeIndexNode.all())) {
        // Rows without any "all" dimension are not aggregates and are skipped here.
        continue;
      }

      // Write using that document ID to all columns
      for (final String column : dictionaryCreatorMap.keySet()) {
        boolean singleValue = schema.getFieldSpecFor(column).isSingleValueField();
        Object dictionaryIndex = null; // TODO: Is this okay?

        if (starTreeDimensionDictionary.containsKey(column)) {
          // Index the dimension value
          Integer dimensionId = starTreeDimensionDictionary.get(column);
          Integer dimensionValue = next.getDimensions().get(dimensionId);
          // Compare numerically: '==' on boxed Integers would compare references.
          if (dimensionValue != null && dimensionValue.intValue() == StarTreeIndexNode.all()) {
            // Use all value
            Object allValue = StarTreeIndexNode.getAllValue(schema.getFieldSpecFor(column));
            if (singleValue) {
              dictionaryIndex = dictionaryCreatorMap.get(column).indexOfSV(allValue);
            } else {
              dictionaryIndex = dictionaryCreatorMap.get(column).indexOfMV(allValue);
            }
          } else {
            // NOTE(review): the table value is used directly as the dictionary index, so it is
            // presumably already dictionary-encoded. For a multi-value column the int[] cast
            // below would fail on this Integer; confirm dimensions are always single-value.
            dictionaryIndex = dimensionValue;
          }
        } else if (starTreeMetricDictionary.containsKey(column)) {
          // Index the aggregate metric
          Integer metricId = starTreeMetricDictionary.get(column);
          Object columnValueToIndex = next.getMetrics().get(metricId);
          if (singleValue) {
            dictionaryIndex = dictionaryCreatorMap.get(column).indexOfSV(columnValueToIndex);
          } else {
            dictionaryIndex = dictionaryCreatorMap.get(column).indexOfMV(columnValueToIndex);
          }
        } else {
          // Neither a star tree dimension nor a metric: index the column's "all" value
          Object columnValueToIndex =
              StarTreeIndexNode.getAllValue(schema.getFieldSpecFor(column));
          if (singleValue) {
            dictionaryIndex = dictionaryCreatorMap.get(column).indexOfSV(columnValueToIndex);
          } else {
            dictionaryIndex = dictionaryCreatorMap.get(column).indexOfMV(columnValueToIndex);
          }
        }

        if (singleValue) {
          ((SingleValueForwardIndexCreator) aggregateForwardIndexCreatorMap.get(column))
              .index(currentAggregateDocumentId, (Integer) dictionaryIndex);
        } else {
          ((MultiValueForwardIndexCreator) aggregateForwardIndexCreatorMap.get(column))
              .index(currentAggregateDocumentId, (int[]) dictionaryIndex);
        }

        if (config.createInvertedIndexEnabled()) {
          aggregateInvertedIndexCreatorMap
              .get(column)
              .add(currentAggregateDocumentId, dictionaryIndex);
        }
      }
      currentAggregateDocumentId++;
    }
    long endMillis = System.currentTimeMillis();
    LOG.info("Done writing aggregate segment (took {} ms)", endMillis - startMillis);

    for (final String column : forwardIndexCreatorMap.keySet()) {
      forwardIndexCreatorMap.get(column).close();
      if (config.createInvertedIndexEnabled()) {
        invertedIndexCreatorMap.get(column).seal();
      }
      dictionaryCreatorMap.get(column).close();
    }

    for (final String column : aggregateForwardIndexCreatorMap.keySet()) {
      aggregateForwardIndexCreatorMap.get(column).close();
      if (config.createInvertedIndexEnabled()) {
        aggregateInvertedIndexCreatorMap.get(column).seal();
      }
      // n.b. The dictionary from raw data is used
    }

    writeMetadata(outDir, starTreeBuilder.getTotalRawDocumentCount());

    // Write star tree
    LOG.info("Writing " + V1Constants.STARTREE_FILE);
    startMillis = System.currentTimeMillis();
    File starTreeFile = new File(starTreeDir, V1Constants.STARTREE_FILE);
    OutputStream starTreeOutputStream = new FileOutputStream(starTreeFile);
    try {
      starTreeBuilder.getTree().writeTree(starTreeOutputStream);
    } finally {
      // Close even when writing fails, so the file handle is not leaked.
      starTreeOutputStream.close();
    }
    endMillis = System.currentTimeMillis();
    LOG.info("Wrote StarTree file (took {} ms)", endMillis - startMillis);

    // Copy the dictionary files into startree directory
    // n.b. this is done so the segment is as stand-alone as possible, though could be removed as an
    // optimization
    File[] dictionaryFiles =
        outDir.listFiles(
            new FilenameFilter() {
              @Override
              public boolean accept(File dir, String name) {
                return name.endsWith(V1Constants.Dict.FILE_EXTENTION);
              }
            });
    if (dictionaryFiles == null) {
      // listFiles returns null when the path is not a directory or an I/O error occurs.
      throw new IOException("Could not list dictionary files in " + outDir);
    }
    for (File dictionaryFile : dictionaryFiles) {
      FileUtils.copyFile(dictionaryFile, new File(starTreeDir, dictionaryFile.getName()));
    }

    // Write star tree metadata
    writeMetadata(starTreeDir, starTreeBuilder.getTotalAggregateDocumentCount());
  }