/**
 * Collects the (dimension ID, dimension value) pair of every node on the path from this node up
 * to, but not including, the node that has no parent.
 *
 * <p>The pairs are gathered in a {@link HashMap}, so the returned map does not expose any
 * particular iteration order.
 *
 * @return a new map from dimension ID to dimension value; empty when this node has no parent
 */
public Map<Integer, Integer> getPathValues() {
  Map<Integer, Integer> pathValues = new HashMap<Integer, Integer>();
  // Climb towards the root; the parent-less node itself contributes nothing.
  for (StarTreeIndexNode cursor = this;
      cursor != null && cursor.getParent() != null;
      cursor = cursor.getParent()) {
    pathValues.put(cursor.getDimensionName(), cursor.getDimensionValue());
  }
  return pathValues;
}
/**
 * Descends from {@code node} until a leaf is reached. At every non-leaf node the child to follow
 * is the one keyed by the entry of {@code dimensions} found at the index given by that node's
 * child dimension.
 *
 * @param node the node to start from; may be {@code null}
 * @param dimensions values looked up by child dimension while descending
 * @return the leaf that was reached, or {@code null} when {@code node} is {@code null} or a node
 *     on the way down has no child for the looked-up value
 */
public StarTreeIndexNode getMatchingNode(StarTreeIndexNode node, List<Integer> dimensions) {
  StarTreeIndexNode cursor = node;
  while (cursor != null && !cursor.isLeaf()) {
    Integer splitDimension = cursor.getChildDimensionName();
    Integer wantedValue = dimensions.get(splitDimension);
    cursor = cursor.getChildren().get(wantedValue);
  }
  return cursor;
}
/**
 * Lists the dimension IDs of the nodes on the path to this node, ordered from the node nearest
 * the root down to this node. The node without a parent contributes no entry.
 *
 * @return a new list of dimension IDs; empty when this node has no parent
 */
public List<Integer> getPathDimensions() {
  LinkedList<Integer> path = new LinkedList<Integer>();
  StarTreeIndexNode cursor = this;
  while (cursor != null && cursor.getParent() != null) {
    // We walk upwards, so prepend to end up with top-down order.
    path.addFirst(cursor.getDimensionName());
    cursor = cursor.getParent();
  }
  return path;
}
/**
 * Adds to {@code leaves} every leaf reachable from {@code node} by following, at each non-leaf
 * node, first the child keyed by the row's value for that node's child dimension and then the
 * child keyed by {@code StarTreeIndexNode.all()}.
 *
 * @param node the subtree root to search; dereferenced immediately, so it must not be
 *     {@code null} (this also holds for both children followed at every non-leaf node)
 * @param values values looked up by child dimension while descending
 * @param leaves receives the matching leaves
 */
private void findMatchingLeaves(
    StarTreeIndexNode node, List<Integer> values, Set<StarTreeIndexNode> leaves) {
  if (node.isLeaf()) {
    leaves.add(node);
    return;
  }

  Integer rowValue = values.get(node.getChildDimensionName());

  // Specific-value branch first, then the "all" branch.
  StarTreeIndexNode specificChild = node.getChildren().get(rowValue);
  findMatchingLeaves(specificChild, values, leaves);

  StarTreeIndexNode allChild = node.getChildren().get(StarTreeIndexNode.all());
  findMatchingLeaves(allChild, values, leaves);
}
/**
 * Prints the subtree rooted at {@code node} to {@code System.out}, one node per line. Each line
 * is prefixed with {@code level} spaces; children are printed with {@code level + 1}.
 *
 * @param node the subtree root to print
 * @param level number of leading spaces for {@code node}'s own line
 */
public static void printTree(StarTreeIndexNode node, int level) {
  for (int remaining = level; remaining > 0; remaining--) {
    System.out.print(" ");
  }
  System.out.println(node);

  if (node.isLeaf()) {
    return;
  }
  final int childLevel = level + 1;
  for (StarTreeIndexNode child : node.getChildren().values()) {
    printTree(child, childLevel);
  }
}
/** Initialize dictionaries for dimension values */ private void initializeAndBuildDictionaries( Schema schema, Map<String, ColumnIndexCreationInfo> columnInfo, File file) throws Exception { for (final FieldSpec spec : schema.getAllFieldSpecs()) { final ColumnIndexCreationInfo info = columnInfo.get(spec.getName()); if (info.isCreateDictionary()) { dictionaryCreatorMap.put( spec.getName(), new SegmentDictionaryCreator( info.hasNulls(), info.getSortedUniqueElementsArray(), spec, file)); } else { throw new RuntimeException("Creation of indices without dictionaries is not implemented!"); } dictionaryCreatorMap.get(spec.getName()).build(); } // Add __ALL__ to dimension dictionaries for (DimensionFieldSpec spec : schema.getDimensionFieldSpecs()) { Object allValue = StarTreeIndexNode.getAllValue(spec); if (schema.getFieldSpecFor(spec.getName()).isSingleValueField()) { Object allIndex = dictionaryCreatorMap.get(spec.getName()).indexOfSV(allValue); } else { Object allIndex = dictionaryCreatorMap.get(spec.getName()).indexOfMV(allValue); } } }
@Override public void indexRow(GenericRow row) { // Find matching leaves in StarTree for row currentMatchingNodes.clear(); StarTreeTableRow tableRow = extractValues(row); findMatchingLeaves(starTreeBuilder.getTree(), tableRow.getDimensions(), currentMatchingNodes); // Only write the raw value, maintaining sort order (we will write aggregates when sealing) for (StarTreeIndexNode node : currentMatchingNodes) { Map<Integer, Integer> pathValues = node.getPathValues(); if (!pathValues.containsValue(StarTreeIndexNode.all())) { StarTreeTableRange range = starTreeBuilder.getDocumentIdRange(node.getNodeId()); StarTreeTable subTable = starTreeBuilder.getTable().view(range.getStartDocumentId(), range.getDocumentCount()); Integer nextMatchingDocumentId = starTreeBuilder.getNextDocumentId(tableRow.getDimensions()); if (nextMatchingDocumentId == null) { throw new IllegalStateException("Could not assign document ID for row " + tableRow); } // Write using that document ID to all columns for (final String column : dictionaryCreatorMap.keySet()) { Object columnValueToIndex = row.getValue(column); if (schema.getFieldSpecFor(column).isSingleValueField()) { System.out.println(column + ": " + columnValueToIndex); int dictionaryIndex = dictionaryCreatorMap.get(column).indexOfSV(columnValueToIndex); ((SingleValueForwardIndexCreator) forwardIndexCreatorMap.get(column)) .index(nextMatchingDocumentId, dictionaryIndex); if (config.createInvertedIndexEnabled()) { invertedIndexCreatorMap .get(column) .add(nextMatchingDocumentId, (Object) dictionaryIndex); } } else { int[] dictionaryIndex = dictionaryCreatorMap.get(column).indexOfMV(columnValueToIndex); ((MultiValueForwardIndexCreator) forwardIndexCreatorMap.get(column)) .index(nextMatchingDocumentId, dictionaryIndex); if (config.createInvertedIndexEnabled()) { invertedIndexCreatorMap.get(column).add(nextMatchingDocumentId, dictionaryIndex); } } } } } }
/**
 * Compares this node with another {@code StarTreeIndexNode} field by field: node ID, level,
 * dimension name and value, child dimension name, children, start document ID and document
 * count. Anything that is not a {@code StarTreeIndexNode} (including {@code null}) is unequal.
 */
@Override
public boolean equals(Object o) {
  if (!(o instanceof StarTreeIndexNode)) {
    return false;
  }
  StarTreeIndexNode other = (StarTreeIndexNode) o;

  // Checked in the same order as declared above; the first mismatch decides.
  if (!Objects.equal(nodeId, other.getNodeId())) {
    return false;
  }
  if (!Objects.equal(level, other.getLevel())) {
    return false;
  }
  if (!Objects.equal(dimensionName, other.getDimensionName())) {
    return false;
  }
  if (!Objects.equal(dimensionValue, other.getDimensionValue())) {
    return false;
  }
  if (!Objects.equal(childDimensionName, other.getChildDimensionName())) {
    return false;
  }
  if (!Objects.equal(children, other.getChildren())) {
    return false;
  }
  if (!Objects.equal(startDocumentId, other.getStartDocumentId())) {
    return false;
  }
  return Objects.equal(documentCount, other.getDocumentCount());
}
public static com.linkedin.pinot.core.indexsegment.IndexSegment load( File indexDir, ReadMode readMode, IndexLoadingConfigMetadata indexLoadingConfigMetadata) throws Exception { SegmentMetadataImpl metadata = new SegmentMetadataImpl(indexDir); if (!metadata .getVersion() .equalsIgnoreCase(IndexSegmentImpl.EXPECTED_SEGMENT_VERSION.toString())) { SegmentVersion from = SegmentVersion.valueOf(metadata.getVersion()); SegmentVersion to = SegmentVersion.valueOf(IndexSegmentImpl.EXPECTED_SEGMENT_VERSION.toString()); LOGGER.info( "segment:{} needs to be converted from :{} to {} version.", indexDir.getName(), from, to); SegmentFormatConverter converter = SegmentFormatConverterFactory.getConverter(from, to); LOGGER.info("Using converter:{} to up-convert the format", converter.getClass().getName()); converter.convert(indexDir); LOGGER.info( "Successfully up-converted segment:{} from :{} to {} version.", indexDir.getName(), from, to); } Map<String, ColumnIndexContainer> indexContainerMap = new HashMap<String, ColumnIndexContainer>(); for (String column : metadata.getColumnMetadataMap().keySet()) { indexContainerMap.put( column, ColumnIndexContainer.init( column, indexDir, metadata.getColumnMetadataFor(column), indexLoadingConfigMetadata, readMode)); } // The star tree index (if available) StarTreeIndexNode starTreeRoot = null; if (metadata.hasStarTree()) { File starTreeFile = new File(indexDir, V1Constants.STARTREE_FILE); LOGGER.debug("Loading star tree index file {}", starTreeFile); starTreeRoot = StarTreeIndexNode.fromBytes(new FileInputStream(starTreeFile)); } return new IndexSegmentImpl(indexDir, metadata, indexContainerMap, starTreeRoot); }
@Override public void seal() throws ConfigurationException, IOException { // Write all the aggregate rows to the aggregate segment LOG.info("Writing aggregate segment..."); long startMillis = System.currentTimeMillis(); int currentAggregateDocumentId = 0; Iterator<StarTreeTableRow> itr = starTreeBuilder.getTable().getAllCombinations(); while (itr.hasNext()) { StarTreeTableRow next = itr.next(); if (next.getDimensions().contains(StarTreeIndexNode.all())) { // Write using that document ID to all columns for (final String column : dictionaryCreatorMap.keySet()) { Object dictionaryIndex = null; // TODO: Is this okay? if (starTreeDimensionDictionary.containsKey(column)) { // Index the dimension value Integer dimensionId = starTreeDimensionDictionary.get(column); Integer dimensionValue = next.getDimensions().get(dimensionId); if (dimensionValue == StarTreeIndexNode.all()) { // Use all value Object allValue = StarTreeIndexNode.getAllValue(schema.getFieldSpecFor(column)); if (schema.getFieldSpecFor(column).isSingleValueField()) { dictionaryIndex = dictionaryCreatorMap.get(column).indexOfSV(allValue); } else { dictionaryIndex = dictionaryCreatorMap.get(column).indexOfMV(allValue); } } else { dictionaryIndex = dimensionValue; } } else if (starTreeMetricDictionary.containsKey(column)) { // Index the aggregate metric Integer metricId = starTreeMetricDictionary.get(column); Object columnValueToIndex = next.getMetrics().get(metricId); if (schema.getFieldSpecFor(column).isSingleValueField()) { dictionaryIndex = dictionaryCreatorMap.get(column).indexOfSV(columnValueToIndex); } else { dictionaryIndex = dictionaryCreatorMap.get(column).indexOfMV(columnValueToIndex); } } else { // Just index the raw value Object columnValueToIndex = StarTreeIndexNode.getAllValue(schema.getFieldSpecFor(column)); if (schema.getFieldSpecFor(column).isSingleValueField()) { dictionaryIndex = dictionaryCreatorMap.get(column).indexOfSV(columnValueToIndex); } else { dictionaryIndex = 
dictionaryCreatorMap.get(column).indexOfMV(columnValueToIndex); } } if (schema.getFieldSpecFor(column).isSingleValueField()) { ((SingleValueForwardIndexCreator) aggregateForwardIndexCreatorMap.get(column)) .index(currentAggregateDocumentId, (Integer) dictionaryIndex); } else { ((MultiValueForwardIndexCreator) aggregateForwardIndexCreatorMap.get(column)) .index(currentAggregateDocumentId, (int[]) dictionaryIndex); } if (config.createInvertedIndexEnabled()) { aggregateInvertedIndexCreatorMap .get(column) .add(currentAggregateDocumentId, dictionaryIndex); } } currentAggregateDocumentId++; } } long endMillis = System.currentTimeMillis(); LOG.info("Done writing aggregate segment (took {} ms)", endMillis - startMillis); for (final String column : forwardIndexCreatorMap.keySet()) { forwardIndexCreatorMap.get(column).close(); if (config.createInvertedIndexEnabled()) { invertedIndexCreatorMap.get(column).seal(); } dictionaryCreatorMap.get(column).close(); } for (final String column : aggregateForwardIndexCreatorMap.keySet()) { aggregateForwardIndexCreatorMap.get(column).close(); if (config.createInvertedIndexEnabled()) { aggregateInvertedIndexCreatorMap.get(column).seal(); } // n.b. The dictionary from raw data is used } writeMetadata(outDir, starTreeBuilder.getTotalRawDocumentCount()); // Write star tree LOG.info("Writing " + V1Constants.STARTREE_FILE); startMillis = System.currentTimeMillis(); File starTreeFile = new File(starTreeDir, V1Constants.STARTREE_FILE); OutputStream starTreeOutputStream = new FileOutputStream(starTreeFile); starTreeBuilder.getTree().writeTree(starTreeOutputStream); starTreeOutputStream.close(); endMillis = System.currentTimeMillis(); LOG.info("Wrote StarTree file (took {} ms)", endMillis - startMillis); // Copy the dictionary files into startree directory // n.b. 
this is done so the segment is as stand-alone as possible, though could be removed as an // optimization File[] dictionaryFiles = outDir.listFiles( new FilenameFilter() { @Override public boolean accept(File dir, String name) { return name.endsWith(V1Constants.Dict.FILE_EXTENTION); } }); for (File dictionaryFile : dictionaryFiles) { FileUtils.copyFile(dictionaryFile, new File(starTreeDir, dictionaryFile.getName())); } // Write star tree metadata writeMetadata(starTreeDir, starTreeBuilder.getTotalAggregateDocumentCount()); }