Esempio n. 1
0
 @Override
 public int getLength() {
   return column.size();
 }
  @Override
  public void writeIndexes(List<IntBuffer> segmentRowNumConversions, Closer closer)
      throws IOException {
    final SerializerUtils serializerUtils = new SerializerUtils();
    long dimStartTime = System.currentTimeMillis();

    String bmpFilename = String.format("%s.inverted", dimensionName);
    bitmapWriter =
        new GenericIndexedWriter<>(
            ioPeon, bmpFilename, indexSpec.getBitmapSerdeFactory().getObjectStrategy());
    bitmapWriter.open();

    final MappedByteBuffer dimValsMapped = Files.map(dictionaryFile);
    closer.register(
        new Closeable() {
          @Override
          public void close() throws IOException {
            ByteBufferUtils.unmap(dimValsMapped);
          }
        });

    if (!dimensionName.equals(serializerUtils.readString(dimValsMapped))) {
      throw new ISE("dimensions[%s] didn't equate!?  This is a major WTF moment.", dimensionName);
    }
    Indexed<String> dimVals = GenericIndexed.read(dimValsMapped, GenericIndexed.STRING_STRATEGY);
    log.info("Starting dimension[%s] with cardinality[%,d]", dimensionName, dimVals.size());

    final BitmapSerdeFactory bitmapSerdeFactory = indexSpec.getBitmapSerdeFactory();
    final BitmapFactory bitmapFactory = bitmapSerdeFactory.getBitmapFactory();

    RTree tree = null;
    spatialWriter = null;
    boolean hasSpatial = capabilities.hasSpatialIndexes();
    spatialIoPeon = new TmpFileIOPeon();
    if (hasSpatial) {
      BitmapFactory bmpFactory = bitmapSerdeFactory.getBitmapFactory();
      String spatialFilename = String.format("%s.spatial", dimensionName);
      spatialWriter =
          new ByteBufferWriter<ImmutableRTree>(
              spatialIoPeon,
              spatialFilename,
              new IndexedRTree.ImmutableRTreeObjectStrategy(bmpFactory));
      spatialWriter.open();
      tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
    }

    IndexSeeker[] dictIdSeeker = toIndexSeekers(adapters, dimConversions, dimensionName);

    // Iterate all dim values's dictionary id in ascending order which in line with dim values's
    // compare result.
    for (int dictId = 0; dictId < dimVals.size(); dictId++) {
      progress.progress();
      List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(adapters.size());
      for (int j = 0; j < adapters.size(); ++j) {
        int seekedDictId = dictIdSeeker[j].seek(dictId);
        if (seekedDictId != IndexSeeker.NOT_EXIST) {
          convertedInverteds.add(
              new ConvertingIndexedInts(
                  adapters.get(j).getBitmapIndex(dimensionName, seekedDictId),
                  segmentRowNumConversions.get(j)));
        }
      }

      MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
      for (Integer row :
          CombiningIterable.createSplatted(
              convertedInverteds, Ordering.<Integer>natural().nullsFirst())) {
        if (row != IndexMerger.INVALID_ROW) {
          bitset.add(row);
        }
      }
      if ((dictId == 0) && (Iterables.getFirst(dimVals, "") == null)) {
        bitset.or(nullRowsBitmap);
      }

      bitmapWriter.write(bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset));

      if (hasSpatial) {
        String dimVal = dimVals.get(dictId);
        if (dimVal != null) {
          List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
          float[] coords = new float[stringCoords.size()];
          for (int j = 0; j < coords.length; j++) {
            coords[j] = Float.valueOf(stringCoords.get(j));
          }
          tree.insert(coords, bitset);
        }
      }
    }

    log.info(
        "Completed dimension[%s] in %,d millis.",
        dimensionName, System.currentTimeMillis() - dimStartTime);

    if (hasSpatial) {
      spatialWriter.write(ImmutableRTree.newImmutableFromMutable(tree));
    }
  }
Esempio n. 3
0
  @Override
  public void validateSortedEncodedArrays(
      int[] lhs, int[] rhs, Indexed<String> lhsEncodings, Indexed<String> rhsEncodings)
      throws SegmentValidationException {
    if (lhs == null || rhs == null) {
      if (lhs != rhs) {
        throw new SegmentValidationException(
            "Expected nulls, found %s and %s", Arrays.toString(lhs), Arrays.toString(rhs));
      } else {
        return;
      }
    }

    int lhsLen = Array.getLength(lhs);
    int rhsLen = Array.getLength(rhs);

    if (lhsLen != rhsLen) {
      // Might be OK if one of them has null. This occurs in IndexMakerTest
      if (lhsLen == 0 && rhsLen == 1) {
        final String dimValName = rhsEncodings.get(rhs[0]);
        if (dimValName == null) {
          return;
        } else {
          throw new SegmentValidationException(
              "Dim [%s] value [%s] is not null", dimensionName, dimValName);
        }
      } else if (rhsLen == 0 && lhsLen == 1) {
        final String dimValName = lhsEncodings.get(lhs[0]);
        if (dimValName == null) {
          return;
        } else {
          throw new SegmentValidationException(
              "Dim [%s] value [%s] is not null", dimensionName, dimValName);
        }
      } else {
        throw new SegmentValidationException(
            "Dim [%s] value lengths not equal. Expected %d found %d",
            dimensionName, lhsLen, rhsLen);
      }
    }

    for (int j = 0; j < Math.max(lhsLen, rhsLen); ++j) {
      final int dIdex1 = lhsLen <= j ? -1 : lhs[j];
      final int dIdex2 = rhsLen <= j ? -1 : rhs[j];

      final String dim1ValName = dIdex1 < 0 ? null : lhsEncodings.get(dIdex1);
      final String dim2ValName = dIdex2 < 0 ? null : rhsEncodings.get(dIdex2);
      if ((dim1ValName == null) || (dim2ValName == null)) {
        if ((dim1ValName == null) && (dim2ValName == null)) {
          continue;
        } else {
          throw new SegmentValidationException(
              "Dim [%s] value not equal. Expected [%s] found [%s]",
              dimensionName, dim1ValName, dim2ValName);
        }
      }

      if (!dim1ValName.equals(dim2ValName)) {
        throw new SegmentValidationException(
            "Dim [%s] value not equal. Expected [%s] found [%s]",
            dimensionName, dim1ValName, dim2ValName);
      }
    }
  }