Exemple #1
0
    /**
     * Appends the value at the cursor's current position to {@code sliceOutput}.
     *
     * <p>Every record starts with a one-byte null flag (1 = null, 0 = not null) followed by the
     * value: a long for {@code FIXED_INT_64}, a double for {@code DOUBLE}, a single byte for
     * {@code BOOLEAN}, and for {@code VARIABLE_BINARY} an int holding the total record length
     * (payload + flag byte + length int) followed by the payload bytes. A null value is written
     * as a zero / empty payload. The record's starting offset is added to {@code positionOffsets}.
     *
     * @param cursor cursor positioned on the value to copy
     * @return {@code true} if the value was written; {@code false} (with nothing written) if the
     *     output does not have enough writable bytes left for the record
     * @throws IllegalArgumentException if {@code type} is not one of the supported types
     */
    public boolean append(BlockCursor cursor) {
      // One byte of the remaining space is always reserved for the null flag.
      final int available = sliceOutput.writableBytes() - SIZE_OF_BYTE;

      final boolean nullValue = cursor.isNull();
      final int nullFlag = nullValue ? 1 : 0;

      if (type == Type.FIXED_INT_64) {
        if (available < SIZE_OF_LONG) {
          return false;
        }
        positionOffsets.add(sliceOutput.size());
        sliceOutput.writeByte(nullFlag);
        sliceOutput.appendLong(nullValue ? 0 : cursor.getLong());
        return true;
      }

      if (type == Type.DOUBLE) {
        if (available < SIZE_OF_DOUBLE) {
          return false;
        }
        positionOffsets.add(sliceOutput.size());
        sliceOutput.writeByte(nullFlag);
        sliceOutput.appendDouble(nullValue ? 0 : cursor.getDouble());
        return true;
      }

      if (type == Type.BOOLEAN) {
        if (available < SIZE_OF_BYTE) {
          return false;
        }
        positionOffsets.add(sliceOutput.size());
        sliceOutput.writeByte(nullFlag);
        // The cursor is only consulted for non-null values.
        final boolean truthy = !nullValue && cursor.getBoolean();
        sliceOutput.writeByte(truthy ? 1 : 0);
        return true;
      }

      if (type == Type.VARIABLE_BINARY) {
        final int payloadLength;
        if (nullValue) {
          payloadLength = 0;
        } else {
          payloadLength = getVariableBinaryLength(cursor.getRawSlice(), cursor.getRawOffset());
        }
        if (available < SIZE_OF_INT + payloadLength) {
          return false;
        }

        final int recordStart = sliceOutput.size();
        positionOffsets.add(recordStart);
        sliceOutput.writeByte(nullFlag);
        // The stored length covers the whole record: payload plus flag byte plus this int.
        sliceOutput.appendInt(payloadLength + SIZE_OF_BYTE + SIZE_OF_INT);
        if (!nullValue) {
          // Skip the source record's own flag byte and length int to reach its payload.
          sliceOutput.writeBytes(
              cursor.getRawSlice(),
              cursor.getRawOffset() + SIZE_OF_BYTE + SIZE_OF_INT,
              payloadLength);
        }
        return true;
      }

      throw new IllegalArgumentException("Unsupported type " + type);
    }
  /**
   * Writes a new string field into the index at {@code indexDir}, where {@code values[d]} is the
   * term for document {@code d}.
   *
   * <p>Document ids are sorted by their value (ties broken by ascending document id) and then
   * emitted term by term, so each distinct value is written once via {@code nextTerm} followed by
   * its documents in ascending id order. An index with zero documents produces a field with no
   * terms.
   *
   * @param indexDir directory of the index to write the field into
   * @param newFieldName name of the string field to create
   * @param docReader reader used to obtain the number of documents
   * @param values one term per document, indexed by document id; entries must be non-null
   * @throws IllegalArgumentException if {@code values.length} differs from the reader's document
   *     count
   * @throws IOException if writing the field fails
   */
  public static void addField(
      String indexDir, String newFieldName, FlamdexReader docReader, final String[] values)
      throws IOException {
    final int numDocs = docReader.getNumDocs();
    // Fail fast: a length mismatch would otherwise surface as an ArrayIndexOutOfBoundsException,
    // possibly only after the writer has already started producing output.
    if (values.length != numDocs) {
      throw new IllegalArgumentException(
          "values.length ("
              + values.length
              + ") does not match number of documents ("
              + numDocs
              + ")");
    }

    final int[] indices = new int[numDocs];
    for (int i = 0; i < indices.length; i++) {
      indices[i] = i;
    }
    log.debug("sorting");
    Quicksortables.sort(
        new Quicksortable() {
          @Override
          public void swap(int i, int j) {
            Quicksortables.swap(indices, i, j);
          }

          @Override
          public int compare(int i, int j) {
            // Primary key: the document's value. Secondary key: the document id, so that
            // documents sharing a term come out in ascending id order.
            final int leftDoc = indices[i];
            final int rightDoc = indices[j];
            final int byValue = values[leftDoc].compareTo(values[rightDoc]);
            if (byValue != 0) {
              return byValue < 0 ? -1 : 1;
            }
            if (leftDoc < rightDoc) {
              return -1;
            }
            return leftDoc > rightDoc ? 1 : 0;
          }
        },
        indices.length);

    log.debug("writing field " + newFieldName);
    final SimpleFlamdexWriter w = new SimpleFlamdexWriter(indexDir, numDocs, false);
    final StringFieldWriter sfw = w.getStringFieldWriter(newFieldName, true);
    final IntArrayList docList = new IntArrayList();
    for (int i = 0; i < indices.length; ++i) {
      // A change of value marks the end of the previous term's run: flush it.
      if (i > 0) {
        final String prev = values[indices[i - 1]];
        if (values[indices[i]].compareTo(prev) != 0) {
          writeTermWithDocs(sfw, prev, docList);
          docList.clear();
        }
      }
      docList.add(indices[i]);
    }
    // Flush the final run; docList is empty only when the index has no documents.
    if (docList.size() > 0) {
      writeTermWithDocs(sfw, values[indices[indices.length - 1]], docList);
    }

    sfw.close();
    w.close();
  }

  /** Emits {@code term} followed by every document id in {@code docs}, in list order. */
  private static void writeTermWithDocs(StringFieldWriter sfw, String term, IntArrayList docs)
      throws IOException {
    sfw.nextTerm(term);
    for (int j = 0; j < docs.size(); ++j) {
      sfw.nextDoc(docs.getInt(j));
    }
  }
  /**
   * Loads multi-value facet data for a field, using a work area to prepare loading.
   *
   * <p>Walks every term of {@code fieldName} in the reader's term order, records the term in the
   * value list, and for each document in the term's postings registers the (document, value id)
   * pair with a buffered loader. Per-term document frequency, minimum document id and maximum
   * document id are counted from the postings themselves rather than taken from the term
   * dictionary. Finally the nested array is loaded from the buffered data and the results are
   * published to {@code valArray}, {@code freqs}, {@code minIDs} and {@code maxIDs}.
   *
   * <p>Slot 0 of every output array is a placeholder entry (null value). Its min/max ids are set
   * to the first and last document id for which {@code _nestedArray.contains(doc, 0, true)}
   * holds, and its frequency to the number of ids in {@code [0, maxdoc]} that appeared in no
   * term's postings — presumably the documents with no value for this field.
   *
   * @param fieldName name of the field whose terms are loaded
   * @param reader index reader supplying terms and postings
   * @param listFactory factory for the term value list; when {@code null} a {@code TermStringList}
   *     is used
   * @param workArea work area handed to the buffered loader
   * @throws IOException if reading from the index or loading the nested array fails
   */
  public void load(
      String fieldName, IndexReader reader, TermListFactory<T> listFactory, WorkArea workArea)
      throws IOException {
    int maxdoc = reader.maxDoc();
    BufferedLoader loader = getBufferedLoader(maxdoc, workArea);

    TermEnum tenum = null;
    TermDocs tdoc = null;
    TermValueList<T> list =
        (listFactory == null
            ? (TermValueList<T>) new TermStringList()
            : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();
    // Tracks every document id seen in any term's postings.
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);
    int negativeValueCount = getNegativeValueCount(reader, fieldName.intern());
    int t = 0; // current term number
    // Reserve slot 0 as the placeholder entry; real terms start at 1.
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);
    t++;

    _overflow = false;
    try {
      tdoc = reader.termDocs();
      tenum = reader.terms(new Term(fieldName, ""));
      if (tenum != null) {
        do {
          Term term = tenum.term();
          // Stop once the enumeration runs out or moves on to a different field.
          if (term == null || !fieldName.equals(term.field())) break;

          String val = term.text();

          if (val != null) {
            list.add(val);

            tdoc.seek(tenum);
            // tenum.docFreq() is not used here because that df doesn't take into account
            // the number of deleted docs; the postings are counted instead.
            int df = 0;
            int minID = -1;
            int maxID = -1;
            // The first negativeValueCount terms get their ids in reverse order
            // (term 1 -> negativeValueCount, ..., term negativeValueCount -> 1);
            // all later terms keep their term number as id.
            int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
            if (tdoc.next()) {
              df++;
              int docid = tdoc.doc();
              if (!loader.add(docid, valId)) logOverflow(fieldName);
              // The first posting is recorded as the term's minimum doc id.
              minID = docid;
              bitset.fastSet(docid);
              while (tdoc.next()) {
                df++;
                docid = tdoc.doc();

                if (!loader.add(docid, valId)) logOverflow(fieldName);
                bitset.fastSet(docid);
              }
              // The last posting visited is recorded as the term's maximum doc id.
              maxID = docid;
            }
            freqList.add(df);
            minIDList.add(minID);
            maxIDList.add(maxID);
          }

          t++;
        } while (tenum.next());
      }
    } finally {
      // Close both enumerations even if closing the first one throws.
      try {
        if (tdoc != null) {
          tdoc.close();
        }
      } finally {
        if (tenum != null) {
          tenum.close();
        }
      }
    }

    list.seal();

    try {
      _nestedArray.load(maxdoc + 1, loader);
    } catch (IOException e) {
      // Let I/O failures propagate unchanged instead of being wrapped below.
      throw e;
    } catch (Exception e) {
      throw new RuntimeException("failed to load due to " + e.toString(), e);
    }

    this.valArray = list;
    this.freqs = freqList.toIntArray();
    this.minIDs = minIDList.toIntArray();
    this.maxIDs = maxIDList.toIntArray();

    // Fill in the placeholder slot: scan forward for the first matching doc id...
    int doc = 0;
    while (doc <= maxdoc && !_nestedArray.contains(doc, 0, true)) {
      ++doc;
    }
    if (doc <= maxdoc) {
      this.minIDs[0] = doc;
      // ...then backward for the last one. The scan must include doc 0: if it is the only
      // match, the max id has to be 0 rather than being left at -1.
      doc = maxdoc;
      while (doc >= 0 && !_nestedArray.contains(doc, 0, true)) {
        --doc;
      }
      if (doc >= 0) {
        this.maxIDs[0] = doc;
      }
    }
    this.freqs[0] = maxdoc + 1 - (int) bitset.cardinality();
  }