Пример #1
0
  /**
   * Copies every int field and every string field of {@code fdx} into {@code w}, term by term and
   * doc id by doc id, and then closes {@code w}.
   *
   * <p>The doc id stream, the term iterators and the field writers are released even when the copy
   * fails part-way. {@code w} itself is closed only after a fully successful copy, so a failed run
   * never finalizes the writer; the caller keeps ownership of it in that case.
   *
   * @param fdx reader to copy from; it is not closed by this method
   * @param w writer to copy into; closed by this method on success
   * @throws IOException if the writer or one of its field writers reports an I/O failure
   */
  public static void writeFlamdex(final FlamdexReader fdx, final FlamdexWriter w)
      throws IOException {
    final DocIdStream dis = fdx.getDocIdStream();
    try {
      final int[] docIdBuf = new int[DOC_ID_BUFFER_SIZE];

      for (final String intField : fdx.getIntFields()) {
        final IntFieldWriter ifw = w.getIntFieldWriter(intField);
        try {
          final IntTermIterator iter = fdx.getIntTermIterator(intField);
          try {
            while (iter.next()) {
              ifw.nextTerm(iter.term());
              dis.reset(iter);
              while (true) {
                final int n = dis.fillDocIdBuffer(docIdBuf);
                for (int i = 0; i < n; ++i) {
                  ifw.nextDoc(docIdBuf[i]);
                }
                // A fill shorter than the buffer is treated as the end of this term's docs.
                if (n < docIdBuf.length) break;
              }
            }
          } finally {
            // Same order as before on success: iterator first, then the field writer.
            iter.close();
          }
        } finally {
          ifw.close();
        }
      }

      for (final String stringField : fdx.getStringFields()) {
        final StringFieldWriter sfw = w.getStringFieldWriter(stringField);
        try {
          final StringTermIterator iter = fdx.getStringTermIterator(stringField);
          try {
            while (iter.next()) {
              sfw.nextTerm(iter.term());
              dis.reset(iter);
              while (true) {
                final int n = dis.fillDocIdBuffer(docIdBuf);
                for (int i = 0; i < n; ++i) {
                  sfw.nextDoc(docIdBuf[i]);
                }
                // A fill shorter than the buffer is treated as the end of this term's docs.
                if (n < docIdBuf.length) break;
              }
            }
          } finally {
            iter.close();
          }
        } finally {
          sfw.close();
        }
      }
    } finally {
      dis.close();
    }

    // Only reached when every field was copied without an exception.
    w.close();
  }
Пример #2
0
  /**
   * Merges the given readers into a single index written through {@code w}.
   *
   * <p>Readers are laid out one after another: the doc ids of reader {@code i} are shifted by the
   * sum of {@code getNumDocs()} of readers {@code 0..i-1}. For each field, the term iterators of
   * all readers that contain the field are merged through a priority queue, so each distinct term
   * is written once, followed by the shifted doc ids from every reader that has that term, in the
   * order the readers left the head of the queue.
   *
   * <p>Neither {@code w} nor the readers are closed by this method. Doc id streams, field writers
   * and term iterators that are still open are closed even when the merge fails part-way.
   *
   * @param readers readers to merge, in the order their documents should appear in the output
   * @param w writer receiving the merged fields; left open for the caller
   * @throws IOException if the writer or one of its field writers reports an I/O failure
   */
  public static void merge(FlamdexReader[] readers, FlamdexWriter w) throws IOException {
    final DocIdStream[] docIdStreams = new DocIdStream[readers.length];
    try {
      final int[] segmentStartDocs = new int[readers.length];
      int totalNumDocs = 0;
      for (int i = 0; i < readers.length; ++i) {
        docIdStreams[i] = readers[i].getDocIdStream();
        segmentStartDocs[i] = totalNumDocs;
        totalNumDocs += readers[i].getNumDocs();
      }

      log.info("merging " + readers.length + " readers with a total of " + totalNumDocs + " docs");

      // indexBuf collects, per term, the readers that contain that term.
      final int[] indexBuf = new int[readers.length];
      final int[] docIdBuf = new int[64];

      for (final String intField : mergeIntFields(readers)) {
        final IntFieldWriter ifw = w.getIntFieldWriter(intField);

        final IntTermIteratorWrapper[] iterators = new IntTermIteratorWrapper[readers.length];
        try {
          final IndirectPriorityQueue<IntTermIteratorWrapper> pq =
              new ObjectHeapSemiIndirectPriorityQueue<IntTermIteratorWrapper>(
                  iterators, iterators.length);
          for (int i = 0; i < readers.length; ++i) {
            if (!readers[i].getIntFields().contains(intField)) continue;
            final IntTermIterator it = readers[i].getIntTermIterator(intField);
            if (it.next()) {
              iterators[i] = new IntTermIteratorWrapper(it, i);
              pq.enqueue(i);
            } else {
              it.close();
            }
          }

          while (!pq.isEmpty()) {
            final long term = iterators[pq.first()].it.term();
            int numIndexes = 0;
            IntTermIteratorWrapper wrap;
            while (!pq.isEmpty() && (wrap = iterators[pq.first()]).it.term() == term) {
              final int index = wrap.index;
              // The stream is reset on the iterator BEFORE the iterator is advanced; presumably
              // reset() captures the current term's doc list -- keep this order.
              docIdStreams[index].reset(wrap.it);
              indexBuf[numIndexes++] = index;
              if (wrap.it.next()) {
                pq.changed();
              } else {
                wrap.it.close();
                // Forget the closed iterator so the cleanup below does not close it again.
                iterators[pq.dequeue()] = null;
              }
            }

            ifw.nextTerm(term);
            for (int i = 0; i < numIndexes; ++i) {
              final int index = indexBuf[i];
              final int startDoc = segmentStartDocs[index];
              final DocIdStream dis = docIdStreams[index];
              while (true) {
                final int n = dis.fillDocIdBuffer(docIdBuf);

                for (int j = 0; j < n; ++j) {
                  ifw.nextDoc(docIdBuf[j] + startDoc);
                }

                // A fill shorter than the buffer is treated as the end of this term's docs.
                if (n < docIdBuf.length) break;
              }
            }
          }
        } finally {
          try {
            // Only non-empty after a failure: on success every iterator was closed and nulled.
            for (final IntTermIteratorWrapper open : iterators) {
              if (open != null) open.it.close();
            }
          } finally {
            ifw.close();
          }
        }
      }

      for (final String stringField : mergeStringFields(readers)) {
        final StringFieldWriter sfw = w.getStringFieldWriter(stringField);

        final StringTermIteratorWrapper[] iterators =
            new StringTermIteratorWrapper[readers.length];
        try {
          final IndirectPriorityQueue<StringTermIteratorWrapper> pq =
              new ObjectHeapSemiIndirectPriorityQueue<StringTermIteratorWrapper>(
                  iterators, iterators.length);
          for (int i = 0; i < readers.length; ++i) {
            if (!readers[i].getStringFields().contains(stringField)) continue;
            final StringTermIterator it = readers[i].getStringTermIterator(stringField);
            if (it.next()) {
              iterators[i] = new StringTermIteratorWrapper(it, i);
              pq.enqueue(i);
            } else {
              it.close();
            }
          }

          while (!pq.isEmpty()) {
            final String term = iterators[pq.first()].it.term();
            int numIndexes = 0;
            StringTermIteratorWrapper wrap;
            while (!pq.isEmpty() && (wrap = iterators[pq.first()]).it.term().equals(term)) {
              final int index = wrap.index;
              // Reset before advancing, as in the int-field pass above.
              docIdStreams[index].reset(wrap.it);
              indexBuf[numIndexes++] = index;
              if (wrap.it.next()) {
                pq.changed();
              } else {
                wrap.it.close();
                // Forget the closed iterator so the cleanup below does not close it again.
                iterators[pq.dequeue()] = null;
              }
            }

            sfw.nextTerm(term);
            for (int i = 0; i < numIndexes; ++i) {
              final int index = indexBuf[i];
              final int startDoc = segmentStartDocs[index];
              final DocIdStream dis = docIdStreams[index];
              while (true) {
                final int n = dis.fillDocIdBuffer(docIdBuf);

                for (int j = 0; j < n; ++j) {
                  sfw.nextDoc(docIdBuf[j] + startDoc);
                }

                // A fill shorter than the buffer is treated as the end of this term's docs.
                if (n < docIdBuf.length) break;
              }
            }
          }
        } finally {
          try {
            // Only non-empty after a failure: on success every iterator was closed and nulled.
            for (final StringTermIteratorWrapper open : iterators) {
              if (open != null) open.it.close();
            }
          } finally {
            sfw.close();
          }
        }
      }
    } finally {
      // Entries may be null if opening the streams failed before the array was filled.
      for (final DocIdStream dis : docIdStreams) {
        if (dis != null) dis.close();
      }
    }
  }