コード例 #1
0
ファイル: DocReconstructor.java プロジェクト: belle96/luke
 /**
  * Prepare a document reconstructor.
  *
  * @param reader IndexReader to read from.
  * @param fieldNames if non-null or not empty, data will be collected only from these fields,
  *     otherwise data will be collected from all fields
  * @param numTerms total number of terms in the index, or -1 if unknown (will be calculated)
  * @throws Exception
  */
 public DocReconstructor(IndexReader reader, String[] fieldNames, int numTerms) throws Exception {
   if (reader == null) {
     throw new Exception("IndexReader cannot be null.");
   }
   this.reader = reader;
   if (fieldNames == null || fieldNames.length == 0) {
     // collect fieldNames
     this.fieldNames = (String[]) reader.getFieldNames(FieldOption.ALL).toArray(new String[0]);
   } else {
     this.fieldNames = fieldNames;
   }
   if (numTerms == -1) {
     Fields fields = MultiFields.getFields(reader);
     numTerms = 0;
     FieldsEnum fe = fields.iterator();
     String fld = null;
     while ((fld = fe.next()) != null) {
       TermsEnum te = fe.terms();
       while (te.next() != null) {
         numTerms++;
       }
     }
     this.numTerms = numTerms;
   }
   deleted = MultiFields.getDeletedDocs(reader);
 }
コード例 #2
0
  /*
   *  listPostings displays the first n postings for a term in a
   *  field in an index (specified by reader).  Set n to MAX_VALUE
   *  to display all postings.
   */
  static void listPostings(IndexReader reader, String termString, String field, Integer n)
      throws IOException {

    System.out.println("\nPostings:  " + termString + " " + field);

    /*
     *  Prepare to access the index.
     */
    BytesRef termBytes = new BytesRef(termString);
    Term term = new Term(field, termBytes);
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    /*
     *  Lookup the collection term frequency (ctf).
     */
    long df = reader.docFreq(term);
    System.out.println("\tdf:  " + df);

    long ctf = reader.totalTermFreq(term);
    System.out.println("\tctf:  " + ctf);

    if (df < 1) return;

    /*
     *  Lookup the inverted list.
     */
    DocsAndPositionsEnum postings =
        MultiFields.getTermPositionsEnum(reader, liveDocs, field, termBytes);

    /*
     *  Iterate through the first n postings.
     */
    long count = 0;

    while ((count < n) && (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)) {

      System.out.println("\tdocid: " + postings.docID());
      int tf = postings.freq();
      System.out.println("\ttf: " + tf);
      System.out.print("\tPositions: ");

      for (int j = 0; j < tf; j++) {
        int pos = postings.nextPosition();
        System.out.print(pos + " ");
      }

      System.out.println("");

      count++;
    }
    ;

    return;
  }
コード例 #3
0
  /*
   *  listTermDictionary displays the term dictionary for a field.
   */
  static void listTermDictionary(IndexReader reader, String fieldName) throws IOException {

    System.out.println("\nTerm Dictionary:  field " + fieldName);

    /*
      Grant says:
      MultiFields.getTerms(IndexReader, fieldName)
    */

    Terms terms = MultiFields.getTerms(reader, fieldName);

    if ((terms == null) || (terms.size() == -1))
      System.out.println("    The term dictionary is empty.");
    else {
      System.out.println("    Vocabulary size: " + terms.size() + " terms");

      TermsEnum ithTerm = terms.iterator(null);

      /*
       *  Iterate over the terms in this document.
       *  Information about a term's occurrences (tf and
       *  positions) is accessed via the indexing API, which
       *  returns inverted lists that describe (only) the
       *  current document.
       */
      while (ithTerm.next() != null) {
        System.out.format(
            "      %-30s %d %d\n",
            ithTerm.term().utf8ToString(), ithTerm.docFreq(), ithTerm.totalTermFreq());
      }
      ;
    }
    ;
  }
コード例 #4
0
  /**
   * Return a query that will return docs like the passed lucene document ID.
   *
   * @param docNum the documentID of the lucene doc to generate the 'More Like This" query for.
   * @return a query that will return docs like the passed lucene document ID.
   */
  public Query like(int docNum) throws IOException {
    if (fieldNames == null) {
      // gather list of valid fields from lucene
      Collection<String> fields = MultiFields.getIndexedFields(ir);
      fieldNames = fields.toArray(new String[fields.size()]);
    }

    return createQuery(retrieveTerms(docNum));
  }
コード例 #5
0
  /**
   * We assume that the initial indexing has been done and a set of reference objects has been found
   * and indexed in the separate directory. However further documents were added and they now need
   * to get a ranked list of reference objects. So we (i) get all these new documents missing the
   * field "ro-order" and (ii) add this field.
   *
   * @param indexPath the index to update
   * @throws IOException
   */
  public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    IndexWriter iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
      Document document = reader.document(i);
      if (document.getField("ro-order") == null) { // if the field is not here we create it.
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
          sb.append(hits.doc(j).getValues("ro-id")[0]);
          sb.append(' ');
        }
        // System.out.println(sb.toString());
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(
            new Term(
                DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
            document);
        countUpdated++;
      }

      // progress report
      progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

      // debug:
      System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
  }
コード例 #6
0
 private Fields generateTermVectors(
     Collection<GetField> getFields,
     boolean withOffsets,
     @Nullable Map<String, String> perFieldAnalyzer)
     throws IOException {
   /* store document in memory index */
   MemoryIndex index = new MemoryIndex(withOffsets);
   for (GetField getField : getFields) {
     String field = getField.getName();
     Analyzer analyzer = getAnalyzerAtField(field, perFieldAnalyzer);
     for (Object text : getField.getValues()) {
       index.addField(field, text.toString(), analyzer);
     }
   }
   /* and read vectors from it */
   return MultiFields.getFields(index.createSearcher().getIndexReader());
 }
コード例 #7
0
 public void testTermDocsEnum() throws Exception {
   Directory dir = newDirectory();
   IndexWriter w =
       new IndexWriter(
           dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
   Document d = new Document();
   d.add(newStringField("f", "j", Field.Store.NO));
   w.addDocument(d);
   w.commit();
   w.addDocument(d);
   IndexReader r = w.getReader();
   w.close();
   DocsEnum de = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j"));
   assertEquals(0, de.nextDoc());
   assertEquals(1, de.nextDoc());
   assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
   r.close();
   dir.close();
 }
コード例 #8
0
ファイル: DocReconstructor.java プロジェクト: belle96/luke
 /**
  * Reconstruct document fields.
  *
  * @param docNum document number. If this document is deleted, but the index is not optimized yet,
  *     the reconstruction process may still yield the reconstructed field content even from
  *     deleted documents.
  * @return reconstructed document
  * @throws Exception
  */
 public Reconstructed reconstruct(int docNum) throws Exception {
   if (docNum < 0 || docNum > reader.maxDoc()) {
     throw new Exception("Document number outside of valid range.");
   }
   Reconstructed res = new Reconstructed();
   if (deleted != null && deleted.get(docNum)) {
     throw new Exception("Document is deleted.");
   } else {
     Document doc = reader.document(docNum);
     for (int i = 0; i < fieldNames.length; i++) {
       Field[] fs = doc.getFields(fieldNames[i]);
       if (fs != null && fs.length > 0) {
         res.getStoredFields().put(fieldNames[i], fs);
       }
     }
   }
   // collect values from unstored fields
   HashSet<String> fields = new HashSet<String>(Arrays.asList(fieldNames));
   // try to use term vectors if available
   progress.maxValue = fieldNames.length;
   progress.curValue = 0;
   progress.minValue = 0;
   for (int i = 0; i < fieldNames.length; i++) {
     TermFreqVector tvf = reader.getTermFreqVector(docNum, fieldNames[i]);
     if (tvf != null && tvf.size() > 0 && (tvf instanceof TermPositionVector)) {
       TermPositionVector tpv = (TermPositionVector) tvf;
       progress.message = "Reading term vectors ...";
       progress.curValue = i;
       setChanged();
       notifyObservers(progress);
       BytesRef[] tv = tpv.getTerms();
       for (int k = 0; k < tv.length; k++) {
         // do we have positions?
         int[] posArr = tpv.getTermPositions(k);
         if (posArr == null) {
           // only offsets
           TermVectorOffsetInfo[] offsets = tpv.getOffsets(k);
           if (offsets.length == 0) {
             continue;
           }
           // convert offsets into positions
           posArr = convertOffsets(offsets);
         }
         GrowableStringArray gsa = res.getReconstructedFields().get(fieldNames[i]);
         if (gsa == null) {
           gsa = new GrowableStringArray();
           res.getReconstructedFields().put(fieldNames[i], gsa);
         }
         for (int m = 0; m < posArr.length; m++) {
           gsa.append(posArr[m], "|", tv[k].utf8ToString());
         }
       }
       fields.remove(fieldNames[i]); // got what we wanted
     }
   }
   // this loop collects data only from left-over fields
   // not yet collected through term vectors
   progress.maxValue = fields.size();
   progress.curValue = 0;
   progress.minValue = 0;
   for (String fld : fields) {
     progress.message = "Collecting terms in " + fld + " ...";
     progress.curValue++;
     setChanged();
     notifyObservers(progress);
     Terms terms = MultiFields.getTerms(reader, fld);
     if (terms == null) { // no terms in this field
       continue;
     }
     TermsEnum te = terms.iterator();
     while (te.next() != null) {
       DocsAndPositionsEnum dpe = te.docsAndPositions(deleted, null);
       if (dpe == null) { // no position info for this field
         break;
       }
       int num = dpe.advance(docNum);
       if (num != docNum) { // either greater than or NO_MORE_DOCS
         continue; // no data for this term in this doc
       }
       String term = te.term().utf8ToString();
       GrowableStringArray gsa = (GrowableStringArray) res.getReconstructedFields().get(fld);
       if (gsa == null) {
         gsa = new GrowableStringArray();
         res.getReconstructedFields().put(fld, gsa);
       }
       for (int k = 0; k < dpe.freq(); k++) {
         int pos = dpe.nextPosition();
         gsa.append(pos, "|", term);
       }
     }
   }
   progress.message = "Done.";
   progress.curValue = 100;
   setChanged();
   notifyObservers(progress);
   return res;
 }
コード例 #9
0
  public void testRandom() throws Exception {

    int num = atLeast(2);
    for (int iter = 0; iter < num; iter++) {
      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter);
      }

      Directory dir = newDirectory();

      IndexWriter w =
          new IndexWriter(
              dir,
              newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
                  .setMergePolicy(NoMergePolicy.COMPOUND_FILES));
      _TestUtil.keepFullyDeletedSegments(w);

      Map<BytesRef, List<Integer>> docs = new HashMap<BytesRef, List<Integer>>();
      Set<Integer> deleted = new HashSet<Integer>();
      List<BytesRef> terms = new ArrayList<BytesRef>();

      int numDocs = _TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);
      Document doc = new Document();
      Field f = newStringField("field", "", Field.Store.NO);
      doc.add(f);
      Field id = newStringField("id", "", Field.Store.NO);
      doc.add(id);

      boolean onlyUniqueTerms = random().nextBoolean();
      if (VERBOSE) {
        System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
      }
      Set<BytesRef> uniqueTerms = new HashSet<BytesRef>();
      for (int i = 0; i < numDocs; i++) {

        if (!onlyUniqueTerms && random().nextBoolean() && terms.size() > 0) {
          // re-use existing term
          BytesRef term = terms.get(random().nextInt(terms.size()));
          docs.get(term).add(i);
          f.setStringValue(term.utf8ToString());
        } else {
          String s = _TestUtil.randomUnicodeString(random(), 10);
          BytesRef term = new BytesRef(s);
          if (!docs.containsKey(term)) {
            docs.put(term, new ArrayList<Integer>());
          }
          docs.get(term).add(i);
          terms.add(term);
          uniqueTerms.add(term);
          f.setStringValue(s);
        }
        id.setStringValue("" + i);
        w.addDocument(doc);
        if (random().nextInt(4) == 1) {
          w.commit();
        }
        if (i > 0 && random().nextInt(20) == 1) {
          int delID = random().nextInt(i);
          deleted.add(delID);
          w.deleteDocuments(new Term("id", "" + delID));
          if (VERBOSE) {
            System.out.println("TEST: delete " + delID);
          }
        }
      }

      if (VERBOSE) {
        List<BytesRef> termsList = new ArrayList<BytesRef>(uniqueTerms);
        Collections.sort(termsList, BytesRef.getUTF8SortedAsUTF16Comparator());
        System.out.println("TEST: terms in UTF16 order:");
        for (BytesRef b : termsList) {
          System.out.println("  " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
          for (int docID : docs.get(b)) {
            if (deleted.contains(docID)) {
              System.out.println("    " + docID + " (deleted)");
            } else {
              System.out.println("    " + docID);
            }
          }
        }
      }

      IndexReader reader = w.getReader();
      w.close();
      if (VERBOSE) {
        System.out.println("TEST: reader=" + reader);
      }

      Bits liveDocs = MultiFields.getLiveDocs(reader);
      for (int delDoc : deleted) {
        assertFalse(liveDocs.get(delDoc));
      }

      for (int i = 0; i < 100; i++) {
        BytesRef term = terms.get(random().nextInt(terms.size()));
        if (VERBOSE) {
          System.out.println(
              "TEST: seek term=" + UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
        }

        DocsEnum docsEnum = _TestUtil.docs(random(), reader, "field", term, liveDocs, null, 0);
        assertNotNull(docsEnum);

        for (int docID : docs.get(term)) {
          if (!deleted.contains(docID)) {
            assertEquals(docID, docsEnum.nextDoc());
          }
        }
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
      }

      reader.close();
      dir.close();
    }
  }
コード例 #10
0
  public TermVectorResponse getTermVector(TermVectorRequest request, String concreteIndex) {
    final Engine.Searcher searcher = indexShard.acquireSearcher("term_vector");
    IndexReader topLevelReader = searcher.reader();
    final TermVectorResponse termVectorResponse =
        new TermVectorResponse(concreteIndex, request.type(), request.id());

    final Term uidTerm =
        new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(request.type(), request.id()));
    Engine.GetResult get = indexShard.get(new Engine.Get(request.realtime(), uidTerm));
    boolean docFromTranslog = get.source() != null;
    AggregatedDfs dfs = null;

    /* fetched from translog is treated as an artificial document */
    if (docFromTranslog) {
      request.doc(get.source().source, false);
      termVectorResponse.setDocVersion(get.version());
    }

    /* handle potential wildcards in fields */
    if (request.selectedFields() != null) {
      handleFieldWildcards(request);
    }

    try {
      Fields topLevelFields = MultiFields.getFields(topLevelReader);
      Versions.DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
      /* from an artificial document */
      if (request.doc() != null) {
        Fields termVectorsByField = generateTermVectorsFromDoc(request, !docFromTranslog);
        // if no document indexed in shard, take the queried document itself for stats
        if (topLevelFields == null) {
          topLevelFields = termVectorsByField;
        }
        if (termVectorsByField != null && useDfs(request)) {
          dfs = getAggregatedDfs(termVectorsByField, request);
        }
        termVectorResponse.setFields(
            termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields, dfs);
        termVectorResponse.setExists(true);
        termVectorResponse.setArtificial(!docFromTranslog);
      }
      /* or from an existing document */
      else if (docIdAndVersion != null) {
        // fields with stored term vectors
        Fields termVectorsByField =
            docIdAndVersion.context.reader().getTermVectors(docIdAndVersion.docId);
        Set<String> selectedFields = request.selectedFields();
        // generate tvs for fields where analyzer is overridden
        if (selectedFields == null && request.perFieldAnalyzer() != null) {
          selectedFields = getFieldsToGenerate(request.perFieldAnalyzer(), termVectorsByField);
        }
        // fields without term vectors
        if (selectedFields != null) {
          termVectorsByField =
              addGeneratedTermVectors(get, termVectorsByField, request, selectedFields);
        }
        if (termVectorsByField != null && useDfs(request)) {
          dfs = getAggregatedDfs(termVectorsByField, request);
        }
        termVectorResponse.setFields(
            termVectorsByField, request.selectedFields(), request.getFlags(), topLevelFields, dfs);
        termVectorResponse.setDocVersion(docIdAndVersion.version);
        termVectorResponse.setExists(true);
      } else {
        termVectorResponse.setExists(false);
      }
    } catch (Throwable ex) {
      throw new ElasticsearchException("failed to execute term vector request", ex);
    } finally {
      searcher.close();
      get.release();
    }
    return termVectorResponse;
  }
コード例 #11
0
  /**
   * Creates a set of reference objects and stores it in a new index (name "<indexPath>-ro"). Then
   * creates ordered lists of reference object positions for each data item in the index with given
   * feature. Finally a new index (name "<indexPath>-ms") is created where all the original
   * documents as well as the new data are stored.
   *
   * @param indexPath the path to the original index
   * @throws IOException
   */
  public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();

    if (numDocs < numReferenceObjects) {
      throw new UnsupportedOperationException("Too few documents in index.");
    }

    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);

    boolean hasDeletions = reader.hasDeletions();

    // init reference objects:
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);

    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
      referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }
    int count = 0;

    if (hasDeletions) {
      System.err.println(
          "WARNING: There are deleted docs in your index. You should "
              + "optimize your index before using this method.");
    }

    // progress report
    progress.setCurrentState(State.RoIndexing);

    // find them in the index and put them into a separate index:
    for (int i : referenceObjsIds) {
      count++;
      Document document = reader.document(i);
      document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
      iw.addDocument(document);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Indexing);

    // now find the reference objects for each entry ;)
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);

    iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
      Document document = reader.document(i);
      ImageSearchHits hits = searcher.search(document, readerRo);
      sb.delete(0, sb.length());
      for (int j = 0; j < numReferenceObjectsUsed; j++) {
        sb.append(hits.doc(j).getValues("ro-id")[0]);
        sb.append(' ');
      }
      // System.out.println(sb.toString());
      document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
      iw.updateDocument(
          new Term(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
          document);

      // progress report
      progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Idle);
  }
コード例 #12
0
  public static void main(String[] args) throws IOException {

    IndexReader reader = null;

    /*
     *  Opening the index first simplifies the processing of the
     *  rest of the command line arguments.
     */
    for (int i = 0; i < args.length; i++) {
      if (("-index".equals(args[i])) && ((i + 1) < args.length)) {
        reader = DirectoryReader.open(FSDirectory.open(new File(args[i + 1])));

        if (reader == null) {
          System.err.println("Error:  Can't open index " + args[i + 1]);
          System.exit(1);
        }
        ;

        break;
      }
      ;
    }
    ;

    if (reader == null) {
      System.err.println(usage);
      System.exit(1);
    }
    ;

    /*
     *  Process the command line arguments sequentially.
     */
    for (int i = 0; i < args.length; i++) {

      if ("-index".equals(args[i])) {

        /*
         *  Handled in the previous loop, so just skip the argument.
         */
        i++;

      } else if ("-list-edocid".equals(args[i])) {

        System.out.println("-list-edocid:");

        if ((i + 1) >= args.length) {
          System.out.println(usage);
          break;
        }
        ;

        Document d = reader.document(Integer.parseInt(args[i + 1]));

        System.out.println(
            "Internal docid --> External docid: " + args[i + 1] + " --> " + d.get("externalId"));

        i += 1;
      } else if ("-list-docids".equals(args[i])) {

        System.out.println("-list-docids:");

        for (int j = 0; j < reader.numDocs(); j++) {
          Document d = reader.document(j);
          System.out.println("Internal --> external docid: " + j + " --> " + d.get("externalId"));
        }
        ;

      } else if ("-list-fields".equals(args[i])) {

        Fields fields = MultiFields.getFields(reader);

        System.out.print("\nNumber of fields:  ");

        if (fields == null) System.out.println("0");
        else {
          System.out.println(fields.size());

          Iterator<String> is = fields.iterator();

          while (is.hasNext()) {
            System.out.println("\t" + is.next());
          }
          ;
        }
        ;

      } else if ("-list-postings".equals(args[i])) {

        if ((i + 2) >= args.length) {
          System.out.println(usage);
          break;
        }
        ;

        listPostings(reader, args[i + 1], args[i + 2], Integer.MAX_VALUE);
        i += 2;

      } else if ("-list-postings-sample".equals(args[i])) {

        if ((i + 2) >= args.length) {
          System.out.println(usage);
          break;
        }
        ;

        listPostings(reader, args[i + 1], args[i + 2], 5);
        i += 2;

      } else if ("-list-stats".equals(args[i])) {

        System.out.println("Corpus statistics:");
        System.out.println("\tnumdocs\t\t" + reader.numDocs());
        System.out.println(
            "\turl:\t"
                + "\tnumdocs="
                + reader.getDocCount("url")
                + "\tsumTotalTF="
                + reader.getSumTotalTermFreq("url")
                + "\tavglen="
                + reader.getSumTotalTermFreq("url") / (float) reader.getDocCount("url"));

        System.out.println(
            "\tkeywords:"
                + "\tnumdocs="
                + reader.getDocCount("keywords")
                + "\tsumTotalTF="
                + reader.getSumTotalTermFreq("keywords")
                + "\tavglen="
                + reader.getSumTotalTermFreq("keywords") / (float) reader.getDocCount("keywords"));

        System.out.println(
            "\ttitle:\t"
                + "\tnumdocs="
                + reader.getDocCount("title")
                + "\tsumTotalTF="
                + reader.getSumTotalTermFreq("title")
                + "\tavglen="
                + reader.getSumTotalTermFreq("title") / (float) reader.getDocCount("title"));

        System.out.println(
            "\tbody:\t"
                + "\tnumdocs="
                + reader.getDocCount("body")
                + "\tsumTotalTF="
                + reader.getSumTotalTermFreq("body")
                + "\tavglen="
                + reader.getSumTotalTermFreq("body") / (float) reader.getDocCount("body"));

        System.out.println(
            "\tinlink:\t"
                + "\tnumdocs="
                + reader.getDocCount("inlink")
                + "\tsumTotalTF="
                + reader.getSumTotalTermFreq("inlink")
                + "\tavglen="
                + reader.getSumTotalTermFreq("inlink") / (float) reader.getDocCount("inlink"));

      } else if ("-list-terms".equals(args[i])) {

        if ((i + 1) >= args.length) {
          System.out.println(usage);
          break;
        }
        ;

        listTermDictionary(reader, args[i + 1]);
        i += 1;

      } else if ("-list-termvector".equals(args[i])) {

        if ((i + 1) >= args.length) {
          System.out.println(usage);
          break;
        }
        ;

        listTermVectors(reader, args[i + 1]);
        i += 1;

      } else if ("-list-termvector-field".equals(args[i])) {

        if ((i + 2) >= args.length) {
          System.out.println(usage);
          break;
        }
        ;

        listTermVectorField(reader, args[i + 1], args[i + 2]);
        i += 2;

      } else System.err.println("\nWarning:  Unknown argument " + args[i] + " ignored.");
    }
    ;

    /*
     *  Close the index and exit gracefully.
     */
    reader.close();
  }