Java IndexReader.document Examples

Programming Language: Java

Namespace/Package Name: org.apache.lucene.search

Class/Type: IndexReader

Method/Function: document

Examples at hotexamples.com: 3

Java IndexReader.document - 3 examples found. These are the top rated real world Java examples of org.apache.lucene.search.IndexReader.document extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

close(9)

maxDoc(6)

numDocs(5)

docFreq(3)

document(3)

getDocCount(2)

getSumTotalTermFreq(2)

getTermVector(2)

getTermVectors(2)

open(2)

totalTermFreq(2)

isDeleted(1)

termDocs(1)

Example #1

Show file

File: XMoreLikeThis.java Project: anywayjiong/elasticsearch

  /**
   * Find words for a more-like-this query former.
   *
   * <p>For every name in {@code fieldNames} the term frequencies of the given document are
   * accumulated into a single map. When the document has a term vector for the field, the vector
   * is used directly; otherwise every stored value of that field that has a string representation
   * is passed to {@code addTermFrequencies} as a reader. The accumulated map is finally handed to
   * {@code createQueue}.
   *
   * @param docNum the id of the lucene document from which to find terms
   * @return the queue produced by {@code createQueue} from the accumulated term frequencies
   * @throws IOException if reading term vectors or stored fields from the index fails
   */
  private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
    Map<String, Int> termFreqMap = new HashMap<>();

    // Term vectors are looked up per document, not per field, so fetch them once instead of once
    // per field name.
    final Fields vectors = ir.getTermVectors(docNum);

    // Stored fields are only needed for fields without a term vector; load them lazily and at
    // most once.
    Document storedDocument = null;

    for (String fieldName : fieldNames) {
      final Terms vector = (vectors != null) ? vectors.terms(fieldName) : null;

      if (vector != null) {
        addTermFrequencies(termFreqMap, vector, fieldName);
        continue;
      }

      // field does not store term vector info: fall back to the stored field values
      if (storedDocument == null) {
        storedDocument = ir.document(docNum);
      }
      for (IndexableField field : storedDocument.getFields(fieldName)) {
        final String stringValue = field.stringValue();
        if (stringValue != null) {
          addTermFrequencies(new FastStringReader(stringValue), termFreqMap, fieldName);
        }
      }
    }

    return createQueue(termFreqMap);
  }

Example #2

Show file

File: ChildrenQueryTests.java Project: Rjoydip/elasticsearch

  /**
   * Assert that the {@code scoreType} operates as expected and parents are found in the expected
   * order.
   *
   * <p>This will use the test index's parent/child types to create parents with multiple children.
   * Each child will have a randomly generated score stored in {@link #CHILD_SCORE_NAME}, which is
   * used to score based on the {@code scoreType} by using a {@link MockScorer} to determine the
   * expected scores.
   *
   * <p>The directory, writer and reader are managed with try-with-resources so that they are
   * released even when one of the assertions fails.
   *
   * @param scoreType The score type to use within the query to score parents relative to their
   *     children.
   * @throws IOException if any unexpected error occurs
   */
  private void assertScoreType(ScoreType scoreType) throws IOException {
    SearchContext context = SearchContext.current();

    try (Directory directory = newDirectory();
        IndexWriter writer =
            new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())))) {

      // calculates the expected score per parent
      MockScorer scorer = new MockScorer(scoreType);
      scorer.scores = new FloatArrayList(10);

      // number of parents to generate
      int parentDocs = scaledRandomIntBetween(2, 10);
      // unique child ID
      int childDocId = 0;

      // Parent ID to expected score
      Map<String, Float> parentScores = new TreeMap<>();

      // Add a few random parents to ensure that the children's score is appropriately taken into
      // account
      for (int parentDocId = 0; parentDocId < parentDocs; ++parentDocId) {
        String parent = Integer.toString(parentDocId);

        // Create the parent
        Document parentDocument = new Document();

        parentDocument.add(
            new StringField(UidFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.YES));
        parentDocument.add(new StringField(IdFieldMapper.NAME, parent, Field.Store.YES));
        parentDocument.add(new StringField(TypeFieldMapper.NAME, "parent", Field.Store.NO));

        // add the parent to the index
        writer.addDocument(parentDocument);

        int numChildDocs = scaledRandomIntBetween(1, 10);

        // forget any parent's previous scores
        scorer.scores.clear();

        // associate children with the parent
        for (int i = 0; i < numChildDocs; ++i) {
          int childScore = random().nextInt(128);

          Document childDocument = new Document();

          childDocument.add(
              new StringField(
                  UidFieldMapper.NAME,
                  Uid.createUid("child", Integer.toString(childDocId++)),
                  Field.Store.NO));
          childDocument.add(new StringField(TypeFieldMapper.NAME, "child", Field.Store.NO));
          // parent association:
          childDocument.add(
              new StringField(
                  ParentFieldMapper.NAME, Uid.createUid("parent", parent), Field.Store.NO));
          childDocument.add(new DoubleField(CHILD_SCORE_NAME, childScore, Field.Store.NO));

          // remember the score to be calculated
          scorer.scores.add(childScore);

          // add the associated child to the index
          writer.addDocument(childDocument);
        }

        // this score that should be returned for this parent
        parentScores.put(parent, scorer.score());
      }

      writer.commit();

      try (IndexReader reader = DirectoryReader.open(writer, true)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        // setup to read the parent/child map
        Engine.Searcher engineSearcher =
            new Engine.Searcher(ChildrenQueryTests.class.getSimpleName(), searcher);
        ((TestSearchContext) context)
            .setSearcher(new ContextIndexSearcher(context, engineSearcher));

        // child query that returns the score as the value of "childScore" for each child document,
        // with the parent's score determined by the score type
        QueryBuilder childQueryBuilder =
            functionScoreQuery(typeFilter("child"))
                .add(new FieldValueFactorFunctionBuilder(CHILD_SCORE_NAME));
        QueryBuilder queryBuilder =
            hasChildQuery("child", childQueryBuilder)
                .scoreType(scoreType.name().toLowerCase(Locale.ENGLISH))
                .setShortCircuitCutoff(parentDocs);

        // Perform the search for the documents using the selected score type
        TopDocs docs = searcher.search(parseQuery(queryBuilder), parentDocs);
        assertThat("Expected all parents", docs.totalHits, is(parentDocs));

        // score should be descending (just a sanity check)
        float topScore = docs.scoreDocs[0].score;

        // ensure each score is returned as expected
        for (int i = 0; i < parentDocs; ++i) {
          ScoreDoc scoreDoc = docs.scoreDocs[i];
          // get the ID from the document to get its expected score; remove it so we cannot
          // double-count it
          String parentId = reader.document(scoreDoc.doc).get(IdFieldMapper.NAME);
          Float expectedScore = parentScores.remove(parentId);
          if (expectedScore == null) {
            // fail with a readable message instead of a NullPointerException from auto-unboxing
            throw new AssertionError(
                "No expected score left for parent [" + parentId + "]: unknown or returned twice");
          }
          float score = expectedScore;

          // expect exact match
          assertThat("Unexpected score", scoreDoc.score, is(score));
          assertThat("Not descending", score, lessThanOrEqualTo(topScore));

          // it had better keep descending
          topScore = score;
        }
      }
    }
  }

Example #3

Show file

File: InspectIndex.java Project: laosiaudi/CMU-11642-Project

  /**
   * Command-line entry point: opens a Lucene index and executes the requested inspections.
   *
   * <p>The index is opened from the first {@code -index <path>} pair found in {@code args}. When
   * no such pair exists, {@code usage} is printed to standard error and the JVM exits with status
   * 1. The arguments are then processed from left to right; the recognized options are {@code
   * -list-edocid}, {@code -list-docids}, {@code -list-fields}, {@code -list-postings}, {@code
   * -list-postings-sample}, {@code -list-stats}, {@code -list-terms}, {@code -list-termvector} and
   * {@code -list-termvector-field}. An option that lacks its operands prints {@code usage} to
   * standard output and stops processing; an unrecognized argument only produces a warning on
   * standard error.
   *
   * <p>The reader is closed when processing ends, including when it ends with an exception.
   *
   * @param args the command-line arguments
   * @throws IOException if the index cannot be opened or read
   */
  public static void main(String[] args) throws IOException {

    IndexReader reader = null;

    /*
     *  Opening the index first simplifies the processing of the
     *  rest of the command line arguments.
     */
    for (int i = 0; i < args.length; i++) {
      if (("-index".equals(args[i])) && ((i + 1) < args.length)) {
        // DirectoryReader.open signals failure by throwing, so no null check is needed here.
        reader = DirectoryReader.open(FSDirectory.open(new File(args[i + 1])));
        break;
      }
    }

    if (reader == null) {
      System.err.println(usage);
      System.exit(1);
    }

    try {
      /*
       *  Process the command line arguments sequentially.
       */
      for (int i = 0; i < args.length; i++) {

        if ("-index".equals(args[i])) {

          /*
           *  Handled in the previous loop, so just skip the argument.
           */
          i++;

        } else if ("-list-edocid".equals(args[i])) {

          System.out.println("-list-edocid:");

          if ((i + 1) >= args.length) {
            System.out.println(usage);
            break;
          }

          Document d = reader.document(Integer.parseInt(args[i + 1]));

          System.out.println(
              "Internal docid --> External docid: " + args[i + 1] + " --> " + d.get("externalId"));

          i += 1;

        } else if ("-list-docids".equals(args[i])) {

          System.out.println("-list-docids:");

          // NOTE(review): numDocs() counts live documents only, while internal ids run up to
          // maxDoc() - 1. If the index contains deletions this loop stops early -- confirm
          // whether maxDoc() is what is intended here.
          for (int j = 0; j < reader.numDocs(); j++) {
            Document d = reader.document(j);
            System.out.println("Internal --> external docid: " + j + " --> " + d.get("externalId"));
          }

        } else if ("-list-fields".equals(args[i])) {

          Fields fields = MultiFields.getFields(reader);

          System.out.print("\nNumber of fields:  ");

          if (fields == null) {
            System.out.println("0");
          } else {
            System.out.println(fields.size());

            Iterator<String> is = fields.iterator();

            while (is.hasNext()) {
              System.out.println("\t" + is.next());
            }
          }

        } else if ("-list-postings".equals(args[i])) {

          if ((i + 2) >= args.length) {
            System.out.println(usage);
            break;
          }

          listPostings(reader, args[i + 1], args[i + 2], Integer.MAX_VALUE);
          i += 2;

        } else if ("-list-postings-sample".equals(args[i])) {

          if ((i + 2) >= args.length) {
            System.out.println(usage);
            break;
          }

          listPostings(reader, args[i + 1], args[i + 2], 5);
          i += 2;

        } else if ("-list-stats".equals(args[i])) {

          System.out.println("Corpus statistics:");
          System.out.println("\tnumdocs\t\t" + reader.numDocs());

          // The prefixes carry their own tab padding so the columns line up in the output.
          printFieldStats(reader, "\turl:\t", "url");
          printFieldStats(reader, "\tkeywords:", "keywords");
          printFieldStats(reader, "\ttitle:\t", "title");
          printFieldStats(reader, "\tbody:\t", "body");
          printFieldStats(reader, "\tinlink:\t", "inlink");

        } else if ("-list-terms".equals(args[i])) {

          if ((i + 1) >= args.length) {
            System.out.println(usage);
            break;
          }

          listTermDictionary(reader, args[i + 1]);
          i += 1;

        } else if ("-list-termvector".equals(args[i])) {

          if ((i + 1) >= args.length) {
            System.out.println(usage);
            break;
          }

          listTermVectors(reader, args[i + 1]);
          i += 1;

        } else if ("-list-termvector-field".equals(args[i])) {

          if ((i + 2) >= args.length) {
            System.out.println(usage);
            break;
          }

          listTermVectorField(reader, args[i + 1], args[i + 2]);
          i += 2;

        } else {
          System.err.println("\nWarning:  Unknown argument " + args[i] + " ignored.");
        }
      }
    } finally {
      /*
       *  Close the index, even when processing an argument failed.
       */
      reader.close();
    }
  }

  /**
   * Prints one line of corpus statistics for a field: the number of documents containing the
   * field, the sum of total term frequencies, and their ratio as the average length.
   *
   * @param reader the open index reader
   * @param prefix the already-padded label that starts the output line
   * @param field the name of the index field to report on
   * @throws IOException if the statistics cannot be read from the index
   */
  private static void printFieldStats(IndexReader reader, String prefix, String field)
      throws IOException {
    int docCount = reader.getDocCount(field);
    long sumTotalTermFreq = reader.getSumTotalTermFreq(field);

    System.out.println(
        prefix
            + "\tnumdocs="
            + docCount
            + "\tsumTotalTF="
            + sumTotalTermFreq
            + "\tavglen="
            + sumTotalTermFreq / (float) docCount);
  }