/**
   * Collects term vectors for the hits ranked in the half-open range {@code [i, j)}.
   *
   * <p>For each hit, one {@code TermFreqVector} is built per field and stored in the result map
   * together with the name of the field it was read from. When {@code sourceField} equals
   * {@code PatentQuery.all}, the fields at indices 1, 2, 3 and 5 of
   * {@code PatentQuery.getFields()} are read (presumably title, abstract, description and claims
   * -- verify against {@code PatentQuery}); otherwise only {@code sourceField} is read.
   *
   * <p>The returned map iterates in insertion order: hits in rank order and, within a hit, fields
   * in the order listed above.
   *
   * <p>NOTE(review): entries are keyed by the vector, so two vectors that are equal according to
   * {@code TermFreqVector.equals} would collapse into one entry -- confirm this is acceptable.
   *
   * @param hits search results; only hits actually present in {@code hits.scoreDocs} are read
   * @param i rank of the first hit to process (inclusive); a negative value yields an empty map
   * @param j rank one past the last hit to process (exclusive)
   * @return map from each term vector to the name of the field it came from, in insertion order
   * @throws java.io.IOException propagated from the term-vector lookups on the index reader
   */
  public Map<TermFreqVector, String> getDocsTerms(TopDocs hits, int i, int j) throws IOException {
    // Insertion-ordered map so that iteration follows hit rank, as the contract above states.
    // Fully qualified because this block cannot see the file's import section.
    Map<TermFreqVector, String> docsTerms = new java.util.LinkedHashMap<>();
    if (i < 0 || i >= j) {
      return docsTerms;
    }

    // Bound by the hits that were actually collected: totalHits counts every match and can be
    // larger than scoreDocs.length, which would make scoreDocs[rank] go out of bounds.
    int end = Math.min(j, hits.scoreDocs.length);
    int[] allFieldIndexes = {1, 2, 3, 5};

    for (int rank = i; rank < end; rank++) {
      int docId = hits.scoreDocs[rank].doc;
      if (sourceField.equals(PatentQuery.all)) {
        for (int fieldIndex : allFieldIndexes) {
          String fieldName = PatentQuery.getFields()[fieldIndex];
          // NOTE(review): getTermVector may yield null when no vector is stored for the field;
          // TermFreqVector is assumed to cope with that -- confirm.
          Terms fieldTerms = ir.getTermVector(docId, fieldName);
          docsTerms.put(new TermFreqVector(fieldTerms), fieldName);
        }
      } else {
        Terms fieldTerms = ir.getTermVector(docId, sourceField);
        docsTerms.put(new TermFreqVector(fieldTerms), sourceField);
      }
    }
    return docsTerms;
  }
Example #2
0
  /**
   * Prints the term vector of one field of one document to standard output.
   *
   * <p>A header line naming the document id and field is always printed first. If the id is not
   * a valid integer or lies outside {@code [0, reader.maxDoc())}, an error line is printed and
   * nothing else happens. Otherwise the field's term vector is fetched from {@code reader} and
   * handed to {@code termVectorDisplay}.
   *
   * @param reader index reader to fetch the term vector from
   * @param docidString document id as decimal text
   * @param field name of the field whose term vector is displayed
   * @throws IOException propagated from the term-vector lookup or its display
   */
  static void listTermVectorField(IndexReader reader, String docidString, String field)
      throws IOException {

    System.out.println("\nTermVector:  docid " + docidString + ", field " + field);

    int docid;
    try {
      docid = Integer.parseInt(docidString);
    } catch (NumberFormatException e) {
      // Non-numeric input is just another bad id; report it like an out-of-range one.
      System.out.println("ERROR:  " + docidString + " is a bad document id.");
      return;
    }

    // Document ids range over [0, maxDoc()). numDocs() excludes deleted documents, so comparing
    // against it would reject valid ids once the index contains deletions.
    if ((docid < 0) || (docid >= reader.maxDoc())) {
      System.out.println("ERROR:  " + docidString + " is a bad document id.");
      return;
    }

    Terms terms = reader.getTermVector(docid, field);
    termVectorDisplay(terms);
  }