コード例 #1
0
  /**
   * Collects the term vectors of the hits ranked in {@code [i, j)} and maps each vector to the
   * name of the field it was read from.
   *
   * <p>When {@code sourceField} equals {@code PatentQuery.all}, four vectors are read per
   * document, from the fields at indices 1, 2, 3 and 5 of {@code PatentQuery.getFields()} (title,
   * abstract, description and claims, going by the original local names). Otherwise a single
   * vector is read from {@code sourceField}.
   *
   * <p>The window is clamped to the hits that were actually retrieved: {@code hits.totalHits}
   * counts every match, whereas {@code hits.scoreDocs} may hold only the top few of them.
   *
   * <p>NOTE(review): entries are keyed by the vector itself, so two vectors that compare equal
   * would share a single entry; confirm how {@code TermFreqVector} defines equals/hashCode.
   *
   * @param hits search result whose {@code scoreDocs} entries are read
   * @param i rank of the first hit to process (inclusive); a negative value yields an empty map
   * @param j rank one past the last hit to process (exclusive)
   * @return map from term vector to field name, iterating in insertion order (hit rank first,
   *     then field)
   * @throws java.io.IOException if a term vector cannot be read from the index
   */
  public Map<TermFreqVector, String> getDocsTerms(TopDocs hits, int i, int j) throws IOException {
    // Insertion-ordered so iteration follows hit rank, as the contract promises. The type is
    // fully qualified so that no additional import is needed.
    Map<TermFreqVector, String> docsTerms = new java.util.LinkedHashMap<>();
    if (i < 0) {
      // A negative start has always meant "process nothing".
      return docsTerms;
    }

    // Never index past the retrieved hits, even when more documents matched overall.
    int end = Math.min(j, Math.min(hits.totalHits, hits.scoreDocs.length));
    int[] allFieldIndices = {1, 2, 3, 5};

    for (int rank = i; rank < end; rank++) {
      int docId = hits.scoreDocs[rank].doc;
      if (sourceField.equals(PatentQuery.all)) {
        for (int fieldIndex : allFieldIndices) {
          addFieldTerms(docsTerms, docId, PatentQuery.getFields()[fieldIndex]);
        }
      } else {
        addFieldTerms(docsTerms, docId, sourceField);
      }
    }
    return docsTerms;
  }

  /**
   * Reads the term vector stored for {@code field} of document {@code docId} and records it in
   * {@code target} under that field name.
   *
   * <p>NOTE(review): the index reader may hand back {@code null} when no term vector was stored
   * for the field; confirm that {@code TermFreqVector} copes with that.
   */
  private void addFieldTerms(Map<TermFreqVector, String> target, int docId, String field)
      throws IOException {
    Terms terms = ir.getTermVector(docId, field);
    target.put(new TermFreqVector(terms), field);
  }
コード例 #2
0
  /**
   * Creates a new instance of QueryExpansion
   *
   * @param hits
   * @param ir
   * @param parameters
   * @param source
   * @param Nbr_Docs
   * @param Nbr_Terms
   * @throws java.io.IOException
   */
  public RocchioQueryExpansion(
      TopDocs hits,
      IndexReader ir,
      Map<String, Float> parameters,
      int source,
      int Nbr_Docs,
      int Nbr_Terms)
      throws IOException {
    this.ir = ir;
    this.parameters = parameters;
    if (source != 7) {
      this.sourceField = PatentQuery.getFields()[source];
    } else {
      this.sourceField = PatentQuery.all;
    }

    this.Nbr_Terms = Nbr_Terms;

    // Create combine documents termTitle vectors - sum ( rel termTitle vectors )
    // Get terms from relevant documents
    docsTermVectorReldocs = getDocsTerms(hits, 0, Nbr_Docs);
    // Get terms from irrelevant documents
    docsTermVectorIrreldocs = getDocsTerms(hits, hits.totalHits - Nbr_Docs, hits.totalHits);
  }