/** * Extracts termClaimsDescriptionAbstractTitles of the documents; Adds them to vector in the same * order * * @param hits * @param i * @param j * @return relevantDocsTerms docs must be in order * @throws java.io.IOException */ public Map<TermFreqVector, String> getDocsTerms(TopDocs hits, int i, int j) throws IOException { Map<TermFreqVector, String> docsTerms = new HashMap<>(); // Process each of the documents while (i < j && i < hits.totalHits && i >= 0) { ScoreDoc scoreDoc = hits.scoreDocs[i]; if (sourceField.equals(PatentQuery.all)) { Terms termTitle = ir.getTermVector(scoreDoc.doc, PatentQuery.getFields()[1]); TermFreqVector docTermsTitle = new TermFreqVector(termTitle); docsTerms.put(docTermsTitle, PatentQuery.getFields()[1]); Terms termAbstract = ir.getTermVector(scoreDoc.doc, PatentQuery.getFields()[2]); TermFreqVector docTermsAbstract = new TermFreqVector(termAbstract); docsTerms.put(docTermsAbstract, PatentQuery.getFields()[2]); Terms termDescription = ir.getTermVector(scoreDoc.doc, PatentQuery.getFields()[3]); TermFreqVector docTermsDescription = new TermFreqVector(termDescription); docsTerms.put(docTermsDescription, PatentQuery.getFields()[3]); Terms termClaims = ir.getTermVector(scoreDoc.doc, PatentQuery.getFields()[5]); TermFreqVector docTermsClaims = new TermFreqVector(termClaims); docsTerms.put(docTermsClaims, PatentQuery.getFields()[5]); } else { Terms term = ir.getTermVector(scoreDoc.doc, sourceField); // get termvector for document // Create termVector and add it to vector TermFreqVector docTerms = new TermFreqVector(term); docsTerms.put(docTerms, sourceField); } i++; } return docsTerms; }
/** * Creates a new instance of QueryExpansion * * @param hits * @param ir * @param parameters * @param source * @param Nbr_Docs * @param Nbr_Terms * @throws java.io.IOException */ public RocchioQueryExpansion( TopDocs hits, IndexReader ir, Map<String, Float> parameters, int source, int Nbr_Docs, int Nbr_Terms) throws IOException { this.ir = ir; this.parameters = parameters; if (source != 7) { this.sourceField = PatentQuery.getFields()[source]; } else { this.sourceField = PatentQuery.all; } this.Nbr_Terms = Nbr_Terms; // Create combine documents termTitle vectors - sum ( rel termTitle vectors ) // Get terms from relevant documents docsTermVectorReldocs = getDocsTerms(hits, 0, Nbr_Docs); // Get terms from irrelevant documents docsTermVectorIrreldocs = getDocsTerms(hits, hits.totalHits - Nbr_Docs, hits.totalHits); }