/** * Extracts termClaimsDescriptionAbstractTitles of the documents; Adds them to vector in the same * order * * @param hits * @param i * @param j * @return relevantDocsTerms docs must be in order * @throws java.io.IOException */ public Map<TermFreqVector, String> getDocsTerms(TopDocs hits, int i, int j) throws IOException { Map<TermFreqVector, String> docsTerms = new HashMap<>(); // Process each of the documents while (i < j && i < hits.totalHits && i >= 0) { ScoreDoc scoreDoc = hits.scoreDocs[i]; if (sourceField.equals(PatentQuery.all)) { Terms termTitle = ir.getTermVector(scoreDoc.doc, PatentQuery.getFields()[1]); TermFreqVector docTermsTitle = new TermFreqVector(termTitle); docsTerms.put(docTermsTitle, PatentQuery.getFields()[1]); Terms termAbstract = ir.getTermVector(scoreDoc.doc, PatentQuery.getFields()[2]); TermFreqVector docTermsAbstract = new TermFreqVector(termAbstract); docsTerms.put(docTermsAbstract, PatentQuery.getFields()[2]); Terms termDescription = ir.getTermVector(scoreDoc.doc, PatentQuery.getFields()[3]); TermFreqVector docTermsDescription = new TermFreqVector(termDescription); docsTerms.put(docTermsDescription, PatentQuery.getFields()[3]); Terms termClaims = ir.getTermVector(scoreDoc.doc, PatentQuery.getFields()[5]); TermFreqVector docTermsClaims = new TermFreqVector(termClaims); docsTerms.put(docTermsClaims, PatentQuery.getFields()[5]); } else { Terms term = ir.getTermVector(scoreDoc.doc, sourceField); // get termvector for document // Create termVector and add it to vector TermFreqVector docTerms = new TermFreqVector(term); docsTerms.put(docTerms, sourceField); } i++; } return docsTerms; }
/**
 * Prints the term vector stored for one field of one document in the index behind
 * {@code reader}.
 *
 * <p>A header line naming the document id and field is always printed first. If
 * {@code docidString} is not a valid integer, or lies outside {@code [0, reader.maxDoc())},
 * an error line is printed and nothing else is done. Otherwise the term vector is fetched and
 * handed to {@code termVectorDisplay}.
 *
 * @param reader index reader to fetch the term vector from
 * @param docidString document id, as text
 * @param field name of the field whose term vector is displayed
 * @throws IOException if reading the term vector from the index fails
 */
static void listTermVectorField(IndexReader reader, String docidString, String field)
        throws IOException {
    System.out.println("\nTermVector: docid " + docidString + ", field " + field);

    int docid;
    try {
        docid = Integer.parseInt(docidString);
    } catch (NumberFormatException e) {
        // Non-numeric input is reported the same way as an out-of-range id.
        System.out.println("ERROR: " + docidString + " is a bad document id.");
        return;
    }

    // Document ids range over [0, maxDoc()); numDocs() only counts live documents and is
    // smaller than the largest valid id + 1 when the index contains deletions.
    if (docid < 0 || docid >= reader.maxDoc()) {
        System.out.println("ERROR: " + docidString + " is a bad document id.");
        return;
    }

    Terms terms = reader.getTermVector(docid, field);
    termVectorDisplay(terms);
}