@Test(groups = "ch12") public void vectorTest() throws Exception { FullTextSession session = Search.getFullTextSession(openSession()); Transaction tx = session.beginTransaction(); buildIndex(session, tx); try { tx = session.beginTransaction(); Query query = new TermQuery(new Term("content", "properties")); System.out.println(query.toString()); FullTextQuery hibQuery = session.createFullTextQuery(query, ElectricalProperties.class); hibQuery.setProjection( FullTextQuery.DOCUMENT, FullTextQuery.DOCUMENT_ID, FullTextQuery.SCORE); reader = getReader(session); List<Object[]> results = hibQuery.list(); assert results.size() > 0 : "no results returned"; for (int x = 0; x < results.size(); x++) { Integer docId = (Integer) results.get(x)[1]; TermPositionVector vector = (TermPositionVector) reader.getTermFreqVector(docId, "content"); String[] terms = vector.getTerms(); int[] f = vector.getTermFrequencies(); System.out.println(results.get(x)[2]); for (int y = 0; y < vector.size(); y++) { System.out.print("docID# =>" + docId); System.out.print(" term => " + terms[y]); System.out.print(" freq => " + f[y]); int[] positions = vector.getTermPositions(y); TermVectorOffsetInfo[] offsets = vector.getOffsets(y); for (int z = 0; z < positions.length; z++) { System.out.print(" position => " + positions[z]); System.out.print(" starting offset => " + offsets[z].getStartOffset()); System.out.println(" ending offset => " + offsets[z].getEndOffset()); } System.out.println("---------------"); } } for (Object element : session.createQuery("from " + ElectricalProperties.class.getName()).list()) session.delete(element); tx.commit(); } finally { session.close(); if (provider != null) { provider.closeReader(reader); } } }
/** * Reconstruct document fields. * * @param docNum document number. If this document is deleted, but the index is not optimized yet, * the reconstruction process may still yield the reconstructed field content even from * deleted documents. * @return reconstructed document * @throws Exception */ public Reconstructed reconstruct(int docNum) throws Exception { if (docNum < 0 || docNum > reader.maxDoc()) { throw new Exception("Document number outside of valid range."); } Reconstructed res = new Reconstructed(); if (deleted != null && deleted.get(docNum)) { throw new Exception("Document is deleted."); } else { Document doc = reader.document(docNum); for (int i = 0; i < fieldNames.length; i++) { Field[] fs = doc.getFields(fieldNames[i]); if (fs != null && fs.length > 0) { res.getStoredFields().put(fieldNames[i], fs); } } } // collect values from unstored fields HashSet<String> fields = new HashSet<String>(Arrays.asList(fieldNames)); // try to use term vectors if available progress.maxValue = fieldNames.length; progress.curValue = 0; progress.minValue = 0; for (int i = 0; i < fieldNames.length; i++) { TermFreqVector tvf = reader.getTermFreqVector(docNum, fieldNames[i]); if (tvf != null && tvf.size() > 0 && (tvf instanceof TermPositionVector)) { TermPositionVector tpv = (TermPositionVector) tvf; progress.message = "Reading term vectors ..."; progress.curValue = i; setChanged(); notifyObservers(progress); BytesRef[] tv = tpv.getTerms(); for (int k = 0; k < tv.length; k++) { // do we have positions? int[] posArr = tpv.getTermPositions(k); if (posArr == null) { // only offsets TermVectorOffsetInfo[] offsets = tpv.getOffsets(k); if (offsets.length == 0) { continue; } // convert offsets into positions posArr = convertOffsets(offsets); } GrowableStringArray gsa = res.getReconstructedFields().get(fieldNames[i]); if (gsa == null) { gsa = new GrowableStringArray(); res.getReconstructedFields().put(fieldNames[i], gsa); } for (int m = 0; m < posArr.length; m++) { gsa.append(posArr[m], "|", tv[k].utf8ToString()); } } fields.remove(fieldNames[i]); // got what we wanted } } // this loop collects data only from left-over fields // not yet collected through term vectors progress.maxValue = fields.size(); progress.curValue = 0; progress.minValue = 0; for (String fld : fields) { progress.message = "Collecting terms in " + fld + " ..."; progress.curValue++; setChanged(); notifyObservers(progress); Terms terms = MultiFields.getTerms(reader, fld); if (terms == null) { // no terms in this field continue; } TermsEnum te = terms.iterator(); while (te.next() != null) { DocsAndPositionsEnum dpe = te.docsAndPositions(deleted, null); if (dpe == null) { // no position info for this field break; } int num = dpe.advance(docNum); if (num != docNum) { // either greater than or NO_MORE_DOCS continue; // no data for this term in this doc } String term = te.term().utf8ToString(); GrowableStringArray gsa = (GrowableStringArray) res.getReconstructedFields().get(fld); if (gsa == null) { gsa = new GrowableStringArray(); res.getReconstructedFields().put(fld, gsa); } for (int k = 0; k < dpe.freq(); k++) { int pos = dpe.nextPosition(); gsa.append(pos, "|", term); } } } progress.message = "Done."; progress.curValue = 100; setChanged(); notifyObservers(progress); return res; }