コード例 #1
0
  @Test(groups = "ch12")
  public void vectorTest() throws Exception {
    FullTextSession session = Search.getFullTextSession(openSession());
    Transaction tx = session.beginTransaction();
    buildIndex(session, tx);

    try {
      tx = session.beginTransaction();

      Query query = new TermQuery(new Term("content", "properties"));
      System.out.println(query.toString());

      FullTextQuery hibQuery = session.createFullTextQuery(query, ElectricalProperties.class);
      hibQuery.setProjection(
          FullTextQuery.DOCUMENT, FullTextQuery.DOCUMENT_ID, FullTextQuery.SCORE);

      reader = getReader(session);

      List<Object[]> results = hibQuery.list();

      assert results.size() > 0 : "no results returned";
      for (int x = 0; x < results.size(); x++) {

        Integer docId = (Integer) results.get(x)[1];
        TermPositionVector vector = (TermPositionVector) reader.getTermFreqVector(docId, "content");
        String[] terms = vector.getTerms();
        int[] f = vector.getTermFrequencies();

        System.out.println(results.get(x)[2]);
        for (int y = 0; y < vector.size(); y++) {
          System.out.print("docID# =>" + docId);
          System.out.print(" term => " + terms[y]);
          System.out.print(" freq => " + f[y]);

          int[] positions = vector.getTermPositions(y);
          TermVectorOffsetInfo[] offsets = vector.getOffsets(y);
          for (int z = 0; z < positions.length; z++) {
            System.out.print(" position => " + positions[z]);
            System.out.print(" starting offset => " + offsets[z].getStartOffset());
            System.out.println(" ending offset => " + offsets[z].getEndOffset());
          }
          System.out.println("---------------");
        }
      }
      for (Object element :
          session.createQuery("from " + ElectricalProperties.class.getName()).list())
        session.delete(element);

      tx.commit();
    } finally {
      session.close();
      if (provider != null) {
        provider.closeReader(reader);
      }
    }
  }
コード例 #2
0
ファイル: DocReconstructor.java プロジェクト: belle96/luke
 /**
  * Reconstruct document fields.
  *
  * @param docNum document number. If this document is deleted, but the index is not optimized yet,
  *     the reconstruction process may still yield the reconstructed field content even from
  *     deleted documents.
  * @return reconstructed document
  * @throws Exception
  */
 public Reconstructed reconstruct(int docNum) throws Exception {
   if (docNum < 0 || docNum > reader.maxDoc()) {
     throw new Exception("Document number outside of valid range.");
   }
   Reconstructed res = new Reconstructed();
   if (deleted != null && deleted.get(docNum)) {
     throw new Exception("Document is deleted.");
   } else {
     Document doc = reader.document(docNum);
     for (int i = 0; i < fieldNames.length; i++) {
       Field[] fs = doc.getFields(fieldNames[i]);
       if (fs != null && fs.length > 0) {
         res.getStoredFields().put(fieldNames[i], fs);
       }
     }
   }
   // collect values from unstored fields
   HashSet<String> fields = new HashSet<String>(Arrays.asList(fieldNames));
   // try to use term vectors if available
   progress.maxValue = fieldNames.length;
   progress.curValue = 0;
   progress.minValue = 0;
   for (int i = 0; i < fieldNames.length; i++) {
     TermFreqVector tvf = reader.getTermFreqVector(docNum, fieldNames[i]);
     if (tvf != null && tvf.size() > 0 && (tvf instanceof TermPositionVector)) {
       TermPositionVector tpv = (TermPositionVector) tvf;
       progress.message = "Reading term vectors ...";
       progress.curValue = i;
       setChanged();
       notifyObservers(progress);
       BytesRef[] tv = tpv.getTerms();
       for (int k = 0; k < tv.length; k++) {
         // do we have positions?
         int[] posArr = tpv.getTermPositions(k);
         if (posArr == null) {
           // only offsets
           TermVectorOffsetInfo[] offsets = tpv.getOffsets(k);
           if (offsets.length == 0) {
             continue;
           }
           // convert offsets into positions
           posArr = convertOffsets(offsets);
         }
         GrowableStringArray gsa = res.getReconstructedFields().get(fieldNames[i]);
         if (gsa == null) {
           gsa = new GrowableStringArray();
           res.getReconstructedFields().put(fieldNames[i], gsa);
         }
         for (int m = 0; m < posArr.length; m++) {
           gsa.append(posArr[m], "|", tv[k].utf8ToString());
         }
       }
       fields.remove(fieldNames[i]); // got what we wanted
     }
   }
   // this loop collects data only from left-over fields
   // not yet collected through term vectors
   progress.maxValue = fields.size();
   progress.curValue = 0;
   progress.minValue = 0;
   for (String fld : fields) {
     progress.message = "Collecting terms in " + fld + " ...";
     progress.curValue++;
     setChanged();
     notifyObservers(progress);
     Terms terms = MultiFields.getTerms(reader, fld);
     if (terms == null) { // no terms in this field
       continue;
     }
     TermsEnum te = terms.iterator();
     while (te.next() != null) {
       DocsAndPositionsEnum dpe = te.docsAndPositions(deleted, null);
       if (dpe == null) { // no position info for this field
         break;
       }
       int num = dpe.advance(docNum);
       if (num != docNum) { // either greater than or NO_MORE_DOCS
         continue; // no data for this term in this doc
       }
       String term = te.term().utf8ToString();
       GrowableStringArray gsa = (GrowableStringArray) res.getReconstructedFields().get(fld);
       if (gsa == null) {
         gsa = new GrowableStringArray();
         res.getReconstructedFields().put(fld, gsa);
       }
       for (int k = 0; k < dpe.freq(); k++) {
         int pos = dpe.nextPosition();
         gsa.append(pos, "|", term);
       }
     }
   }
   progress.message = "Done.";
   progress.curValue = 100;
   setChanged();
   notifyObservers(progress);
   return res;
 }