コード例 #1
0
ファイル: Main.java プロジェクト: muyuxuebao/Lucene-exercise
  private static Map<String, List<String>> generate_result(Directory directory) {
    Map<String, List<String>> result_map = new HashMap<String, List<String>>();

    try {
      IndexReader reader = IndexReader.open(directory);
      TermEnum termEnum = reader.terms();
      while (termEnum.next()) {
        String termEnumString = termEnum.term().toString();
        if (termEnumString.startsWith("content:")) {
          String term = termEnumString.substring(termEnumString.lastIndexOf(":") + 1);
          TermDocs termDocs = reader.termDocs(termEnum.term());
          while (termDocs.next()) {
            Document doc = reader.document(termDocs.doc());
            String relative_path = doc.get("relative_path");

            if (result_map.containsKey(relative_path)) {
              result_map.get(relative_path).add(term + termDocs.freq());
            } else {
              result_map.put(relative_path, new ArrayList<String>());
            }
          }
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
    }

    return result_map;
  }
コード例 #2
0
  public void getIndexInfo(String indexdir, int freqThreshold) {
    IndexReader reader = null;

    try {
      Directory dir = FSDirectory.open(new File(indexdir));
      System.out.println(dir);
      reader = IndexReader.open(dir);

      System.out.println("document num:" + reader.numDocs());
      System.out.println("======================");

      TermEnum terms = reader.terms();
      sortedTermQueue.clear();
      maxDocNum = reader.maxDoc();
      linkMap.clear();
      termList.clear();
      while (terms.next()) {
        // System.out.print(terms.term() + "\tDocFreq:" +
        TermDocs termDocs = reader.termDocs(terms.term());
        MyTerm temp = new MyTerm(terms.term(), termDocs, maxDocNum);
        if (temp.totalFreq < freqThreshold) {
          continue;
        } /*
           * if(temp.originTrem.text().length()==1){ continue; }
           */
        linkMap.put(temp.originTrem.text(), temp);
        sortedTermQueue.add(temp);
        termList.add(temp);
      }
      System.out.println("total Size:" + sortedTermQueue.size());
      System.out.println("mapsize:" + linkMap.keySet().size());
      // System.exit(0);
      int num = 0;
      this.maxFreq = sortedTermQueue.peek().totalFreq;
      while (!sortedTermQueue.isEmpty()) {
        num++;
        System.out.println(num + ":" + sortedTermQueue.poll());
      }
      System.out.println("read index info done");
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      try {
        reader.close();

      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }
コード例 #3
0
    public void seek(TermEnum terms) throws IOException {
      original.seek(terms);

      docFreq = terms.docFreq();
      pointer = -1;

      if (docFreq > postingMaps.length) { // grow postingsMap
        PostingMap[] newMap = new PostingMap[docFreq];
        System.arraycopy(postingMaps, 0, newMap, 0, postingMaps.length);
        for (int i = postingMaps.length; i < docFreq; i++) {
          newMap[i] = new PostingMap();
        }
        postingMaps = newMap;
      }

      out.reset();

      int i = 0;
      while (original.next()) {
        PostingMap map = postingMaps[i++];
        map.newDoc = oldToNew[original.doc()]; // remap the newDoc id
        map.offset = out.getFilePointer(); // save pointer to buffer

        final int tf = original.freq(); // buffer tf & positions
        out.writeVInt(tf);
        int prevPosition = 0;
        for (int j = tf; j > 0; j--) { // delta encode positions
          int p = original.nextPosition();
          out.writeVInt(p - prevPosition);
          prevPosition = p;
        }
      }
      out.flush();
      docFreq = i; // allow for deletions

      Arrays.sort(postingMaps, 0, docFreq); // resort by mapped doc ids
      // HeapSorter.sort(postingMaps,docFreq); // TODO MC - due to the lack of space

      // NOTE: this might be substantially faster if RAMInputStream were public
      // and supported a reset() operation.
      in = tempDir.openInput(TEMP_FILE);
    }