예제 #1
0
  public void buildIndex(JSONObject indexData) {

    try {
      Directory dir = FSDirectory.open(new File(indexDir));
      IKAnalyzer analyzer = new IKAnalyzer();
      analyzer.setUseSmart(true);
      IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, analyzer);
      indexWriter = new IndexWriter(dir, iwc);
      indexWriter.deleteAll();

      JSONArray statusData = indexData.getJSONArray("statusData");
      for (int i = 0; i < statusData.length(); i++) {
        String text = statusData.getString(i);
        Document doc = new Document();
        doc.add(
            new Field(
                "text",
                text,
                Field.Store.YES,
                Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(doc);
      }

      JSONArray userData = indexData.getJSONArray("userData");
      for (int i = 0; i < userData.length(); i++) {
        String text = userData.getString(i);
        Document doc = new Document();
        doc.add(
            new Field(
                "text",
                text,
                Field.Store.YES,
                Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        indexWriter.addDocument(doc);
      }
      // indexWriter.commit();
      System.out.println("Index is done");
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (JSONException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } finally {
      try {
        indexWriter.close();
      } catch (CorruptIndexException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
    }
  }
예제 #2
0
  public void getIndexInfo(String indexdir, int freqThreshold) {
    IndexReader reader = null;

    try {
      Directory dir = FSDirectory.open(new File(indexdir));
      System.out.println(dir);
      reader = IndexReader.open(dir);

      System.out.println("document num:" + reader.numDocs());
      System.out.println("======================");

      TermEnum terms = reader.terms();
      sortedTermQueue.clear();
      maxDocNum = reader.maxDoc();
      linkMap.clear();
      termList.clear();
      while (terms.next()) {
        // System.out.print(terms.term() + "\tDocFreq:" +
        TermDocs termDocs = reader.termDocs(terms.term());
        MyTerm temp = new MyTerm(terms.term(), termDocs, maxDocNum);
        if (temp.totalFreq < freqThreshold) {
          continue;
        } /*
           * if(temp.originTrem.text().length()==1){ continue; }
           */
        linkMap.put(temp.originTrem.text(), temp);
        sortedTermQueue.add(temp);
        termList.add(temp);
      }
      System.out.println("total Size:" + sortedTermQueue.size());
      System.out.println("mapsize:" + linkMap.keySet().size());
      // System.exit(0);
      int num = 0;
      this.maxFreq = sortedTermQueue.peek().totalFreq;
      while (!sortedTermQueue.isEmpty()) {
        num++;
        System.out.println(num + ":" + sortedTermQueue.poll());
      }
      System.out.println("read index info done");
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      try {
        reader.close();

      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }