Beispiel #1
0
  /**
   * Parses an IK query expression, runs it against the index and prints the hits to stdout.
   *
   * <p>Original author's note: the search covers all fields, and a coordinate value needs a
   * {@code *} added to it before it will match.
   *
   * @param queryString the IK query expression (keyword) to search for
   * @throws Exception if the searcher cannot be created or the search fails
   */
  public static void searchListIKWithExp(String queryString) throws Exception {
    // Original author's note: a string that does not occur in the index yields no hits.
    IndexSearcher indexSearcher = LuceneUtils.createIndexSearcher();

    // The expression is parsed with IK's own query parser.
    Query parsedQuery = IKQueryParser.parse(queryString);
    System.out.println(parsedQuery);

    // No filter is applied; at most this many hits are fetched in one search.
    Filter noFilter = null;
    int maxHits = 100;
    TopDocs topDocs = indexSearcher.search(parsedQuery, noFilter, maxHits);
    System.out.println("总符合: " + topDocs.totalHits + "条数!");

    // Print the stored fields of each hit.
    ScoreDoc[] hits = topDocs.scoreDocs;
    for (int i = 0; i < hits.length; i++) {
      // hits[i].doc is the document number used to load the stored document.
      Document hitDoc = indexSearcher.doc(hits[i].doc);
      System.out.println("name = " + hitDoc.get("name"));
      System.out.println("address = " + hitDoc.get("address"));
      System.out.println("eastNew = " + hitDoc.get("eastNew"));
      System.out.println("northNew = " + hitDoc.get("northNew"));
      System.out.println("geom = " + hitDoc.get("geom"));
      System.out.println("phone = " + hitDoc.get("phone"));
      System.out.println("id = " + hitDoc.get("id"));
      System.out.println("datatype = " + hitDoc.get("datatype"));
      System.out.println("dataTypeKey = " + hitDoc.get("dataTypeKey"));
    }
  }
Beispiel #2
0
 /**
  * Demo entry point: parses a sample IK query expression and prints the resulting query.
  *
  * @param args unused
  */
 public static void main(String[] args) {
   // Sample expression mixing '=' (exact), ':' (analyzed), a {} range, grouping, ||, && and -.
   final String sampleExpression =
       "(id='ABcdRf' && date:{'20010101','20110101'} && keyword:'^魔兽中国$') || (content:'魔兽 中国'  || ulr='www.ik.com') - name:'林良益'";
   Query parsed = IKQueryParser.parse(sampleExpression);
   System.out.println(parsed);
 }
  /**
   * Searches the index at {@code indexDir} for documents whose "title" field matches the given
   * title and returns the stored "content" field of a hit.
   *
   * <p>Hits are examined in the order returned by the collector. The loop stops at the first hit
   * whose stored title equals {@code title} exactly; if no hit matches exactly, the content of
   * the last examined hit is returned. The title of every examined hit is printed to stdout.
   *
   * @param title the title to look up
   * @return {@code null} if the index directory is missing, is not a directory or is empty; an
   *     empty string if there were no hits or the search failed before any hit was read;
   *     otherwise the "content" value of the selected hit (may be {@code null} if that field is
   *     not stored)
   */
  public String getContentByTitle(String title) {
    // listFiles() returns null when the path does not exist or is not a directory, so a single
    // null check covers "missing" as well as "exists but is a plain file".
    File[] indexFiles = new File(indexDir).listFiles();
    if (indexFiles == null || indexFiles.length == 0) {
      System.out.println("索引文件夹不存在或文件夹为空~!");
      return null;
    }

    String content = "";
    IndexSearcher searcher = null;

    try {
      Query query = IKQueryParser.parse("title", title);
      TopDocCollector topCollector = new TopDocCollector(1000);

      searcher = new IndexSearcher(indexDir);
      searcher.setSimilarity(new IKSimilarity());
      searcher.search(query, topCollector);

      for (ScoreDoc scoreDoc : topCollector.topDocs().scoreDocs) {
        Document doc = searcher.doc(scoreDoc.doc);
        content = doc.get("content");
        String hitTitle = doc.get("title");
        System.out.println(hitTitle);
        // A hit without a stored title can never be the exact match; skip it instead of failing.
        if (hitTitle != null && hitTitle.equals(title)) {
          break;
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      // The searcher was opened on the index path here, so it must be closed here as well.
      if (searcher != null) {
        try {
          searcher.close();
        } catch (Exception closeFailure) {
          closeFailure.printStackTrace();
        }
      }
    }
    return content;
  }
  /**
   * Scores a token against every category and records its maximum score.
   *
   * <p>The token is parsed into a query on {@code IndexFields.CONTENTS}; the first extracted term
   * is used for all document-frequency lookups. For each category {@code i} the score is
   * {@code log2n((dfInCategory / CategoryTextCount[i]) / (df / totalTextNumber))}, and the largest
   * score is stored in {@code maxFeatureWeight} as a {@code FeatureWeight}.
   * NOTE(review): {@code log2n} is presumably a base-2 logarithm, which would make this a
   * pointwise mutual-information score - confirm against its definition.
   *
   * <p>If no category produces a comparable score (no categories, or every score is NaN) the
   * recorded maximum is {@code Double.NEGATIVE_INFINITY}.
   *
   * @param token the token being scored
   * @param searcher searcher used for the overall document frequency of the token
   * @param cateGory not used by this implementation
   * @param dfOfToken document frequency passed through to the recorded {@code FeatureWeight}
   * @throws IOException if a document-frequency lookup fails
   * @throws java.util.NoSuchElementException if the parsed query yields no terms
   */
  @Override
  public void hooker(String token, Searcher searcher, int cateGory, int dfOfToken)
      throws IOException {
    Query query = IKQueryParser.parse(IndexFields.CONTENTS, token);
    TreeSet<Term> terms = new TreeSet<Term>();
    query.extractTerms(terms);
    // Look the term up once; it is the same for the overall and the per-category lookups.
    Term firstTerm = terms.first();

    double df = searcher.docFreq(firstTerm);
    double pt = df / totalTextNumber;

    // Scores are logarithms and may be negative, so the running maximum must start below every
    // possible value. Double.MIN_VALUE is the smallest POSITIVE double and would mask them.
    double max = Double.NEGATIVE_INFINITY;
    for (int i = 0; i < Constants.CATEGORYS.length; ++i) {
      double dfInc = searchers[i].docFreq(firstTerm);
      double ptIfC = dfInc / CategoryTextCount[i];

      double MI = log2n(ptIfC / pt);
      if (MI > max) {
        max = MI;
      }
    }
    maxFeatureWeight.add(new FeatureWeight(token, max, dfOfToken));
  }
Beispiel #5
0
  /**
   * Searches the index stored under {@code PATH} for {@code value} in the field {@code key} and
   * prints the total hit count followed by selected stored fields of each hit to stdout.
   *
   * <p>The query is built with {@code IKQueryParser}, scored with {@code IKSimilarity}, and up to
   * 10000 hits are fetched. For every hit the "name", "address" and "datatype" fields are passed
   * to {@code hightlightFeild} together with a highlighter that wraps matches in
   * {@code <font color='red'>...</font>} and uses 50-character fragments.
   * The index reader and searcher opened here are closed before the method returns.
   *
   * @param key name of the field to search in
   * @param value the content to search for
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if the index cannot be read or closed
   * @throws ParseException declared for callers; NOTE(review): which call raises it is not
   *     visible in this block
   */
  public static void search(String key, String value)
      throws CorruptIndexException, IOException, ParseException {
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(PATH)));
    IndexSearcher searcher = new IndexSearcher(reader);
    try {
      searcher.setSimilarity(new IKSimilarity());
      Query query = IKQueryParser.parse(key, value);
      TopDocs topDocs = searcher.search(query, 10000);
      System.out.println("命中:" + topDocs.totalHits);

      // Highlighter setup: red <font> tags around matches, 50-character fragments.
      Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
      Scorer fragmentScorer = new QueryScorer(query);
      Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
      Fragmenter fragmenter = new SimpleFragmenter(50);
      highlighter.setTextFragmenter(fragmenter);
      Analyzer analyzer = new IKAnalyzer();

      for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        // scoreDoc.doc is the internal document number used to load the stored document.
        Document doc = searcher.doc(scoreDoc.doc);

        hightlightFeild(highlighter, analyzer, doc, "name");
        hightlightFeild(highlighter, analyzer, doc, "address");
        hightlightFeild(highlighter, analyzer, doc, "datatype");

        System.out.println("name:" + doc.get("name"));
        System.out.println("address:" + doc.get("address"));
        System.out.println("datatype:" + doc.get("datatype"));
        System.out.println("geom:" + doc.get("geom"));
      }
    } finally {
      // A searcher built from an existing reader does not close that reader, so close both.
      try {
        searcher.close();
      } finally {
        reader.close();
      }
    }
  }
Beispiel #6
0
    /**
     * Syntax analysis: walks {@code elements} once and builds queries on the {@code querys}
     * stack, using {@code operates} as the operator stack.
     *
     * <ul>
     *   <li>A field element ({@code 'F'}) must be followed by {@code =} or {@code :} and then by
     *       either a quoted value or a {@code [}/<code>{</code> range. {@code =} produces a
     *       {@code TermQuery}; {@code :} produces a query from {@code luceneQueryParse} when the
     *       value starts with {@code ^} and ends with {@code $}, otherwise from
     *       {@code IKQueryParser.parse}; a range is collected up to the closing bracket and
     *       handed to {@code toTermRangeQuery}.
     *   <li>{@code (} is pushed on the operator stack; {@code )} pops operators, converting each
     *       with {@code toQuery}, until the matching {@code (} is removed.
     *   <li>Any other element is treated as an operator: operators on the stack are converted
     *       with {@code toQuery} until the stack is empty, a {@code (} is on top, or
     *       {@code compare(e, top)} returns 1; then the operator is pushed.
     *       NOTE(review): a result of 1 presumably means the new operator has higher priority -
     *       confirm against {@code compare}.
     * </ul>
     *
     * Operators remaining on the stack after the walk are converted in pop order.
     *
     * @throws IllegalStateException if a field is not followed by {@code =} or {@code :}, or if
     *     the operator is not followed by a value or range (including when the expression ends
     *     early)
     */
    private void parseSyntax() {
      for (int i = 0; i < this.elements.size(); i++) {
        Element e = this.elements.get(i);
        if ('F' == e.type) {
          // Guard both look-aheads so a truncated expression reports the same diagnostics as a
          // malformed one instead of failing with an IndexOutOfBoundsException.
          Element e2 = (i + 1 < this.elements.size()) ? this.elements.get(i + 1) : null;
          if (e2 == null || ('=' != e2.type && ':' != e2.type)) {
            throw new IllegalStateException("表达式异常: = 或 : 号丢失");
          }
          Element e3 = (i + 2 < this.elements.size()) ? this.elements.get(i + 2) : null;
          if (e3 == null) {
            throw new IllegalStateException("表达式异常:匹配值丢失");
          }
          // Handle the = and : operators.
          if ('\'' == e3.type) {
            i += 2;
            if ('=' == e2.type) {
              TermQuery tQuery = new TermQuery(new Term(e.toString(), e3.toString()));
              this.querys.push(tQuery);
            } else {
              // e2 is ':' here; the check above admits only '=' and ':'.
              String keyword = e3.toString();
              if (keyword.startsWith("^") && keyword.endsWith("$")) {
                Query pQuery = this.luceneQueryParse(e.toString(), keyword);
                this.querys.push(pQuery);
              } else {
                Query tQuery = IKQueryParser.parse(e.toString(), e3.toString());
                this.querys.push(tQuery);
              }
            }

          } else if ('[' == e3.type || '{' == e3.type) {
            i += 2;
            // Handle [] and {}: collect every element up to and including the closing bracket.
            LinkedList<Element> eQueue = new LinkedList<Element>();
            eQueue.add(e3);
            for (i++; i < this.elements.size(); i++) {
              Element eN = this.elements.get(i);
              eQueue.add(eN);
              if (']' == eN.type || '}' == eN.type) {
                break;
              }
            }
            // Translate the collected elements into a range query.
            Query rangeQuery = this.toTermRangeQuery(e, eQueue);
            this.querys.push(rangeQuery);
          } else {
            throw new IllegalStateException("表达式异常:匹配值丢失");
          }

        } else if ('(' == e.type) {
          this.operates.push(e);

        } else if (')' == e.type) {
          boolean doPop = true;
          while (doPop && !this.operates.empty()) {
            Element op = this.operates.pop();
            if ('(' == op.type) {
              doPop = false;
            } else {
              Query q = toQuery(op);
              this.querys.push(q);
            }
          }
        } else {
          // Operator: convert stacked operators until this one may be pushed.
          boolean pushed = false;
          while (!pushed && !this.operates.isEmpty()) {
            Element eleOnTop = this.operates.peek();
            if ('(' == eleOnTop.type || compare(e, eleOnTop) == 1) {
              this.operates.push(e);
              pushed = true;
            } else {
              Query q = toQuery(eleOnTop);
              this.operates.pop();
              this.querys.push(q);
            }
          }
          // The stack was empty from the start or has been drained.
          if (!pushed) {
            this.operates.push(e);
          }
        }
      }

      // Convert whatever operators are left.
      while (!this.operates.isEmpty()) {
        Element eleOnTop = this.operates.pop();
        Query q = toQuery(eleOnTop);
        this.querys.push(q);
      }
    }