@Override
public String getSnippet(
        Query query, String field, String s, int maxNumFragments,
        int fragmentLength, String fragmentSuffix, Formatter formatter)
    throws IOException {

    QueryScorer queryScorer = new QueryScorer(query, field);

    Highlighter highlighter = new Highlighter(formatter, queryScorer);

    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));

    TokenStream tokenStream = getAnalyzer().tokenStream(field, new UnsyncStringReader(s));

    try {
        String snippet = highlighter.getBestFragments(
            tokenStream, s, maxNumFragments, fragmentSuffix);

        // Append the suffix only when the snippet is a true truncation of the source text.
        if (Validator.isNotNull(snippet) && !StringUtil.endsWith(snippet, fragmentSuffix)
                && !s.equals(snippet)) {
            snippet = snippet.concat(fragmentSuffix);
        }

        return snippet;
    }
    catch (InvalidTokenOffsetsException itoe) {
        throw new IOException(itoe);
    }
}
/**
 * Highlights the search keywords in the given entities.
 *
 * @param query     the query object
 * @param list      the list of entities to highlight
 * @param subLength the fragment length to extract
 * @param fields    the field names to inspect
 */
public List<T> keywordsHighlight(BooleanQuery query, List<T> list, int subLength, String... fields) {
    Analyzer analyzer = new IKAnalyzer();
    Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(subLength));

    for (T entity : list) {
        try {
            for (String field : fields) {
                String text = StringUtils.replaceHtml((String) Reflections.invokeGetter(entity, field));
                // Build the highlighted fragment for this field.
                String description = highlighter.getBestFragment(analyzer, field, text);
                if (description != null) {
                    Reflections.invokeSetter(entity, fields[0], description);
                    break;
                }
                Reflections.invokeSetter(entity, fields[0], StringUtils.abbr(text, subLength * 2));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
    }
    return list;
}
/**
 * Return a {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this field.
 *
 * @param query The current Query
 * @param fieldName The name of the field
 * @param request The current SolrQueryRequest
 */
protected Highlighter getHighlighter(Query query, String fieldName, SolrQueryRequest request) {
    SolrParams params = request.getParams();
    Highlighter highlighter = new Highlighter(
        getFormatter(fieldName, params),
        getEncoder(fieldName, params),
        getQueryScorer(query, fieldName, request));
    highlighter.setTextFragmenter(getFragmenter(fieldName, params));
    return highlighter;
}
/**
 * Announcements adapter.
 *
 * @param hg the highlighter to apply
 * @param a the analyzer used to tokenize the stored text
 * @param ann the announcement to render
 * @return the highlighted search result
 * @throws Exception
 */
public SearchResultWH makeHW(Highlighter hg, Analyzer a, Announce ann) throws Exception {
    String s = "";
    {
        String text = ann.getITopDescription() + "";
        TokenStream tokenStream = a.tokenStream("topdescription", new StringReader(text));
        s += cP(
            "Совпадения в заголовке объявления",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    }
    {
        String text = ann.getIDescription() + "";
        TokenStream tokenStream = a.tokenStream("description", new StringReader(text));
        s += cP(
            "Совпадения в тексте объявления",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    }

    String metatexts = "";
    {
        String text = ann.getMeta_keywords() + "";
        TokenStream tokenStream = a.tokenStream("meta_keywords", new StringReader(text));
        metatexts += cPmeta(
            "Совпадения в keywords",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

        text = ann.getMeta_description() + "";
        tokenStream = a.tokenStream("meta_description", new StringReader(text));
        metatexts += cPmeta(
            "Совпадения в description",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

        text = ann.getMeta_subject() + "";
        tokenStream = a.tokenStream("meta_subject", new StringReader(text));
        metatexts += cPmeta(
            "Совпадения в subject",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    }

    return new SearchResultWH(ann, "Announce", s, metatexts);
}
/**
 * Creates a highlighter.
 *
 * @param query the index query object
 * @param prefix the prefix string emitted before each highlighted term
 * @param suffix the suffix string emitted after each highlighted term
 * @param fragmenterLength the maximum summary fragment length
 * @return the configured highlighter
 */
public static Highlighter createHighlighter(
        Query query, String prefix, String suffix, int fragmenterLength) {
    Formatter formatter = new SimpleHTMLFormatter(
        (prefix == null || prefix.trim().length() == 0) ? "<font color=\"red\">" : prefix,
        (suffix == null || suffix.trim().length() == 0) ? "</font>" : suffix);
    Scorer fragmentScorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
    Fragmenter fragmenter = new SimpleFragmenter(fragmenterLength <= 0 ? 50 : fragmenterLength);
    highlighter.setTextFragmenter(fragmenter);
    return highlighter;
}
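Since several snippets in this collection construct a Highlighter the same way, a minimal caller sketch may help tie the pieces together. This is not from the original source: the searcher, analyzer, and the stored field name "content" are illustrative assumptions.

// Hedged usage sketch for createHighlighter above; all names here are illustrative.
public static void printHighlights(IndexSearcher searcher, Analyzer analyzer, Query query)
    throws IOException, InvalidTokenOffsetsException {
    Highlighter highlighter = createHighlighter(query, "<em>", "</em>", 100);
    TopDocs topDocs = searcher.search(query, 10);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        Document doc = searcher.doc(scoreDoc.doc);
        String content = doc.get("content"); // assumed stored field
        // getBestFragment returns null when no query term occurs in the text.
        String fragment = highlighter.getBestFragment(analyzer, "content", content);
        System.out.println(fragment != null ? fragment : content);
    }
}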
public static String highlightField(
        Highlighter highlighter, Analyzer analyzer, Document doc, String field) {
    String docContent = doc.get(field);
    try {
        String hc = highlighter.getBestFragment(analyzer, field, docContent);
        // Fall back to a 50-character prefix when no query term occurs in the field.
        if (hc == null) {
            if (docContent.length() >= 50) {
                hc = docContent.substring(0, 50);
            } else {
                hc = docContent;
            }
        }
        doc.getField(field).setValue(hc);
        return hc;
    } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return docContent;
}
/**
 * Return a phrase {@link org.apache.lucene.search.highlight.Highlighter} appropriate for this
 * field.
 *
 * @param query The current Query
 * @param fieldName The name of the field
 * @param request The current SolrQueryRequest
 * @param tokenStream document text CachingTokenStream
 * @throws IOException
 */
protected Highlighter getPhraseHighlighter(
        Query query, String fieldName, SolrQueryRequest request, CachingTokenFilter tokenStream)
    throws IOException {
    SolrParams params = request.getParams();
    Highlighter highlighter = new Highlighter(
        getFormatter(fieldName, params),
        getEncoder(fieldName, params),
        getSpanQueryScorer(query, fieldName, tokenStream, request));
    highlighter.setTextFragmenter(getFragmenter(fieldName, params));
    return highlighter;
}
public void highlight(String field, String searchText) {
    String text =
        "In this section we'll show you how to make the simplest "
            + "programmatic query, searching for a single term, and then "
            + "we'll see how to use QueryParser to accept textual queries. "
            + "In the sections that follow, we'll take this simple example "
            + "further by detailing all the query types built into Lucene. "
            + "We begin with the simplest search of all: searching for all "
            + "documents that contain a single term.";
    Analyzer analyzer = new StandardAnalyzer();
    QueryParser queryParser = new QueryParser(field, analyzer);
    try {
        Query query = queryParser.parse(searchText);
        SimpleHTMLFormatter formatter =
            new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
        TokenStream tokens = analyzer.tokenStream("f", new StringReader(text));
        QueryScorer scorer = new QueryScorer(query, "f");
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
        String result = highlighter.getBestFragments(tokens, text, 3, "...");

        // Write the highlighted fragments into a standalone HTML page.
        FileWriter writer = new FileWriter("/home/venugopal/Downloads/Lucene/lia/highter.html");
        writer.write("<html>");
        writer.write(
            "<style>\n"
                + ".highlight {\n"
                + " background: yellow;\n"
                + "}\n"
                + "</style>");
        writer.write("<body>");
        writer.write(result);
        writer.write("</body></html>");
        writer.close();
    } catch (ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
    }
}
/**
 * @param query the index query object
 * @param prefix the highlight prefix string
 * @param suffix the highlight suffix string
 * @param fragmenterLength the maximum summary fragment length
 * @param ifCustom whether to use the custom Highlighter implementation
 * @return the configured highlighter
 * @author huangzhiqian
 * @date 2015-12-11
 */
public static Highlighter createHighlighter(
        Query query, String prefix, String suffix, int fragmenterLength, boolean ifCustom) {
    Formatter formatter = new SimpleHTMLFormatter(
        (prefix == null || prefix.trim().length() == 0) ? "<font color=\"blue\">" : prefix,
        (suffix == null || suffix.trim().length() == 0) ? "</font>" : suffix);
    Scorer fragmentScorer = new QueryScorer(query);
    Highlighter highlighter;
    if (ifCustom) {
        highlighter = new CusHzqHighlighter(formatter, fragmentScorer);
    } else {
        highlighter = new Highlighter(formatter, fragmentScorer);
    }
    Fragmenter fragmenter = new SimpleFragmenter(fragmenterLength <= 0 ? 50 : fragmenterLength);
    highlighter.setTextFragmenter(fragmenter);
    return highlighter;
}
/**
 * Applies highlighting to the "text" field of a document.
 *
 * @param analyzer the analyzer used to tokenize the stored text
 * @param query the query whose terms are highlighted
 * @param doc the matching document
 * @return the highlighted text, or the raw field value when nothing matches
 */
private String toHighlighter(Analyzer analyzer, Query query, Document doc) {
    String field = "text";
    try {
        SimpleHTMLFormatter simpleHtmlFormatter =
            new SimpleHTMLFormatter("<font color=\"red\">", "</font>");
        Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
        TokenStream tokenStream = analyzer.tokenStream("text", new StringReader(doc.get(field)));
        String highlighterStr = highlighter.getBestFragment(tokenStream, doc.get(field));
        return highlighterStr == null ? doc.get(field) : highlighterStr;
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InvalidTokenOffsetsException e) {
        e.printStackTrace();
    }
    return null;
}
private String doHighlightQuery(
        Query query, String fieldName, String text, String openMark, String closeMark) {
    try {
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(openMark, closeMark);
        Highlighter highlighter =
            new Highlighter(htmlFormatter, new QueryScorer(query, fieldName, this.env.defaultField));
        // NullFragmenter keeps the text in one piece instead of splitting it into fragments.
        highlighter.setTextFragmenter(new NullFragmenter());
        String str = highlighter.getBestFragment(
            this.env.indexAnalyzer,
            "".equals(fieldName) ? this.env.defaultField : fieldName,
            text);
        return null != str ? str : text;
    } catch (Exception x) {
        logger.error("Caught an exception:", x);
    }
    return text;
}
/**
 * Visa adapter.
 *
 * @param hg the highlighter to apply
 * @param a the analyzer used to tokenize the stored text
 * @param v the visa to render
 * @return the highlighted search result
 * @throws Exception
 */
public SearchResultWH makeHW(Highlighter hg, Analyzer a, Visa v) throws Exception {
    String s = "";
    String text = v.getIDescription() + "";
    TokenStream tokenStream = a.tokenStream("description", new StringReader(text));
    s += cP(
        "Совпадения в описании",
        hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

    String metatexts = "";
    {
        text = v.getMeta_keywords() + "";
        tokenStream = a.tokenStream("meta_keywords", new StringReader(text));
        metatexts += cPmeta(
            "Совпадения в keywords",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

        text = v.getMeta_description() + "";
        tokenStream = a.tokenStream("meta_description", new StringReader(text));
        metatexts += cPmeta(
            "Совпадения в description",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

        text = v.getMeta_subject() + "";
        tokenStream = a.tokenStream("meta_subject", new StringReader(text));
        metatexts += cPmeta(
            "Совпадения в subject",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    }

    return new SearchResultWH(v, "Visa", s, metatexts);
}
/**
 * Public image adapter.
 *
 * @param hg the highlighter to apply
 * @param a the analyzer used to tokenize the stored text
 * @param pi the public image to render
 * @return the highlighted search result
 * @throws Exception
 */
public SearchResultWH makeHW(Highlighter hg, Analyzer a, PublicImage pi) throws Exception {
    String text = pi.getDescription() + "";
    TokenStream tokenStream = a.tokenStream("description", new StringReader(text));
    String s = cP(
        "Совпадения в описании",
        hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    return new SearchResultWH(pi, "PublicImage", s);
}
/**
 * Any text adapter.
 *
 * @param hg the highlighter to apply
 * @param a the analyzer used to tokenize the stored text
 * @param t the text entity to render
 * @return the highlighted search result
 * @throws Exception
 */
public SearchResultWH makeHW(Highlighter hg, Analyzer a, AnyText t) throws Exception {
    String s = "";
    String text = t.getIDescription() + "";
    TokenStream tokenStream = a.tokenStream("anytext", new StringReader(text));
    s += cP(
        "Совпадения в тексте",
        hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    return new SearchResultWH(t, "AnyText", s);
}
protected String retrieveResultSummary(
        Document document, Highlighter highlighter, StandardAnalyzer analyzer)
    throws InvalidTokenOffsetsException, IOException {
    String content = document.get(FIELD_TOPIC_CONTENT);
    TokenStream tokenStream = analyzer.tokenStream(FIELD_TOPIC_CONTENT, new StringReader(content));
    String summary = highlighter.getBestFragments(tokenStream, content, 3, "...");
    // When nothing could be highlighted, fall back to the first 200 characters of the content.
    if (StringUtils.isBlank(summary) && !StringUtils.isBlank(content)) {
        summary = StringEscapeUtils.escapeHtml(content.substring(0, Math.min(200, content.length())));
        if (Math.min(200, content.length()) == 200) {
            summary += "...";
        }
    }
    return summary;
}
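A hedged sketch of how a summary method like the one above might be driven from a search loop; the formatter tags, hit limit, and searcher are assumptions rather than part of the original class.

// Hypothetical driver for retrieveResultSummary; tags and limits are assumptions.
protected List<String> retrieveSummaries(IndexSearcher searcher, Query query) throws Exception {
    Highlighter highlighter =
        new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), new QueryScorer(query));
    StandardAnalyzer analyzer = new StandardAnalyzer();
    List<String> summaries = new ArrayList<String>();
    for (ScoreDoc hit : searcher.search(query, 20).scoreDocs) {
        summaries.add(retrieveResultSummary(searcher.doc(hit.doc), highlighter, analyzer));
    }
    return summaries;
}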
public String getSnippet(int index) throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer();
    QueryParser qp = new QueryParser(FieldType.TEXT.toString(), analyzer);
    SchemaObjectInfo info = getResult(index);
    String text = info.getTextDescription();
    String label = info.getLabel().toLowerCase();
    // Remove redundant leading labels.
    if (!label.equals("")) {
        while (text.indexOf(label) == 0) {
            text = text.substring(label.length()).trim();
        }
    }
    if (keyword != null) {
        try {
            Highlighter hl = new Highlighter(new QueryScorer(qp.parse(keyword)));
            // Pass the field name (not the text itself) as the first tokenStream argument.
            TokenStream ts = analyzer.tokenStream(FieldType.TEXT.toString(), new StringReader(text));
            String snippet = hl.getBestFragment(ts, text);
            if (snippet != null) {
                return snippet;
            }
        } catch (Exception e) {
            // Fall through and return the unhighlighted text.
        }
    }
    return text;
}
/**
 * @param key the field name to search in
 * @param value the content to search for
 * @throws CorruptIndexException
 * @throws IOException
 * @throws ParseException
 */
public static void search(String key, String value)
    throws CorruptIndexException, IOException, ParseException {
    IndexSearcher searcher;
    // A QueryParser could be used instead: the first argument is the Lucene version,
    // the second the field to search, the third the analyzer.
    // Analyzer analyzer = new IKAnalyzer(); // new StandardAnalyzer(Version.LUCENE_36)
    // QueryParser qp = new QueryParser(Version.LUCENE_36, key, analyzer);
    searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File(PATH))));
    searcher.setSimilarity(new IKSimilarity());
    Query query = IKQueryParser.parse(key, value);
    // Query tq = qp.parse(value);
    TopDocs topDocs = searcher.search(query, 10000);
    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    System.out.println("Hits: " + topDocs.totalHits);

    Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
    Scorer fragmentScorer = new QueryScorer(query);
    Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
    Fragmenter fragmenter = new SimpleFragmenter(50);
    highlighter.setTextFragmenter(fragmenter);
    Analyzer analyzer = new IKAnalyzer();
    for (int i = 0; i < scoreDocs.length; i++) {
        ScoreDoc scoreDoc = topDocs.scoreDocs[i];
        int docSn = scoreDoc.doc; // internal document id
        Document doc = searcher.doc(docSn); // fetch the document by its id
        highlightField(highlighter, analyzer, doc, "name");
        highlightField(highlighter, analyzer, doc, "address");
        highlightField(highlighter, analyzer, doc, "datatype");
        System.out.println("name:" + doc.get("name"));
        System.out.println("address:" + doc.get("address"));
        System.out.println("datatype:" + doc.get("datatype"));
        System.out.println("geom:" + doc.get("geom"));
    }
}
/**
 * Generates highlighted text for a field.
 *
 * @param document the index document object
 * @param highlighter the highlighter
 * @param analyzer the index analyzer
 * @param field the field to highlight
 * @return the highlighted value, or the raw value when nothing matches
 * @throws IOException
 */
public static String highlight(
        Document document, Highlighter highlighter, Analyzer analyzer, String field)
    throws IOException {
    List<IndexableField> list = document.getFields();
    for (IndexableField fieldable : list) {
        String fieldValue = fieldable.stringValue();
        if (fieldable.name().equals(field)) {
            try {
                fieldValue = highlighter.getBestFragment(analyzer, field, fieldValue);
            } catch (InvalidTokenOffsetsException e) {
                fieldValue = fieldable.stringValue();
            }
            return (fieldValue == null || fieldValue.trim().length() == 0)
                ? fieldable.stringValue()
                : fieldValue;
        }
    }
    return null;
}
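As a quick illustration of how two of the utilities in this collection might compose, here is a hedged sketch that pairs the createHighlighter factory from earlier with the highlight method above; the field name and tag strings are assumptions.

// Hypothetical composition of createHighlighter and highlight; names are illustrative.
public static String snippetFor(Document document, Query query, Analyzer analyzer)
    throws IOException {
    Highlighter highlighter = createHighlighter(query, "<em>", "</em>", 100);
    // Falls back to the raw stored value when highlighting yields nothing.
    return highlight(document, highlighter, analyzer, "content");
}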
@Override
protected BenchmarkHighlighter getBenchmarkHighlighter(Query q) {
    highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
    highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
    return new BenchmarkHighlighter() {
        @Override
        public int doHighlight(
                IndexReader reader, int doc, String field, StoredDocument document,
                Analyzer analyzer, String text) throws Exception {
            TokenStream ts = TokenSources.getAnyTokenStream(reader, doc, field, document, analyzer);
            TextFragment[] frag =
                highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFrags);
            return frag != null ? frag.length : 0;
        }
    };
}
public Search search(String queryString)
    throws ParseException, IOException, InvalidTokenOffsetsException {
    List<Result> finalRes = new ArrayList<Result>();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
    Directory indexDir = FSDirectory.open(new File(pathOfIndex));
    Date inizio = new Date();
    IndexReader reader = DirectoryReader.open(indexDir);
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    QueryParser queryParser = new QueryParser(Version.LUCENE_47, "words", analyzer);
    Query query = queryParser.parse(queryString);
    TopDocs hits = indexSearcher.search(query, reader.maxDoc());
    // TopDocs hitsSuggested = new TopDocs();
    int numberOfResults = hits.totalHits;
    Date fine = new Date();
    long time = fine.getTime() - inizio.getTime();
    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));

    // Extract the snippets.
    for (int i = 0; i < hits.totalHits; i++) {
        int id = hits.scoreDocs[i].doc;
        Document doc = indexSearcher.doc(id);
        String text = doc.get("words");
        // MoreLikeThis mlt = new MoreLikeThis(reader);
        // Query querySuggested = mlt.like(hits.scoreDocs[i].doc);
        // hitsSuggested = indexSearcher.search(querySuggested, reader.maxDoc());
        TokenStream tokenStream =
            TokenSources.getAnyTokenStream(indexSearcher.getIndexReader(), id, "words", analyzer);
        TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 4);
        Result r = new Result();
        r.setTitle(doc.get("title"));
        String path = doc.get("path");
        r.setPath(path);
        r.setScore(hits.scoreDocs[i].score);
        String snippet = "";
        for (int j = 0; j < frag.length; j++) {
            if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                snippet += frag[j].toString();
            }
        }
        // Normalize the default <B> tags emitted by SimpleHTMLFormatter to lowercase.
        String snippetFinal = snippet.replace("<B>", "<b>").replace("</B>", "</b>");
        r.setSnippet(snippetFinal);
        finalRes.add(r);
    }
    reader.close();
    String suggestedQuery = null;
    if (numberOfResults < minimumHits || hits.scoreDocs[0].score < minimumScore) {
        suggestedQuery = DidYouMean.suggest(queryString, indexDir);
    }
    return new Search(finalRes, time, queryString, suggestedQuery);
}
/**
 * Searches pages using a particular combination of flags.
 *
 * @param query The query to perform in Lucene query language
 * @param flags A set of flags
 * @return A Collection of SearchResult instances
 * @throws ProviderException if there is a problem with the backend
 */
public Collection findPages(String query, int flags) throws ProviderException {
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;
    try {
        String[] queryfields = {
            LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS
        };
        QueryParser qp =
            new MultiFieldQueryParser(Version.LUCENE_36, queryfields, getLuceneAnalyzer());
        // QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
        Query luceneQuery = qp.parse(query);

        if ((flags & FLAG_CONTEXTS) != 0) {
            highlighter = new Highlighter(
                new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                new SimpleHTMLEncoder(),
                new QueryScorer(luceneQuery));
        }

        try {
            File dir = new File(m_luceneDirectory);
            Directory luceneDir = new SimpleFSDirectory(dir, null);
            IndexReader reader = IndexReader.open(luceneDir);
            searcher = new IndexSearcher(reader);
        } catch (Exception ex) {
            log.info("Lucene not yet ready; indexing not started", ex);
            return null;
        }

        ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;

        list = new ArrayList<SearchResult>(hits.length);
        for (int curr = 0; curr < hits.length; curr++) {
            int docID = hits[curr].doc;
            Document doc = searcher.doc(docID);
            String pageName = doc.get(LUCENE_ID);
            WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

            if (page != null) {
                if (page instanceof Attachment) {
                    // Currently attachments don't look nice on the search-results page.
                    // When the search-results are cleaned up this can be enabled again.
                }

                int score = (int) (hits[curr].score * 100);

                // Get highlighted search contexts.
                String text = doc.get(LUCENE_PAGE_CONTENTS);

                String[] fragments = new String[0];
                if (text != null && highlighter != null) {
                    TokenStream tokenStream =
                        getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text));
                    fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
                }

                SearchResult result = new SearchResultImpl(page, score, fragments);
                list.add(result);
            } else {
                log.error(
                    "Lucene found a result page '" + pageName
                        + "' that could not be loaded, removing from Lucene cache");
                pageRemoved(new WikiPage(m_engine, pageName));
            }
        }
    } catch (IOException e) {
        log.error("Failed during lucene search", e);
    } catch (ParseException e) {
        log.info("Broken query; cannot parse query ", e);
        throw new ProviderException(
            "You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
        log.error("Tokens are incompatible with provided text ", e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.error(e);
            }
        }
    }
    return list;
}
private void doHighlightingByHighlighter(
        Query query, SolrQueryRequest req, NamedList docSummaries, int docId, Document doc,
        String fieldName) throws IOException {
    final SolrIndexSearcher searcher = req.getSearcher();
    final IndexSchema schema = searcher.getSchema();

    // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
    // so we disable them until fixed (see LUCENE-3080)!
    // BEGIN: Hack
    final SchemaField schemaField = schema.getFieldOrNull(fieldName);
    if (schemaField != null
        && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField)
            || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField))) {
        return;
    }
    // END: Hack

    SolrParams params = req.getParams();
    IndexableField[] docFields = doc.getFields(fieldName);
    List<String> listFields = new ArrayList<String>();
    for (IndexableField field : docFields) {
        listFields.add(field.stringValue());
    }

    // Preserve order of values in a multiValued list.
    boolean preserveMulti = params.getFieldBool(fieldName, HighlightParams.PRESERVE_MULTI, false);

    String[] docTexts = (String[]) listFields.toArray(new String[listFields.size()]);

    // According to the Document javadoc, doc.getValues() never returns null;
    // check for empty instead of null.
    if (docTexts.length == 0) return;

    TokenStream tstream = null;
    int numFragments = getMaxSnippets(fieldName, params);
    boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

    String[] summaries = null;
    List<TextFragment> frags = new ArrayList<TextFragment>();

    TermOffsetsTokenStream tots = null; // non-null iff we're using the TermOffsets optimization
    TokenStream tvStream =
        TokenSources.getTokenStreamWithOffsets(searcher.getIndexReader(), docId, fieldName);
    if (tvStream != null) {
        tots = new TermOffsetsTokenStream(tvStream);
    }

    for (int j = 0; j < docTexts.length; j++) {
        if (tots != null) {
            // If we're using the TermOffsets optimization, get the next field value's
            // TokenStream (i.e. field j's TokenStream) from tots:
            tstream = tots.getMultiValuedTokenStream(docTexts[j].length());
        } else {
            // Fall back to the analyzer.
            tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
        }

        int maxCharsToAnalyze = params.getFieldInt(
            fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);

        Highlighter highlighter;
        if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
            if (maxCharsToAnalyze < 0) {
                tstream = new CachingTokenFilter(tstream);
            } else {
                tstream = new CachingTokenFilter(new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze));
            }

            // Get the highlighter.
            highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);

            // After highlighter initialization, reset tstream since construction of the
            // highlighter already used it.
            tstream.reset();
        } else {
            // Use "the old way".
            highlighter = getHighlighter(query, fieldName, req);
        }

        if (maxCharsToAnalyze < 0) {
            highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
        } else {
            highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
        }

        try {
            TextFragment[] bestTextFragments = highlighter.getBestTextFragments(
                tstream, docTexts[j], mergeContiguousFragments, numFragments);
            for (int k = 0; k < bestTextFragments.length; k++) {
                if (preserveMulti) {
                    if (bestTextFragments[k] != null) {
                        frags.add(bestTextFragments[k]);
                    }
                } else {
                    if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
                        frags.add(bestTextFragments[k]);
                    }
                }
            }
        } catch (InvalidTokenOffsetsException e) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        }
    }

    // Sort such that the fragments with the highest score come first.
    if (!preserveMulti) {
        Collections.sort(frags, new Comparator<TextFragment>() {
            @Override
            public int compare(TextFragment arg0, TextFragment arg1) {
                return Math.round(arg1.getScore() - arg0.getScore());
            }
        });
    }

    // Convert fragments back into text.
    // TODO: we can include score and position information in output as snippet attributes.
    if (frags.size() > 0) {
        ArrayList<String> fragTexts = new ArrayList<String>();
        for (TextFragment fragment : frags) {
            if (preserveMulti) {
                if (fragment != null) {
                    fragTexts.add(fragment.toString());
                }
            } else {
                if ((fragment != null) && (fragment.getScore() > 0)) {
                    fragTexts.add(fragment.toString());
                }
            }
            if (fragTexts.size() >= numFragments && !preserveMulti) break;
        }
        summaries = fragTexts.toArray(new String[0]);
        if (summaries.length > 0) docSummaries.add(fieldName, summaries);
    }

    // No summaries made; copy text from the alternate field.
    if (summaries == null || summaries.length == 0) {
        alternateField(docSummaries, params, doc, fieldName);
    }
}
private void doHighlightingByHighlighter(
        Query query, SolrQueryRequest req, NamedList docSummaries, int docId, Document doc,
        String fieldName) throws IOException {
    SolrParams params = req.getParams();
    String[] docTexts = doc.getValues(fieldName);
    // According to the Document javadoc, doc.getValues() never returns null;
    // check for empty instead of null.
    if (docTexts.length == 0) return;

    SolrIndexSearcher searcher = req.getSearcher();
    IndexSchema schema = searcher.getSchema();
    TokenStream tstream = null;
    int numFragments = getMaxSnippets(fieldName, params);
    boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);

    String[] summaries = null;
    List<TextFragment> frags = new ArrayList<TextFragment>();

    TermOffsetsTokenStream tots = null; // non-null iff we're using the TermOffsets optimization
    try {
        TokenStream tvStream =
            TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName);
        if (tvStream != null) {
            tots = new TermOffsetsTokenStream(tvStream);
        }
    } catch (IllegalArgumentException e) {
        // No problem, but we can't use the TermOffsets optimization.
    }

    for (int j = 0; j < docTexts.length; j++) {
        if (tots != null) {
            // If we're using the TermOffsets optimization, get the next field value's
            // TokenStream (i.e. field j's TokenStream) from tots:
            tstream = tots.getMultiValuedTokenStream(docTexts[j].length());
        } else {
            // Fall back to the analyzer.
            tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
        }

        Highlighter highlighter;
        if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
            // TODO: this is not always necessary - eventually we would like to avoid this wrap
            // when it is not needed.
            tstream = new CachingTokenFilter(tstream);

            // Get the highlighter.
            highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);

            // After highlighter initialization, reset tstream since construction of the
            // highlighter already used it.
            tstream.reset();
        } else {
            // Use "the old way".
            highlighter = getHighlighter(query, fieldName, req);
        }

        int maxCharsToAnalyze = params.getFieldInt(
            fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
        if (maxCharsToAnalyze < 0) {
            highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
        } else {
            highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
        }

        try {
            TextFragment[] bestTextFragments = highlighter.getBestTextFragments(
                tstream, docTexts[j], mergeContiguousFragments, numFragments);
            for (int k = 0; k < bestTextFragments.length; k++) {
                if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
                    frags.add(bestTextFragments[k]);
                }
            }
        } catch (InvalidTokenOffsetsException e) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        }
    }

    // Sort such that the fragments with the highest score come first.
    Collections.sort(frags, new Comparator<TextFragment>() {
        public int compare(TextFragment arg0, TextFragment arg1) {
            return Math.round(arg1.getScore() - arg0.getScore());
        }
    });

    // Convert fragments back into text.
    // TODO: we can include score and position information in output as snippet attributes.
    if (frags.size() > 0) {
        ArrayList<String> fragTexts = new ArrayList<String>();
        for (TextFragment fragment : frags) {
            if ((fragment != null) && (fragment.getScore() > 0)) {
                fragTexts.add(fragment.toString());
            }
            if (fragTexts.size() >= numFragments) break;
        }
        summaries = fragTexts.toArray(new String[0]);
        if (summaries.length > 0) docSummaries.add(fieldName, summaries);
    }

    // No summaries made; copy text from the alternate field.
    if (summaries == null || summaries.length == 0) {
        alternateField(docSummaries, params, doc, fieldName);
    }
}
public Vector<Hashtable<String, String>> query(String s, String subject, String from, String to)
    throws CorruptIndexException, IOException, ParseException, InvalidTokenOffsetsException {
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_36);
    QueryParser parser = new QueryParser(Version.LUCENE_36, "segmented", analyzer);
    // s = Lucene.client.autoSegment(s);
    // System.out.println(s);
    String[] sprite = s.split(" ");
    ScoreDoc[] docs;
    String q = "";
    // Constrain the post_date range when "from" and/or "to" timestamps are given.
    if (from.length() == 14 && to.length() == 14) {
        q += "post_date:[" + from + " TO " + to + "] AND ";
    } else if (from.length() == 14) {
        q += "post_date:[" + from + " TO " + "21000000000000" + "] AND ";
    } else if (to.length() == 14) {
        q += "post_date:[" + "20000000000000" + " TO " + to + "] AND ";
    }
    if (subject.length() == 0) {
        q += s;
    } else {
        if (!subject.equals("all")) {
            q += s + " AND subject:" + subject;
        } else {
            q += s;
        }
    }
    if (subject.length() == 0) {
        docs = this.search(searcher, parser.parse(q));
    } else {
        docs = this.search(searcher, parser.parse(q + " AND sub_score:[0070 TO 1000]"));
    }
    Vector<Hashtable<String, String>> result = new Vector<>();
    System.out.println(q);
    System.out.println(docs.length);
    SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
    Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(parser.parse(s)));

    // Walk the top 100 hits and build the result entries.
    for (int i = 0; i < 100 && i < docs.length; ++i) {
        Document doc = searcher.doc(docs[i].doc);
        String docResult = "";
        Hashtable<String, String> ret = new Hashtable<>();
        // docResult += "doc=" + docs[i].doc + " score=" + docs[i].score + "\n";
        TokenStream stream = TokenSources.getAnyTokenStream(
            searcher.getIndexReader(), docs[i].doc, "post_content", analyzer);
        TextFragment[] frag;
        String content = doc.get("post_content");
        // Pad each query term with spaces so the whitespace analyzer can match it.
        for (String tmp : sprite) {
            content = content.replaceAll(tmp, " " + tmp + " ");
        }
        docResult += highlighter.getBestFragment(analyzer, "", content);
        /*
        frag = highlighter.getBestTextFragments(stream, content, false, 100);
        for (int j = 0; j < frag.length; j++) {
            if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                docResult += (frag[j].toString());
            }
        }
        */
        // Undo the padding around highlighted terms.
        docResult = docResult.replaceAll(" (<B>.*?</B>) ", "$1");
        try {
            // ret.put("id", safe_get(doc, "id"));
            ret.put("content", docResult);
            ret.put("url", safe_get(doc, "url"));
            ret.put("date", safe_get(doc, "post_date"));
            ret.put("id", safe_get(doc, "post_title"));
            ret.put("sub_score", safe_get(doc, "sub_score"));
            ret.put("subject", safe_get(doc, "subject"));
            ret.put("score", Float.toString(docs[i].score));
            result.add(ret);
        } catch (Exception err) {
            result.add(ret);
            continue;
        }
    }
    return result;
}
/**
 * @param currentPageNum the 1-based page number to fetch
 * @return the documents of the requested result page
 * @throws Exception
 */
public List<DocumentEntity> getResult(int currentPageNum) throws Exception {
    List<DocumentEntity> list = new ArrayList<DocumentEntity>();
    directory = FSDirectory.open(new File(SystemConstant.indexDir));
    reader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Highlighting setup.
    Highlighter highlighter = null;
    SimpleHTMLFormatter simpleHTMLFormatter =
        new SimpleHTMLFormatter("<font color='red'><b>", "</b></font>");
    Highlighter highlighterTitle = null;
    SimpleHTMLFormatter formatTitle = new SimpleHTMLFormatter("<FONT color=#c60a00>", "</FONT>");

    ScoreDoc[] hits = this.getScoreDocs();
    Query query = this.getQuery();
    highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
    highlighterTitle = new Highlighter(formatTitle, new QueryScorer(query));
    // The 200 sets the length of context returned around the keyword; adjust as needed,
    // since the full body text cannot be returned.
    highlighter.setTextFragmenter(new SimpleFragmenter(200));

    Document doc;
    String fileName = "";
    int totalNumber = currentPageNum * eachePageNum;
    if (totalNumber > hits.length) totalNumber = hits.length;

    for (int i = (currentPageNum - 1) * eachePageNum; i < totalNumber; i++) {
        // Fetch the document and highlight its content.
        doc = searcher.doc(hits[i].doc);
        DocumentEntity docEntity = new DocumentEntity();
        TokenStream tokenStream =
            new PaodingAnalyzer().tokenStream("contents", new StringReader(doc.get("contents")));
        docEntity.setContents(highlighter.getBestFragment(tokenStream, doc.get("contents")));

        fileName = doc.get("fileName");
        tokenStream = new PaodingAnalyzer().tokenStream("fileName", new StringReader(fileName));
        // Note: getBestFragment returns null when the text contains no matching keyword.
        String forMatt = highlighterTitle.getBestFragment(tokenStream, fileName);
        if (forMatt == null) {
            docEntity.setFilename(fileName);
        } else {
            docEntity.setFilename(forMatt);
        }

        String type1 = doc.get("type");
        docEntity.setType(type1);
        docEntity.setId(doc.get("id"));
        if ("pdf".equalsIgnoreCase(type1)) {
            fileName = SystemConstant.CONTEXT + SystemConstant.PDFdir + fileName + "." + type1;
            docEntity.setOriginalFileName(fileName);
        } else if ("doc".equalsIgnoreCase(type1)) {
            fileName = SystemConstant.CONTEXT + SystemConstant.Docdir + fileName + "." + type1;
            docEntity.setOriginalFileName(fileName);
        }
        list.add(docEntity);
    }
    return list;
}
public String getContext(String sample) throws IOException {
    String result = "";
    try {
        String highlight_query_str = cfg.searchField + ":" + cfg.quotes + sample + cfg.quotes;
        String query_str = "padded_length:[" + String.format("%09d", cfg.minDocLen) + " TO *]";
        if (cfg.enableTitleSearch) {
            query_str += " AND (title:" + cfg.quotes + sample + cfg.quotes + " OR "
                + cfg.searchField + ":" + cfg.quotes + sample + cfg.quotes + ")";
        } else {
            query_str += " AND (" + cfg.searchField + ":" + cfg.quotes + sample + cfg.quotes + ")";
        }

        Query query = parser.parse(query_str);
        Query highlight_query = parser.parse(highlight_query_str);

        if (cfg.debug == true) System.out.println("Searching (" + query + ").....");
        TopDocs topDocs = searcher.search(query, cfg.maxHits != 0 ? cfg.maxHits : Integer.MAX_VALUE);
        if (topDocs.totalHits > 0) {
            ScoreDoc[] hits = topDocs.scoreDocs;
            if (cfg.debug == true) System.out.println("Results (" + hits.length + ") :)");
            String data;
            int indx;
            SimpleHTMLFormatter htmlFormatter = null;
            Highlighter highlighter = null;
            if (cfg.displayHighlights) {
                htmlFormatter = new SimpleHTMLFormatter();
                highlighter = new Highlighter(htmlFormatter, new QueryScorer(highlight_query));
            }
            for (int i = 0; i < hits.length; i++) {
                if (cfg.displayDID) {
                    result += String.format("\t%d", hits[i].doc);
                }
                if (cfg.displayScore) {
                    result += String.format("\t%f", hits[i].score);
                }
                if (cfg.displayLen) {
                    result += "\t" + indexReader.document(hits[i].doc).getField("length").stringValue();
                }
                if (cfg.displayTitle) {
                    data = indexReader.document(hits[i].doc).getField("title").stringValue();
                    // Optionally strip a trailing parenthetical from the title.
                    if (cfg.removeParen && (indx = data.indexOf(" (")) != -1) {
                        data = indexReader.document(hits[i].doc).getField("title").stringValue()
                            .substring(0, indx);
                    }
                    result += "\t" + data;
                }
                if (cfg.displayTxt || cfg.displayHighlights) {
                    String text = indexReader.document(hits[i].doc).getField("text").stringValue();
                    if (cfg.displayTxt) result += "\t" + text;
                    if (cfg.displayHighlights) {
                        TokenStream tokenStream = TokenSources.getAnyTokenStream(
                            searcher.getIndexReader(), hits[i].doc, "text", stdAnalyzer);
                        TextFragment[] frag;
                        try {
                            frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
                            for (int j = 0; j < frag.length; j++) {
                                if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                                    result += "\t" + (frag[j].toString());
                                }
                            }
                        } catch (InvalidTokenOffsetsException e) {
                            e.printStackTrace();
                        }
                        // highlighter.getBestFragments(tokenStream, text, 3, "...");
                    }
                }
                if (cfg.displayCategories) {
                    IndexableField categories[] = indexReader.document(hits[i].doc).getFields("category");
                    for (int j = 0;
                        j < categories.length && (cfg.numCategories == 0 || j < cfg.numCategories);
                        j++) {
                        result += "\t" + categories[j].stringValue();
                    }
                }
                result += System.lineSeparator() + System.lineSeparator() + System.lineSeparator();
            }
        } else if (cfg.debug == true) {
            System.out.println("No results found :(");
        }
    } catch (ParseException e) {
        e.printStackTrace();
    }
    return result;
}
@SuppressWarnings("resource") public static String getTextAbstract(String title, String content) { try { content = getTextFromHtml(content); org.apache.lucene.search.Query q = IKQueryParser.parse("CONTENT", title); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("", ""); Highlighter highlighter = new Highlighter(formatter, new QueryScorer(q)); highlighter.setTextFragmenter(new SimpleFragmenter(200)); org.apache.lucene.analysis.TokenStream tokenStream = (new IKAnalyzer()).tokenStream("CONTENT", new StringReader(content)); String tmp = highlighter.getBestFragment(tokenStream, content); if (HtmlStringUtil.isNotEmpty(tmp)) content = tmp.trim(); } catch (Exception e) { e.printStackTrace(); } int start = 0; int end = 0; boolean startFlag = true; for (int i = 0; i < content.length(); i++) { char c = content.charAt(i); if (startFlag) { if (Character.isWhitespace(c) || Character.isISOControl(c) || c == ',' || c == ',' || c == '”' || c == '’' || c == '.' || c == '。' || c == '>' || c == '?' || c == '?' || c == ' ' || c == ' ' || c == ' ' || c == '!' || c == '!' || c == ';' || c == ';' || c == ':' || c == ':' || c == ']' || c == ']') continue; start = i; startFlag = false; } if (!startFlag) if (c == '.' || c == '。' || c == '?' || c == '?' || c == '!' || c == '!' || c == ' ' || c == ' ' || c == ' ') { if (i < 8) start = i + 1; end = i; if (i != content.length() - 1 && (content.charAt(i + 1) == '”' || content.charAt(i + 1) == '’')) end = i + 1; } else { if ((c == ',' || c == ',' || c == '>' || c == '》' || c == '、') && i < 2) start = i + 1; if (c == '’' || c == '”') if (i != content.length() - 1) { char next = content.charAt(i + 1); if (next != ',' && next == ',' && next == '、' && next == ';' && next == ';') end = i + 1; } else { end = i; } } } if (end != 0 && end > start) { content = content.substring(start, end + 1).trim(); start = 0; for (int i = 0; i < content.length(); i++) { char c = content.charAt(i); if ((c == '.' || c == '。' || c == '?' || c == '?' || c == '!' || c == '!' || c == ' ' || c == ' ' || c == ' ') && i < 8) start = i + 1; } if (start != 0) content = content.substring(start); end = 0; if (HtmlStringUtil.isNotEmpty(content)) { char c = content.charAt(content.length() - 1); if (c != '.' && c != '。' && c != '?' && c != '?' && c != '!' && c != '!') { for (int i = content.length() - 2; i > 0; i--) { c = content.charAt(i); if (c != ';' && c != ';' && c != ',' && c != ',' && c != '>' && c != '》') continue; end = i; break; } } } if (end != 0) content = content.substring(0, end); } return content; }
/**
 * Search method.
 *
 * @param f the field names to search in
 * @param luquery the Lucene query string
 * @param entities the entity classes to search over
 * @return the highlighted search results
 * @throws Exception
 */
@Transactional(propagation = Propagation.REQUIRED)
public List<SearchResultWH> searchWH(
        String[] f, String luquery, String hgQuery, SortField[] sortFields, int firstResult,
        int maxResults, Class<?>... entities) throws Exception {
    // Create the FullTextEntityManager.
    FullTextEntityManager fullTextEntityManager =
        org.hibernate.search.jpa.Search.getFullTextEntityManager(entityManager);
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);

    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_31, f, analyzer);
    org.apache.lucene.search.Query query = parser.parse(luquery.trim());
    System.out.println("QUERY: " + query + " entities size:" + entities.length);

    // Wrap the Lucene query in a javax.persistence.Query.
    // javax.persistence.Query persistenceQuery = fullTextEntityManager
    //     .createFullTextQuery(query, entities);
    org.hibernate.search.jpa.FullTextQuery persistenceQuery =
        fullTextEntityManager.createFullTextQuery(query, entities);

    // org.apache.lucene.search.Sort sort = new Sort(
    //     new SortField("title", SortField.STRING));
    if (sortFields != null && sortFields.length > 0) {
        persistenceQuery.setSort(new Sort(sortFields));
        System.out.println("Sort set");
    }
    if (firstResult >= 0) {
        persistenceQuery.setFirstResult(firstResult);
        persistenceQuery.setMaxResults(maxResults);
    }

    // Execute the search.
    @SuppressWarnings("unchecked")
    List<Object> result = persistenceQuery.getResultList();

    QueryScorer qs = null;
    if (hgQuery != null && hgQuery.trim().length() > 0) {
        // A specific highlight query was supplied.
        qs = new QueryScorer(parser.parse(hgQuery.trim()));
    } else {
        qs = new QueryScorer(query);
    }
    Highlighter highlighter = new Highlighter(
        new Formatter() {
            String preTag = "<font color=\"red\"><b>";
            String postTag = "</b></font>";

            @Override
            public String highlightTerm(String originalText, TokenGroup tokenGroup) {
                StringBuffer returnBuffer;
                if (tokenGroup.getTotalScore() > 0) {
                    returnBuffer = new StringBuffer();
                    returnBuffer.append(preTag);
                    returnBuffer.append(originalText);
                    returnBuffer.append(postTag);
                    return returnBuffer.toString();
                }
                return originalText;
            }
        },
        qs);
    highlighter.setTextFragmenter(new SimpleFragmenter(150));

    List<SearchResultWH> listWH = new ArrayList<SearchResultWH>();
    for (Object o : result) {
        if (o instanceof Hotel) {
            Hotel h = (Hotel) o;
            h.getResortTypes().size(); // force the LAZY collection to load
            listWH.add(makeHW(highlighter, analyzer, h));
        } else if (o instanceof ResortType) {
            listWH.add(makeHW(highlighter, analyzer, (ResortType) o));
        } else if (o instanceof PublicImage) {
            listWH.add(makeHW(highlighter, analyzer, (PublicImage) o));
        } else if (o instanceof FileResourse) {
            listWH.add(makeHW(highlighter, analyzer, (FileResourse) o));
        } else if (o instanceof News) {
            listWH.add(makeHW(highlighter, analyzer, (News) o));
        } else if (o instanceof Announce) {
            listWH.add(makeHW(highlighter, analyzer, (Announce) o));
        } else if (o instanceof Location) {
            listWH.add(makeHW(highlighter, analyzer, (Location) o));
        } else if (o instanceof LType) {
            listWH.add(makeHW(highlighter, analyzer, (LType) o));
        } else if (o instanceof AnyText) {
            listWH.add(makeHW(highlighter, analyzer, (AnyText) o));
        } else if (o instanceof Visa) {
            listWH.add(makeHW(highlighter, analyzer, (Visa) o));
        }
    }
    return listWH;
}