Пример #1
0
  /**
   * Annoncements adapter
   *
   * @param hg
   * @param a
   * @param ann
   * @return
   * @throws Exception
   */
  public SearchResultWH makeHW(Highlighter hg, Analyzer a, Announce ann) throws Exception {
    String s = "";

    {
      String text = ann.getITopDescription() + "";

      TokenStream tokenStream = a.tokenStream("topdescription", new StringReader(text));
      s +=
          cP(
              "Совпадения в заголовке объявления",
              hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    }

    {
      String text = ann.getIDescription() + "";

      TokenStream tokenStream = a.tokenStream("description", new StringReader(text));
      s +=
          cP(
              "Совпадения в тексте объявления",
              hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    }

    String metatexts = "";

    {
      String text = ann.getMeta_keywords() + "";

      TokenStream tokenStream = a.tokenStream("meta_keywords", new StringReader(text));
      metatexts +=
          cPmeta(
              "Совпадения в keywords",
              hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

      text = ann.getMeta_description() + "";

      tokenStream = a.tokenStream("meta_description", new StringReader(text));
      metatexts +=
          cPmeta(
              "Совпадения в description",
              hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

      text = ann.getMeta_subject() + "";

      tokenStream = a.tokenStream("meta_subject", new StringReader(text));
      metatexts +=
          cPmeta(
              "Совпадения в subject",
              hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    }

    SearchResultWH swh = new SearchResultWH(ann, "Announce", s, metatexts);
    return swh;
  }
Пример #2
0
  @Override
  public String getSnippet(
      Query query,
      String field,
      String s,
      int maxNumFragments,
      int fragmentLength,
      String fragmentSuffix,
      Formatter formatter)
      throws IOException {

    QueryScorer queryScorer = new QueryScorer(query, field);

    Highlighter highlighter = new Highlighter(formatter, queryScorer);

    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));

    TokenStream tokenStream = getAnalyzer().tokenStream(field, new UnsyncStringReader(s));

    try {
      String snippet =
          highlighter.getBestFragments(tokenStream, s, maxNumFragments, fragmentSuffix);

      if (Validator.isNotNull(snippet)
          && !StringUtil.endsWith(snippet, fragmentSuffix)
          && !s.equals(snippet)) {

        snippet = snippet.concat(fragmentSuffix);
      }

      return snippet;
    } catch (InvalidTokenOffsetsException itoe) {
      throw new IOException(itoe);
    }
  }
Пример #3
0
  /**
   * Visa adapter
   *
   * @param hg
   * @param a
   * @param ann
   * @return
   * @throws Exception
   */
  public SearchResultWH makeHW(Highlighter hg, Analyzer a, Visa v) throws Exception {
    String s = "";
    String text = v.getIDescription() + "";

    TokenStream tokenStream = a.tokenStream("description", new StringReader(text));
    s +=
        cP(
            "Совпадения в описании",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

    String metatexts = "";

    {
      text = v.getMeta_keywords() + "";

      tokenStream = a.tokenStream("meta_keywords", new StringReader(text));
      metatexts +=
          cPmeta(
              "Совпадения в keywords",
              hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

      text = v.getMeta_description() + "";

      tokenStream = a.tokenStream("meta_description", new StringReader(text));
      metatexts +=
          cPmeta(
              "Совпадения в description",
              hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

      text = v.getMeta_subject() + "";

      tokenStream = a.tokenStream("meta_subject", new StringReader(text));
      metatexts +=
          cPmeta(
              "Совпадения в subject",
              hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));
    }

    SearchResultWH swh = new SearchResultWH(v, "Visa", s, metatexts);
    return swh;
  }
Пример #4
0
  /**
   * Public image adapter
   *
   * @param hg
   * @param a
   * @param pi
   * @return
   * @throws Exception
   */
  public SearchResultWH makeHW(Highlighter hg, Analyzer a, PublicImage pi) throws Exception {
    String text = pi.getDescription() + "";

    TokenStream tokenStream = a.tokenStream("description", new StringReader(text));
    String s =
        cP(
            "Совпадения в описании",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

    SearchResultWH swh = new SearchResultWH(pi, "PublicImage", s);
    return swh;
  }
Пример #5
0
  /**
   * Any text adapter
   *
   * @param hg
   * @param a
   * @param ann
   * @return
   * @throws Exception
   */
  public SearchResultWH makeHW(Highlighter hg, Analyzer a, AnyText t) throws Exception {
    String s = "";
    String text = t.getIDescription() + "";

    TokenStream tokenStream = a.tokenStream("anytext", new StringReader(text));
    s +=
        cP(
            "Совпадения в тексте",
            hg.getBestFragments(tokenStream, text, MAX_NUM_FRAGMENTS_REQUIRED, "... "));

    SearchResultWH swh = new SearchResultWH(t, "AnyText", s);
    return swh;
  }
Пример #6
0
 protected String retrieveResultSummary(
     Document document, Highlighter highlighter, StandardAnalyzer analyzer)
     throws InvalidTokenOffsetsException, IOException {
   String content = document.get(FIELD_TOPIC_CONTENT);
   TokenStream tokenStream = analyzer.tokenStream(FIELD_TOPIC_CONTENT, new StringReader(content));
   String summary = highlighter.getBestFragments(tokenStream, content, 3, "...");
   if (StringUtils.isBlank(summary) && !StringUtils.isBlank(content)) {
     summary = StringEscapeUtils.escapeHtml(content.substring(0, Math.min(200, content.length())));
     if (Math.min(200, content.length()) == 200) {
       summary += "...";
     }
   }
   return summary;
 }
Пример #7
0
 public void heighlight(String field, String searchText) {
   String text =
       "In this section we'll show you how to make the simplest "
           + "programmatic query, searching for a single term, and then "
           + "we'll see how to use QueryParser to accept textual queries. "
           + "In the sections that follow, we’ll take this simple example "
           + "further by detailing all the query types built into Lucene. "
           + "We begin with the simplest search of all: searching for all "
           + "documents that contain a single term.";
   Analyzer analyzer = new StandardAnalyzer();
   QueryParser queryParser = new QueryParser(field, analyzer);
   try {
     Query query = queryParser.parse(searchText);
     SimpleHTMLFormatter formatter =
         new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
     TokenStream tokens = analyzer.tokenStream("f", new StringReader(text));
     QueryScorer scorer = new QueryScorer(query, "f");
     Highlighter highlighter = new Highlighter(formatter, scorer);
     highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
     String result = highlighter.getBestFragments(tokens, text, 3, "...");
     FileWriter writer = new FileWriter("/home/venugopal/Downloads/Lucene/lia/highter.html"); // #8
     writer.write("<html>"); // #8
     writer.write(
         "<style>\n"
             + // #8
             ".highlight {\n"
             + // #8
             " background: yellow;\n"
             + // #8
             "}\n"
             + // #8
             "</style>"); // #8
     writer.write("<body>"); // #8
     writer.write(result); // #8
     writer.write("</body></html>"); // #8
     writer.close();
   } catch (ParseException e) {
     // TODO Auto-generated catch block
     e.printStackTrace();
   } catch (IOException e) {
     // TODO Auto-generated catch block
     e.printStackTrace();
   } catch (InvalidTokenOffsetsException e) {
     // TODO Auto-generated catch block
     e.printStackTrace();
   }
 }
Пример #8
0
  /**
   * Searches pages using a particular combination of flags.
   *
   * @param query The query to perform in Lucene query language
   * @param flags A set of flags
   * @return A Collection of SearchResult instances
   * @throws ProviderException if there is a problem with the backend
   */
  public Collection findPages(String query, int flags) throws ProviderException {
    IndexSearcher searcher = null;
    ArrayList<SearchResult> list = null;
    Highlighter highlighter = null;

    try {
      String[] queryfields = {
        LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS
      };
      QueryParser qp =
          new MultiFieldQueryParser(Version.LUCENE_36, queryfields, getLuceneAnalyzer());

      // QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
      Query luceneQuery = qp.parse(query);

      if ((flags & FLAG_CONTEXTS) != 0) {
        highlighter =
            new Highlighter(
                new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
                new SimpleHTMLEncoder(),
                new QueryScorer(luceneQuery));
      }

      try {
        File dir = new File(m_luceneDirectory);
        Directory luceneDir = new SimpleFSDirectory(dir, null);
        IndexReader reader = IndexReader.open(luceneDir);
        searcher = new IndexSearcher(reader);
      } catch (Exception ex) {
        log.info("Lucene not yet ready; indexing not started", ex);
        return null;
      }

      ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;

      list = new ArrayList<SearchResult>(hits.length);
      for (int curr = 0; curr < hits.length; curr++) {
        int docID = hits[curr].doc;
        Document doc = searcher.doc(docID);
        String pageName = doc.get(LUCENE_ID);
        WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);

        if (page != null) {
          if (page instanceof Attachment) {
            // Currently attachments don't look nice on the search-results page
            // When the search-results are cleaned up this can be enabled again.
          }

          int score = (int) (hits[curr].score * 100);

          // Get highlighted search contexts
          String text = doc.get(LUCENE_PAGE_CONTENTS);

          String[] fragments = new String[0];
          if (text != null && highlighter != null) {
            TokenStream tokenStream =
                getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text));
            fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
          }

          SearchResult result = new SearchResultImpl(page, score, fragments);
          list.add(result);
        } else {
          log.error(
              "Lucene found a result page '"
                  + pageName
                  + "' that could not be loaded, removing from Lucene cache");
          pageRemoved(new WikiPage(m_engine, pageName));
        }
      }
    } catch (IOException e) {
      log.error("Failed during lucene search", e);
    } catch (ParseException e) {
      log.info("Broken query; cannot parse query ", e);

      throw new ProviderException(
          "You have entered a query Lucene cannot process: " + e.getMessage());
    } catch (InvalidTokenOffsetsException e) {
      log.error("Tokens are incompatible with provided text ", e);
    } finally {
      if (searcher != null) {
        try {
          searcher.close();
        } catch (IOException e) {
          log.error(e);
        }
      }
    }

    return list;
  }