private QueryExpression create(Request request, ResourceDefinition resourceDefinition)
      throws InvalidQueryException {
    String queryString;
    if (request.getCardinality() == Request.Cardinality.INSTANCE) {
      String idPropertyName = resourceDefinition.getIdPropertyName();
      queryString =
          String.format("%s:%s", idPropertyName, request.<String>getProperty(idPropertyName));
    } else {
      queryString = request.getQueryString();
    }

    QueryExpression queryExpression;
    if (queryString != null && !queryString.isEmpty()) {
      QueryParser queryParser = new QueryParser(Version.LUCENE_48, "name", new KeywordAnalyzer());
      queryParser.setLowercaseExpandedTerms(false);
      queryParser.setAllowLeadingWildcard(true);
      Query query;
      try {
        query = queryParser.parse((String) escape(queryString));
      } catch (ParseException e) {
        throw new InvalidQueryException(e.getMessage());
      }
      LOG.info("LuceneQuery: " + query);
      queryExpression = create(query, resourceDefinition);
    } else {
      queryExpression = new AlwaysQueryExpression();
    }
    // add query properties to request so that they are returned
    request.addAdditionalSelectProperties(queryExpression.getProperties());
    return queryExpression;
  }
Пример #2
0
  /**
   * give the id list of sentences, from Lucene index
   *
   * @param input input word
   * @param catalogName catalog (domain) name which we'd like to search in
   * @param limit how many hits are needed (0 means all)
   */
  public List<String> query(String input, String catalogName, int limit) {

    List<String> res = new ArrayList<String>();
    try {

      catalog c = catalogs.get(catalogName);
      IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(c.indexPath)));
      IndexSearcher searcher = new IndexSearcher(reader);

      QueryParser parser = new QueryParser("contents", analyzer);
      Query query = parser.parse(QueryParser.escape(input));

      int n = limit > 0 ? limit : searcher.count(query);
      if (n == 0) n = 1;
      TopDocs results = searcher.search(query, n);

      int endPos = limit;
      if (limit != 0) endPos = Math.min(results.totalHits, limit); // 1st n hits
      else endPos = results.totalHits; // all hits

      for (int i = 0; i < endPos; i++) {
        int id = results.scoreDocs[i].doc;
        Document doc = searcher.doc(id);
        res.add(doc.get("filename"));
      }
      reader.close();
      return res;

    } catch (ParseException e) {
      log(e.getMessage());
    } catch (IOException e) {
      log(e.getMessage());
    }
    return res;
  }
  public void search(
      String searchTerm, Set<Integer> categories, Set<String> languages, String sortBy) {
    if (searcher == null) {
      System.err.println("Searching before searcher was initialized.");
      return;
    }

    // fetch from cache if searchterm is null
    if (searchTerm == null) searchTerm = lastSearchTerm;
    lastCategories = categories;
    lastLanguages = languages;
    lastSearchTerm = searchTerm;
    lastSortBy = sortBy;

    try {
      lastResults = searcher.search(searchTerm, categories, languages);
      lastResults = sortResults(lastResults, sortBy);

      for (SearcherListener listener : listListeners) {
        listener.onSearch(lastResults);
      }
    } catch (ParseException e) {
      Alert.alert(MessageType.WARNING, "The search string is not valid.", window);
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
Пример #4
0
  /** This function is only for test search. */
  public static List<String> searchQuery(
      String indexDir, String queryString, int numResults, CharArraySet stopwords) {
    String field = "contents";
    List<String> hitPaths = new ArrayList<String>();

    try {
      IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir)));
      IndexSearcher searcher = new IndexSearcher(reader);

      Analyzer analyzer = new MyAnalyzer(Version.LUCENE_44, stopwords);

      QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer);
      Query query;
      query = parser.parse(QueryParser.escape(queryString));

      TopDocs results = searcher.search(query, null, numResults);
      for (ScoreDoc hit : results.scoreDocs) {
        String path = searcher.doc(hit.doc).get("path");
        hitPaths.add(path.substring(0, path.length() - 4)); // chop off the file extension (".txt")
      }
    } catch (IOException e) {
      System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    } catch (ParseException e) {
      System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

    return hitPaths;
  }
Пример #5
0
  //	private Analyzer analyzer = new StandardAnalyzer(matchVersion);
  @Test
  public void test01() {
    Directory directory = null;
    IndexWriter indexWriter = null;
    IndexReader indexReader = null;
    IndexWriterConfig config = new IndexWriterConfig(matchVersion, analyzer);
    try {
      directory = new RAMDirectory();
      indexWriter = new IndexWriter(directory, config);

      Document document = new Document();
      document.add(new TextField("content", "or good", Store.YES));

      indexWriter.addDocument(document);

      indexWriter.commit();

      indexReader = DirectoryReader.open(directory);
      IndexSearcher indexSearcher = new IndexSearcher(indexReader);
      QueryParser parser = new QueryParser(matchVersion, "content", analyzer);
      Query query = parser.parse("excellent");

      TopDocs topDocs = indexSearcher.search(query, 100);
      for (ScoreDoc match : topDocs.scoreDocs) {
        Document matchDoc = indexSearcher.doc(match.doc);
        System.out.println("result: " + matchDoc.get("content"));
      }
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (ParseException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } finally {
      try {
        directory.close();
      } catch (IOException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
      }
      try {
        indexWriter.close();
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
      try {
        indexReader.close();
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
    }
  }
Пример #6
0
 public void heighlight(String field, String searchText) {
   String text =
       "In this section we'll show you how to make the simplest "
           + "programmatic query, searching for a single term, and then "
           + "we'll see how to use QueryParser to accept textual queries. "
           + "In the sections that follow, we’ll take this simple example "
           + "further by detailing all the query types built into Lucene. "
           + "We begin with the simplest search of all: searching for all "
           + "documents that contain a single term.";
   Analyzer analyzer = new StandardAnalyzer();
   QueryParser queryParser = new QueryParser(field, analyzer);
   try {
     Query query = queryParser.parse(searchText);
     SimpleHTMLFormatter formatter =
         new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
     TokenStream tokens = analyzer.tokenStream("f", new StringReader(text));
     QueryScorer scorer = new QueryScorer(query, "f");
     Highlighter highlighter = new Highlighter(formatter, scorer);
     highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
     String result = highlighter.getBestFragments(tokens, text, 3, "...");
     FileWriter writer = new FileWriter("/home/venugopal/Downloads/Lucene/lia/highter.html"); // #8
     writer.write("<html>"); // #8
     writer.write(
         "<style>\n"
             + // #8
             ".highlight {\n"
             + // #8
             " background: yellow;\n"
             + // #8
             "}\n"
             + // #8
             "</style>"); // #8
     writer.write("<body>"); // #8
     writer.write(result); // #8
     writer.write("</body></html>"); // #8
     writer.close();
   } catch (ParseException e) {
     // TODO Auto-generated catch block
     e.printStackTrace();
   } catch (IOException e) {
     // TODO Auto-generated catch block
     e.printStackTrace();
   } catch (InvalidTokenOffsetsException e) {
     // TODO Auto-generated catch block
     e.printStackTrace();
   }
 }
  /** This function is only for test search. */
  public static List<String> searchQuery(
      String indexDir, String queryString, int numResults, CharArraySet stopwords) {
    String field = "contents";
    List<String> hitPaths = new ArrayList<String>();

    try {
      IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir)));
      IndexSearcher searcher = new IndexSearcher(reader);

      // Used with mySimilarity.java for part E
      mySimilarity sim = new mySimilarity();
      searcher.setSimilarity(sim);

      // Used in part F
      // String field="Search_Field";
      // the second parameter calls the getAvgLength method and automatically calculates the
      // Average Length value
      // BM25Parameters.setAverageLength(field,getAvgLen(reader,field));
      // BM25Parameters.setB(0.75f);
      // BM25Parameters.setK1(2f);
      // BM25BooleanQuery query = new BM25BooleanQuery( "Cystic hydroma" ,field,analyzer);
      // System.out.println ("Searching for: " + query.toString(field));
      // TopDocs top=searcher.search(query, 10);
      // ScoreDoc[] docs = top.scoreDocs;
      // for (int i = 0; i < 10; i++){
      // System.out.println("the document with id= " + docs[i].doc + " has score ="+docs[i].score);
      // }

      Analyzer analyzer = new MyAnalyzer(Version.LUCENE_44, stopwords);

      QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer);
      Query query;
      query = parser.parse(QueryParser.escape(queryString));

      TopDocs results = searcher.search(query, null, numResults);
      for (ScoreDoc hit : results.scoreDocs) {
        String path = searcher.doc(hit.doc).get("path");
        hitPaths.add(path.substring(0, path.length() - 4)); // chop off the file extension (".txt")
      }
    } catch (IOException e) {
      System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    } catch (ParseException e) {
      System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

    return hitPaths;
  }
Пример #8
0
  public String giveHelp(String[] words) {
    // TODO Add here for example credits, copyright, license

    if (words.length <= 1) {
      IntroHelp.print(stdout);
      return "";
    }

    if (!indexAvailable()) {
      return "";
    }

    Analyzer multiFieldAnalyzer = Onthology.multiFieldAnalyzer();

    try {
      String searchFields[] = {"index", "synopsis", "doc"};

      QueryParser parser = new MultiFieldQueryParser(searchFields, multiFieldAnalyzer);

      StringBuilder sb = new StringBuilder();
      for (int i = 1; i < words.length; i++) {
        sb.append(" ").append(escapeForQuery(words[i]));
      }
      Query query;
      try {
        query = parser.parse(sb.toString());
      } catch (ParseException e) {
        stderr.println("Cannot parse query: " + sb + ", " + e.getMessage());
        return "";
      }

      if (words[0].equals("help")) {
        return reportHelp(words, indexSearcher.search(query, maxSearch).scoreDocs);
      } else {
        return reportApropos(words, indexSearcher.search(query, maxSearch).scoreDocs);
      }
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
    return "";
  }
Пример #9
0
  public SearchResult[] searchIndex(String queryString) {
    String field = "contents";

    IndexReader reader = null;
    List<SearchResult> searchResults = new ArrayList<SearchResult>();
    try {
      reader = DirectoryReader.open(dir);
      IndexSearcher searcher = new IndexSearcher(reader);
      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);

      QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer);
      Query query = parser.parse(queryString);
      TopDocs results = searcher.search(query, null, 100);
      ScoreDoc[] hits = results.scoreDocs;
      for (int i = 0; i < hits.length; i++) {
        Document doc = searcher.doc(hits[i].doc);
        String path = doc.get("path");
        if (path != null) {
          String title = doc.get("title");
          if (title != null) {
            searchResults.add(new SearchResult(path, title));
          }
        }
      }
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (ParseException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } finally {
      try {
        if (reader != null) {
          reader.close();
        }
      } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
      }
    }
    return searchResults.toArray(new SearchResult[searchResults.size()]);
  }
 @Override
 public Query getQuery(Element e) throws ParserException {
   String text = DOMUtils.getText(e);
   try {
     Query q = null;
     if (unSafeParser != null) {
       // synchronize on unsafe parser
       synchronized (unSafeParser) {
         q = unSafeParser.parse(text);
       }
     } else {
       String fieldName = DOMUtils.getAttribute(e, "fieldName", defaultField);
       // Create new parser
       QueryParser parser = createQueryParser(fieldName, analyzer);
       q = parser.parse(text);
     }
     q.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
     return q;
   } catch (ParseException e1) {
     throw new ParserException(e1.getMessage());
   }
 }
  private String getLuceneParseErrorMessage(
      org.apache.lucene.queryparser.classic.ParseException pee) {
    String msg = pee.toString();
    msg =
        msg.replaceAll(
            "org.apache.lucene.queryparser.classic.ParseException", "Lucene Parse Error\n");
    java.io.StringWriter sw = new StringWriter();
    VelocityEngine ve = VelocityUtil.getEngine();
    VelocityContext context = new VelocityContext();

    context.put("veloError", UtilMethods.htmlifyString(msg));

    // context.put("prettyError", UtilMethods.htmlifyString(msg));
    org.apache.velocity.Template template;
    try {
      template = ve.getTemplate(errorTemplate);
      template.merge(context, sw);
    } catch (Exception ex) {
      Logger.error(this.getClass(), "Unable to show velocityError", ex);
    }
    return sw.toString();
  }
Пример #12
0
  public static void main(String[] args) throws IOException, ParseException {
    // 0. Specify the analyzer for tokenizing text.
    // The same analyzer should be used for indexing and searching
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

    // 1. create the index
    Directory index = new RAMDirectory();

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, analyzer);

    IndexWriter w = new IndexWriter(index, config);
    addDoc(w, "Lucene in Action", "193398817");
    addDoc(w, "Lucene for Dummies", "55320055Z");
    addDoc(w, "Managing Gigabytes", "55063554A");
    addDoc(w, "The Art of Computer Science", "9900333X");

    w.close();

    // 2. query
    Query q = null;

    String query = "lucene";
    String[] fields = {"title", "isbn"};

    BooleanClause.Occur[] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
    try {
      q = MultiFieldQueryParser.parse(Version.LUCENE_40, query, fields, clauses, analyzer);
    } catch (org.apache.lucene.queryparser.classic.ParseException e) {
      e.printStackTrace();
    }

    // 3. search
    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
    searcher.search(q, collector);
    ScoreDoc[] hits = collector.topDocs().scoreDocs;

    // 4. display results
    System.out.println("Found " + hits.length + " hits.");
    for (int i = 0; i < hits.length; ++i) {
      int docId = hits[i].doc;
      Document d = searcher.doc(docId);
      System.out.println((i + 1) + ". " + d.get("isbn") + "\t" + d.get("title"));
    }

    // 分页(实现)-----start
    int currentpageNum = 2; // 当前页
    int pageSize = 1; // 每页显示多少条记录
    reader = DirectoryReader.open(index);
    searcher = new IndexSearcher(reader);
    collector = TopScoreDocCollector.create(currentpageNum * pageSize, true); // 根据当前页和每页多少,查询出结果
    searcher.search(q, collector);
    TopDocs docs = collector.topDocs();
    getResult(searcher, docs, currentpageNum, pageSize); // 分页显示
    // 分页(实现)-----end

    // reader can only be closed when there
    // is no need to access the documents any more.
    reader.close();
  }
Пример #13
0
  public String getContext(String sample) throws IOException {
    String result = "";
    try {
      String highlight_query_str = cfg.searchField + ":" + cfg.quotes + sample + cfg.quotes;
      String query_str = "padded_length:[" + String.format("%09d", cfg.minDocLen) + " TO *]";
      if (cfg.enableTitleSearch) {
        query_str +=
            " AND (title:"
                + cfg.quotes
                + sample
                + cfg.quotes
                + " OR "
                + cfg.searchField
                + ":"
                + cfg.quotes
                + sample
                + cfg.quotes
                + ")";
      } else {
        query_str += " AND (" + cfg.searchField + ":" + cfg.quotes + sample + cfg.quotes + ")";
      }

      Query query = parser.parse(query_str);
      Query highlight_query = parser.parse(highlight_query_str);

      if (cfg.debug == true) System.out.println("Searching (" + query + ").....");
      TopDocs topDocs = searcher.search(query, cfg.maxHits != 0 ? cfg.maxHits : Integer.MAX_VALUE);
      if (topDocs.totalHits > 0) {
        ScoreDoc[] hits = topDocs.scoreDocs;
        if (cfg.debug == true) System.out.println("Results (" + hits.length + ") :)");
        String data;
        int indx;
        SimpleHTMLFormatter htmlFormatter = null;
        Highlighter highlighter = null;
        if (cfg.displayHighlights) {
          htmlFormatter = new SimpleHTMLFormatter();
          highlighter = new Highlighter(htmlFormatter, new QueryScorer(highlight_query));
        }
        for (int i = 0; i < hits.length; i++) {
          if (cfg.displayDID) {
            result += String.format("\t%d", hits[i].doc);
          }
          if (cfg.displayScore) {
            result += String.format("\t%f", hits[i].score);
          }
          if (cfg.displayLen) {
            result += "\t" + indexReader.document(hits[i].doc).getField("length").stringValue();
          }
          if (cfg.displayTitle) {
            data = indexReader.document(hits[i].doc).getField("title").stringValue();
            if (cfg.removeParen && (indx = data.indexOf(" (")) != -1)
              data =
                  indexReader
                      .document(hits[i].doc)
                      .getField("title")
                      .stringValue()
                      .substring(0, indx);
            result += "\t" + data;
          }
          if (cfg.displayTxt || cfg.displayHighlights) {
            String text = indexReader.document(hits[i].doc).getField("text").stringValue();
            if (cfg.displayTxt) result += "\t" + text;
            if (cfg.displayHighlights) {
              TokenStream tokenStream =
                  TokenSources.getAnyTokenStream(
                      searcher.getIndexReader(), hits[i].doc, "text", stdAnalyzer);
              TextFragment[] frag;
              try {
                frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
                for (int j = 0; j < frag.length; j++) {
                  if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                    result += "\t" + (frag[j].toString());
                  }
                }
              } catch (InvalidTokenOffsetsException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
              } // highlighter.getBestFragments(tokenStream, text, 3, "...");
            }
          }
          if (cfg.displayCategories) {
            IndexableField categories[] = indexReader.document(hits[i].doc).getFields("category");
            for (int j = 0;
                j < categories.length && (cfg.numCategories == 0 || j < cfg.numCategories);
                j++) {
              result += "\t" + categories[j].stringValue();
            }
          }

          result += System.lineSeparator() + System.lineSeparator() + System.lineSeparator();
        }
      } else if (cfg.debug == true) System.out.println("No results found :(");
    } catch (ParseException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } //

    return result;
  }
 @ExceptionHandler({ParseException.class})
 public String parseExceptionHandler(ParseException pe) {
   return "ParseException: query parse exception!" + pe.getMessage();
 }