Exemplo n.º 1
0
  /**
   * give the id list of sentences, from Lucene index
   *
   * @param input input word
   * @param catalogName catalog (domain) name which we'd like to search in
   * @param limit how many hits are needed (0 means all)
   */
  public List<String> query(String input, String catalogName, int limit) {

    List<String> res = new ArrayList<String>();
    try {

      catalog c = catalogs.get(catalogName);
      IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(c.indexPath)));
      IndexSearcher searcher = new IndexSearcher(reader);

      QueryParser parser = new QueryParser("contents", analyzer);
      Query query = parser.parse(QueryParser.escape(input));

      int n = limit > 0 ? limit : searcher.count(query);
      if (n == 0) n = 1;
      TopDocs results = searcher.search(query, n);

      int endPos = limit;
      if (limit != 0) endPos = Math.min(results.totalHits, limit); // 1st n hits
      else endPos = results.totalHits; // all hits

      for (int i = 0; i < endPos; i++) {
        int id = results.scoreDocs[i].doc;
        Document doc = searcher.doc(id);
        res.add(doc.get("filename"));
      }
      reader.close();
      return res;

    } catch (ParseException e) {
      log(e.getMessage());
    } catch (IOException e) {
      log(e.getMessage());
    }
    return res;
  }
Exemplo n.º 2
0
  /** This function is only for test search. */
  public static List<String> searchQuery(
      String indexDir, String queryString, int numResults, CharArraySet stopwords) {
    String field = "contents";
    List<String> hitPaths = new ArrayList<String>();

    try {
      IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir)));
      IndexSearcher searcher = new IndexSearcher(reader);

      Analyzer analyzer = new MyAnalyzer(Version.LUCENE_44, stopwords);

      QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer);
      Query query;
      query = parser.parse(QueryParser.escape(queryString));

      TopDocs results = searcher.search(query, null, numResults);
      for (ScoreDoc hit : results.scoreDocs) {
        String path = searcher.doc(hit.doc).get("path");
        hitPaths.add(path.substring(0, path.length() - 4)); // chop off the file extension (".txt")
      }
    } catch (IOException e) {
      System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    } catch (ParseException e) {
      System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

    return hitPaths;
  }
Exemplo n.º 3
0
  @Override
  public List<Document> customRanking(List<Query> queryList) throws Exception {

    List<Document> result = new ArrayList<Document>();
    Map<String, ResultRep> docMap = new LinkedHashMap<String, ResultRep>();
    BooleanQuery bq = new BooleanQuery();

    bq.setMinimumNumberShouldMatch(1);
    Set<String> tgtset = tstSet == null ? new HashSet<String>() : tstSet;
    for (String clsStr : tgtset)
      bq.add(
          new TermQuery(new Term(Configuration.IDX_FIELD_PKG_NAME, QueryParser.escape(clsStr))),
          Occur.SHOULD);
    Filter idFilter = new QueryWrapperFilter(bq);

    try {
      Float avg = new Float(queryList.size());
      Float rank;
      // BooleanQuery mainQuery;
      for (Query query : queryList) {
        // mainQuery = new BooleanQuery();
        // mainQuery.add(query, Occur.SHOULD);
        // mainQuery.add(idFilter, Occur.MUST);
        result = searcher(query, idFilter);
        for (int i = 0; i < result.size(); i++) {
          Document doc = result.get(i);
          String key = doc.get(Configuration.IDX_FIELD_METHOD_DESCRIPTION_CATCHALL);
          rank = Float.parseFloat(doc.get("SCORE"));
          rank = rank / avg;
          if (!docMap.containsKey(key)) docMap.put(key, new ResultRep(doc, rank));
          else {
            ResultRep rep = docMap.get(key);
            rep.setRank(rep.getRank() + rank);
          }
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
    }

    TreeSet<ResultRep> set = new TreeSet<ResultRep>(new CustomComparator());

    for (String key : docMap.keySet()) set.add(docMap.get(key));

    result = new ArrayList<Document>();
    Iterator<ResultRep> iter = set.descendingIterator();
    ResultRep rep;
    while (iter.hasNext()) {
      rep = iter.next();
      if (tgtset.contains(rep.getDoc().get(Configuration.IDX_FIELD_CLASS_NAME).trim()))
        result.add(rep.getDoc());
    }

    if (result.size() > 10) result = result.subList(0, 10);
    return result;
  }
  private FullTextQuery buildFullTextQuery(
      UserSearchRequest request, Pageable pageable, Criteria criteria) {
    FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
    QueryBuilder qb =
        fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(User.class).get();

    @SuppressWarnings("rawtypes")
    BooleanJunction<BooleanJunction> junction = qb.bool();
    junction.must(qb.all().createQuery());

    if (StringUtils.hasText(request.getKeyword())) {
      Analyzer analyzer = fullTextEntityManager.getSearchFactory().getAnalyzer("synonyms");
      String[] fields =
          new String[] {
            "loginId", "name.firstName", "name.lastName",
          };
      MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
      parser.setDefaultOperator(QueryParser.Operator.AND);
      Query query = null;
      try {
        query = parser.parse(request.getKeyword());
      } catch (ParseException e1) {
        try {
          query = parser.parse(QueryParser.escape(request.getKeyword()));
        } catch (ParseException e2) {
          throw new RuntimeException(e2);
        }
      }
      junction.must(query);
    }

    if (!CollectionUtils.isEmpty(request.getRoles())) {
      for (User.Role role : request.getRoles()) {
        junction.must(qb.keyword().onField("roles").matching(role).createQuery());
      }
    }

    Query searchQuery = junction.createQuery();

    Sort sort = new Sort(new SortField("id", SortField.Type.STRING, false));

    FullTextQuery persistenceQuery =
        fullTextEntityManager
            .createFullTextQuery(searchQuery, User.class)
            .setCriteriaQuery(criteria)
            //				.setProjection("id")
            .setSort(sort);
    if (pageable != null) {
      persistenceQuery.setFirstResult(pageable.getOffset());
      persistenceQuery.setMaxResults(pageable.getPageSize());
    }
    return persistenceQuery;
  }
  /** This function is only for test search. */
  public static List<String> searchQuery(
      String indexDir, String queryString, int numResults, CharArraySet stopwords) {
    String field = "contents";
    List<String> hitPaths = new ArrayList<String>();

    try {
      IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexDir)));
      IndexSearcher searcher = new IndexSearcher(reader);

      // Used with mySimilarity.java for part E
      mySimilarity sim = new mySimilarity();
      searcher.setSimilarity(sim);

      // Used in part F
      // String field="Search_Field";
      // the second parameter calls the getAvgLength method and automatically calculates the
      // Average Length value
      // BM25Parameters.setAverageLength(field,getAvgLen(reader,field));
      // BM25Parameters.setB(0.75f);
      // BM25Parameters.setK1(2f);
      // BM25BooleanQuery query = new BM25BooleanQuery( "Cystic hydroma" ,field,analyzer);
      // System.out.println ("Searching for: " + query.toString(field));
      // TopDocs top=searcher.search(query, 10);
      // ScoreDoc[] docs = top.scoreDocs;
      // for (int i = 0; i < 10; i++){
      // System.out.println("the document with id= " + docs[i].doc + " has score ="+docs[i].score);
      // }

      Analyzer analyzer = new MyAnalyzer(Version.LUCENE_44, stopwords);

      QueryParser parser = new QueryParser(Version.LUCENE_44, field, analyzer);
      Query query;
      query = parser.parse(QueryParser.escape(queryString));

      TopDocs results = searcher.search(query, null, numResults);
      for (ScoreDoc hit : results.scoreDocs) {
        String path = searcher.doc(hit.doc).get("path");
        hitPaths.add(path.substring(0, path.length() - 4)); // chop off the file extension (".txt")
      }
    } catch (IOException e) {
      System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    } catch (ParseException e) {
      System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

    return hitPaths;
  }
  @Override
  public Page<Comment> search(CommentSearchRequest request, Pageable pageable) {
    FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
    QueryBuilder qb =
        fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Comment.class).get();

    @SuppressWarnings("rawtypes")
    BooleanJunction<BooleanJunction> junction = qb.bool();
    junction.must(qb.all().createQuery());

    if (StringUtils.hasText(request.getKeyword())) {
      Analyzer analyzer = fullTextEntityManager.getSearchFactory().getAnalyzer("synonyms");
      String[] fields = new String[] {"authorName", "content"};
      MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
      parser.setDefaultOperator(QueryParser.Operator.AND);
      Query query = null;
      try {
        query = parser.parse(request.getKeyword());
      } catch (ParseException e1) {
        try {
          query = parser.parse(QueryParser.escape(request.getKeyword()));
        } catch (ParseException e2) {
          throw new RuntimeException(e2);
        }
      }
      junction.must(query);
    }

    if (StringUtils.hasText(request.getLanguage())) {
      junction.must(
          qb.keyword().onField("post.language").matching(request.getLanguage()).createQuery());
    }

    if (request.getPostId() != null) {
      junction.must(qb.keyword().onField("post.id").matching(request.getPostId()).createQuery());
    }

    if (request.getApproved() != null) {
      junction.must(qb.keyword().onField("approved").matching(request.getApproved()).createQuery());
    }

    Query searchQuery = junction.createQuery();

    Session session = (Session) entityManager.getDelegate();
    Criteria criteria =
        session
            .createCriteria(Comment.class)
            .setFetchMode("post", FetchMode.JOIN)
            .setFetchMode("author", FetchMode.JOIN);

    Sort sort = null;
    if (pageable.getSort() != null) {
      if (pageable.getSort().getOrderFor("date") != null) {
        Order order = pageable.getSort().getOrderFor("date");
        sort =
            new Sort(
                new SortField(
                    "date", SortField.Type.STRING, order.getDirection().equals(Direction.DESC)),
                new SortField(
                    "id", SortField.Type.LONG, order.getDirection().equals(Direction.DESC)));
      }
    }

    if (sort == null) {
      sort =
          new Sort(
              new SortField("date", SortField.Type.STRING),
              new SortField("id", SortField.Type.LONG));
    }

    FullTextQuery persistenceQuery =
        fullTextEntityManager
            .createFullTextQuery(searchQuery, Comment.class)
            .setCriteriaQuery(criteria)
            .setSort(sort);
    persistenceQuery.setFirstResult(pageable.getOffset());
    persistenceQuery.setMaxResults(pageable.getPageSize());

    int resultSize = persistenceQuery.getResultSize();

    @SuppressWarnings("unchecked")
    List<Comment> results = persistenceQuery.getResultList();
    return new PageImpl<>(results, pageable, resultSize);
  }
  private FullTextQuery buildFullTextQuery(
      ArticleSearchRequest request, Pageable pageable, Criteria criteria) {
    FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
    QueryBuilder qb =
        fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Article.class).get();

    @SuppressWarnings("rawtypes")
    BooleanJunction<BooleanJunction> junction = qb.bool();
    junction.must(qb.all().createQuery());

    junction.must(
        qb.keyword().onField("drafted").ignoreAnalyzer().matching("_null_").createQuery());

    if (StringUtils.hasText(request.getKeyword())) {
      Analyzer analyzer = fullTextEntityManager.getSearchFactory().getAnalyzer("synonyms");
      String[] fields =
          new String[] {
            "title", "body",
            "categories.name", "tags.name",
          };
      MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
      parser.setDefaultOperator(QueryParser.Operator.AND);
      Query query = null;
      try {
        query = parser.parse(request.getKeyword());
      } catch (ParseException e1) {
        try {
          query = parser.parse(QueryParser.escape(request.getKeyword()));
        } catch (ParseException e2) {
          throw new RuntimeException(e2);
        }
      }
      junction.must(query);
    }
    if (request.getStatus() != null) {
      junction.must(qb.keyword().onField("status").matching(request.getStatus()).createQuery());
    }
    if (StringUtils.hasText(request.getLanguage())) {
      junction.must(qb.keyword().onField("language").matching(request.getLanguage()).createQuery());
    }

    if (request.getDateFrom() != null) {
      junction.must(qb.range().onField("date").above(request.getDateFrom()).createQuery());
    }
    if (request.getDateTo() != null) {
      junction.must(qb.range().onField("date").below(request.getDateTo()).createQuery());
    }

    if (!CollectionUtils.isEmpty(request.getCategoryIds())) {
      BooleanJunction<BooleanJunction> subJunction = qb.bool();
      for (long categoryId : request.getCategoryIds()) {
        subJunction.should(
            qb.keyword().onField("categories.id").matching(categoryId).createQuery());
      }
      junction.must(subJunction.createQuery());
    }
    if (!CollectionUtils.isEmpty(request.getCategoryCodes())) {
      BooleanJunction<BooleanJunction> subJunction = qb.bool();
      for (String categoryCode : request.getCategoryCodes()) {
        subJunction.should(
            qb.keyword().onField("categories.code").matching(categoryCode).createQuery());
      }
      junction.must(subJunction.createQuery());
    }

    if (!CollectionUtils.isEmpty(request.getTagIds())) {
      BooleanJunction<BooleanJunction> subJunction = qb.bool();
      for (long tagId : request.getTagIds()) {
        subJunction.should(qb.keyword().onField("tags.id").matching(tagId).createQuery());
      }
      junction.must(subJunction.createQuery());
    }
    if (!CollectionUtils.isEmpty(request.getTagNames())) {
      BooleanJunction<BooleanJunction> subJunction = qb.bool();
      for (String tagName : request.getTagNames()) {
        subJunction.should(qb.phrase().onField("tags.name").sentence(tagName).createQuery());
      }
      junction.must(subJunction.createQuery());
    }

    if (request.getAuthorId() != null) {
      junction.must(
          qb.keyword().onField("author.id").matching(request.getAuthorId()).createQuery());
    }

    Query searchQuery = junction.createQuery();

    Sort sort =
        new Sort(
            new SortField("date", SortField.Type.STRING, true),
            new SortField("id", SortField.Type.LONG, true));

    FullTextQuery persistenceQuery =
        fullTextEntityManager
            .createFullTextQuery(searchQuery, Article.class)
            .setCriteriaQuery(criteria)
            .setSort(sort);
    if (pageable != null) {
      persistenceQuery.setFirstResult(pageable.getOffset());
      persistenceQuery.setMaxResults(pageable.getPageSize());
    }
    return persistenceQuery;
  }