@Override
  public boolean process(SearchContext searchContext, Hits hits) throws SearchException {
    // Attaches a collated spell-check suggestion to the hits when the feature is enabled and
    // the number of hits is below the configured threshold. Always returns true.
    QueryConfig config = searchContext.getQueryConfig();

    if (config.isCollatedSpellCheckResultEnabled()) {
      int threshold = config.getCollatedSpellCheckResultScoresThreshold();

      if (hits.getLength() < threshold) {
        String suggestion = SearchEngineUtil.spellCheckKeywords(searchContext);

        // A suggestion identical to the keywords already searched for is reported as blank.
        boolean sameAsKeywords = suggestion.equals(searchContext.getKeywords());

        hits.setCollatedSpellCheckResult(sameAsKeywords ? StringPool.BLANK : suggestion);
      }
    }

    return true;
  }
Пример #2
0
  /**
   * Command-line debugging entry point: runs a single query and prints the top hits.
   *
   * <p>Usage: {@code NutchBean query}. Prints the total hit count, then the details and summary
   * of at most ten hits. Exits with status -1 when no query argument is given, 1 when the search
   * fails, and 0 on success.
   *
   * @param args command-line arguments; {@code args[0]} is the query string
   * @throws Exception if creating the configuration or the bean fails (the search itself is
   *     guarded by a catch-all that logs and exits)
   */
  public static void main(String[] args) throws Exception {
    final String usage = "NutchBean query";

    if (args.length == 0) {
      System.err.println(usage);
      System.exit(-1);
    }

    final Configuration conf = NutchConfiguration.create();
    final NutchBean bean = new NutchBean(conf);
    try {
      final Query query = Query.parse(args[0], conf);
      final Hits hits = bean.search(query, 10);
      System.out.println("Total hits: " + hits.getTotal());
      final int length = (int) Math.min(hits.getTotal(), 10);
      final Hit[] show = hits.getHits(0, length);
      final HitDetails[] details = bean.getDetails(show);
      final Summary[] summaries = bean.getSummary(details, query);

      // Iterate over exactly the hits that were fetched above. Details and summaries were
      // requested for `length` hits, so that (not hits.getLength()) is the safe index bound.
      for (int i = 0; i < length; i++) {
        System.out.println(" " + i + " " + details[i] + "\n" + summaries[i]);
      }
    } catch (Throwable t) {
      LOG.error("Exception occured while executing search: " + t, t);
      System.exit(1);
    }
    System.exit(0);
  }
Пример #3
0
  /**
   * Searches from both {@code start} and {@code end} simultaneously, alternating one step per
   * side, and converts the hits reported by {@code Hits.least()} into paths.
   *
   * @param start node the search begins at
   * @param end node the search ends at
   * @param stopAsap forwarded to every step; see {@code goOneStep}
   * @return a single-node path when start equals end, the paths built from the least hits, or an
   *     empty list when there are no hits
   */
  private Iterable<Path> internalPaths(Node start, Node end, boolean stopAsap) {
    // Same node on both ends: the only path is the node itself.
    if (start.equals(end)) {
      return Arrays.asList(PathImpl.singular(start));
    }

    Hits foundHits = new Hits();

    // State shared by both search directions.
    Collection<Long> visitedRels = new HashSet<Long>();
    MutableInteger frozenDepth = new MutableInteger(MutableInteger.NULL);
    MutableBoolean stopFlag = new MutableBoolean();
    MutableInteger currentDepth = new MutableInteger(0);

    final DirectionData fromStart =
        new DirectionData(start, visitedRels, frozenDepth, stopFlag, currentDepth, relExpander);
    final DirectionData fromEnd =
        new DirectionData(
            end, visitedRels, frozenDepth, stopFlag, currentDepth, relExpander.reversed());

    // Alternate between the two sides until neither has anything left to expand.
    boolean moreToExpand = fromStart.hasNext() || fromEnd.hasNext();
    while (moreToExpand) {
      goOneStep(fromStart, fromEnd, foundHits, fromStart, stopAsap);
      goOneStep(fromEnd, fromStart, foundHits, fromStart, stopAsap);
      moreToExpand = fromStart.hasNext() || fromEnd.hasNext();
    }

    Collection<Hit> shortest = foundHits.least();
    if (shortest == null) {
      return Collections.<Path>emptyList();
    }
    return hitsToPaths(shortest, start, end);
  }
Пример #4
0
 /**
  * Searches the "contents" field for the English spelling of {@code n} and prints the total
  * number of hits followed by the "id" field of up to three of them.
  *
  * @param n number to search for
  * @param searcher searcher to run the query against
  * @throws Exception if parsing the query or searching fails
  */
 private void searchFor(int n, Searcher searcher) throws Exception {
   System.out.println("Searching for " + n);

   Hits hits = searcher.search(QueryParser.parse(intToEnglish(n), "contents", ANALYZER));
   System.out.println("Search for " + n + ": total=" + hits.length());

   final String label = "Hit for " + n + ": ";
   int shown = 0;
   while (shown < Math.min(3, hits.length())) {
     System.out.println(label + hits.doc(shown).get("id"));
     shown++;
   }
 }
Пример #5
0
 /**
  * Returns the top referers, optionally leaving out those matching an exclusion string.
  *
  * @param excludeContaining value of the "exclude" query parameter; when null the unfiltered
  *     lookup is used
  * @param maxNumber value of the "max" query parameter (defaults to 50)
  * @return the referers reported by the underlying hits object
  */
 @GET
 @Produces({MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML})
 public List<Referer> topReferers(
     @QueryParam("exclude") String excludeContaining,
     @QueryParam("max") @DefaultValue("50") int maxNumber) {
   if (excludeContaining != null) {
     return hits.topReferers(excludeContaining, maxNumber);
   }
   return hits.topReferers(maxNumber);
 }
Пример #6
0
 /**
  * Adds a sorted array of hits to the hits already stored for a chromosome.
  *
  * <p>When the first new hit compares greater than the last stored hit, the new hits are simply
  * appended; otherwise they are merged with the stored ones. An empty array is a no-op.
  *
  * @param hits new hits, sorted
  * @param prefix passed through to {@code append}/{@code merge}
  * @param chrom chromosome id the hits belong to
  * @throws IOException if appending or merging fails
  */
 public void appendSingleHits(SingleHit[] hits, String prefix, int chrom) throws IOException {
   if (hits.length == 0) {
     return;
   }

   // Index of the last stored entry.
   // NOTE(review): this is -1 when the stored positions buffer is empty - confirm callers
   // never reach this method in that state.
   int lastIndex = getPositionsBuffer().limit() - 1;

   // Rebuild the last stored hit so that it can be compared with the first new one.
   SingleHit tail =
       new SingleHit(
           chrom,
           getPositionsBuffer().get(lastIndex),
           getWeightsBuffer().get(lastIndex),
           Hits.getStrandOne(getLASBuffer().get(lastIndex)),
           Hits.getLengthOne(getLASBuffer().get(lastIndex)));

   boolean startsAfterStored = hits[0].compareTo(tail) > 0;
   if (!startsAfterStored) {
     merge(hits, prefix, chrom);
     return;
   }
   append(hits, prefix, chrom);
 }
  /**
   * Reads a {@code Hits} object from the JSON token stream of the given context.
   *
   * <p>Recognises the "found", "start", "cursor" and "hit" members one level below the depth at
   * which parsing started, and stops at the end token that closes the starting level.
   *
   * @param context unmarshaller context positioned at (or just before) the object to read
   * @return the populated object, or {@code null} when the current token is a JSON null
   * @throws Exception if a nested unmarshaller or the token stream fails
   */
  public Hits unmarshall(JsonUnmarshallerContext context) throws Exception {
    Hits result = new Hits();

    final int startDepth = context.getCurrentDepth();
    final String parentElement = context.getCurrentParentElement();
    final int memberDepth = startDepth + 1;

    JsonToken token = context.getCurrentToken();
    if (token == null) {
      token = context.nextToken();
    }
    if (token == VALUE_NULL) {
      return null;
    }

    for (; token != null; token = context.nextToken()) {
      if (token == FIELD_NAME || token == START_OBJECT) {
        if (context.testExpression("found", memberDepth)) {
          context.nextToken();
          result.setFound(context.getUnmarshaller(Long.class).unmarshall(context));
        }
        if (context.testExpression("start", memberDepth)) {
          context.nextToken();
          result.setStart(context.getUnmarshaller(Long.class).unmarshall(context));
        }
        if (context.testExpression("cursor", memberDepth)) {
          context.nextToken();
          result.setCursor(context.getUnmarshaller(String.class).unmarshall(context));
        }
        if (context.testExpression("hit", memberDepth)) {
          context.nextToken();
          result.setHit(
              new ListUnmarshaller<Hit>(HitJsonUnmarshaller.getInstance()).unmarshall(context));
        }
      } else if (token == END_ARRAY || token == END_OBJECT) {
        String lastParent = context.getLastParsedParentElement();
        boolean closesOurParent = lastParent == null || lastParent.equals(parentElement);
        // Finished once the closing token brings us back to (or above) the starting depth.
        if (closesOurParent && context.getCurrentDepth() <= startDepth) {
          break;
        }
      }
    }

    return result;
  }
Пример #8
0
  /**
   * Merges a sorted array of new hits with the hits already stored for a chromosome.
   *
   * <p>The merged positions, weights and length-and-strand values are written to three
   * memory-mapped ".tmp" files, which are then renamed over the real files. When a new hit and a
   * stored hit have the same position, the new hit is written first.
   *
   * <p>The temporary files are closed even if mapping or merging fails, and the mapped size is
   * computed in {@code long} so that large buffers cannot overflow {@code int}.
   *
   * @param hits new hits, sorted by position
   * @param prefix used to derive the file names
   * @param chrom chromosome id, used to derive the file names
   * @throws IOException if a temporary file cannot be opened, mapped or closed
   */
  private void merge(SingleHit[] hits, String prefix, int chrom) throws IOException {
    String postmp = getPositionsFname(prefix, chrom) + ".tmp";
    String weightstmp = getWeightsFname(prefix, chrom) + ".tmp";
    String lastmp = getLaSFname(prefix, chrom) + ".tmp";

    int newsize = getPositionsBuffer().limit() + hits.length;
    // Four bytes per entry; widen to long BEFORE multiplying, otherwise the product wraps
    // around for more than Integer.MAX_VALUE / 4 entries.
    long mappedBytes = 4L * newsize;

    RandomAccessFile positionsRAF = new RandomAccessFile(postmp, "rw");
    try {
      RandomAccessFile weightsRAF = new RandomAccessFile(weightstmp, "rw");
      try {
        RandomAccessFile lasRAF = new RandomAccessFile(lastmp, "rw");
        try {
          IntBP posfile =
              new IntBP(
                  positionsRAF.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, mappedBytes));
          FloatBP weightfile =
              new FloatBP(
                  weightsRAF.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, mappedBytes));
          IntBP lasfile =
              new IntBP(lasRAF.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, mappedBytes));

          int oldp = 0;
          int newp = 0;
          int pos = 0;
          IntBP oldpositions = getPositionsBuffer();
          FloatBP oldweights = getWeightsBuffer();
          IntBP oldlas = getLASBuffer();
          while (oldp < oldpositions.limit() || newp < hits.length) {
            // Copy new hits while they sort at or before the next stored position.
            while (newp < hits.length
                && (oldp == oldpositions.limit() || hits[newp].pos <= oldpositions.get(oldp))) {
              posfile.put(pos, hits[newp].pos);
              weightfile.put(pos, hits[newp].weight);
              lasfile.put(pos, Hits.makeLAS(hits[newp].length, hits[newp].strand));
              newp++;
              pos++;
            }
            // Copy stored hits while they sort at or before the next new position.
            while (oldp < oldpositions.limit()
                && (newp == hits.length || oldpositions.get(oldp) <= hits[newp].pos)) {
              posfile.put(pos, oldpositions.get(oldp));
              weightfile.put(pos, oldweights.get(oldp));
              lasfile.put(pos, oldlas.get(oldp));
              oldp++;
              pos++;
            }
          }
          // Drop the buffer references before the files are closed and renamed, as the original
          // code did (presumably so the mappings can be released - TODO confirm).
          posfile = null;
          weightfile = null;
          lasfile = null;
          oldpositions = null;
          oldweights = null;
          oldlas = null;
        } finally {
          lasRAF.close();
        }
      } finally {
        weightsRAF.close();
      }
    } finally {
      positionsRAF.close();
    }

    /* ideally this part with the renames would be atomic... */
    // NOTE(review): the boolean results of renameTo are ignored, so a failed rename goes
    // unnoticed.
    (new File(postmp)).renameTo(new File(getPositionsFname(prefix, chrom)));
    (new File(weightstmp)).renameTo(new File(getWeightsFname(prefix, chrom)));
    (new File(lastmp)).renameTo(new File(getLaSFname(prefix, chrom)));
  }
Пример #9
0
  /**
   * Replaces an empty result with the result of searching for spell-checked keywords.
   *
   * <p>Does nothing when there are hits already or when no spell-check results are available.
   * Otherwise the keywords are overridden with the collated spell-check result, replaced by the
   * first suggested keyword query if there is one, and the search is run again with hits
   * processing disabled. The new results are copied into {@code hits}.
   *
   * @param searchContext context whose keywords and query configuration are modified
   * @param hits hits to inspect and, if empty, overwrite
   * @return always {@code true}
   * @throws SearchException if the suggestion lookup or the alternate search fails
   */
  @Override
  public boolean process(SearchContext searchContext, Hits hits) throws SearchException {

    if ((hits.getLength() > 0) || (hits.getSpellCheckResults() == null)) {
      return true;
    }

    searchContext.overrideKeywords(hits.getCollatedSpellCheckResult());

    String[] suggestions = SearchEngineUtil.suggestKeywordQueries(searchContext, 5);

    boolean hasSuggestion = (suggestions != null) && (suggestions.length != 0);

    if (hasSuggestion) {
      searchContext.setKeywords(suggestions[0]);
    }

    // Switch off hits processing before searching again.
    searchContext.getQueryConfig().setHitsProcessingEnabled(false);

    Indexer indexer = FacetedSearcher.getInstance();

    hits.copy(indexer.search(searchContext));

    return true;
  }
Пример #10
0
  /**
   * Advances one search direction by a single node and records a hit when that node has already
   * been visited from the other direction.
   *
   * @param directionData the side being advanced
   * @param otherSide the opposite side, whose visited nodes are consulted
   * @param hits collector for the hits found so far
   * @param startSide the side that started from the start node; decides the argument order of a
   *     new {@code Hit}
   * @param stopAsap when true, this side is stopped as soon as it records a hit
   */
  private void goOneStep(
      DirectionData directionData,
      DirectionData otherSide,
      Hits hits,
      DirectionData startSide,
      boolean stopAsap) {
    if (!directionData.hasNext()) {
      return;
    }

    Node candidate = directionData.next();
    LevelData seenByOther = otherSide.visitedNodes.get(candidate);
    if (seenByOther == null) {
      // The other side has not reached this node, so the two searches do not meet here.
      return;
    }

    // The two sides meet at this node.
    int depth = directionData.currentDepth + seenByOther.depth;
    if (!hitDecider.isHit(depth)) {
      return;
    }

    // The first accepted hit freezes the depth.
    if (directionData.sharedFrozenDepth.value == MutableInteger.NULL) {
      directionData.sharedFrozenDepth.value = depth;
    }
    if (depth > directionData.sharedFrozenDepth.value) {
      return;
    }

    directionData.haveFoundSomething = true;
    if (depth < directionData.sharedFrozenDepth.value) {
      directionData.sharedFrozenDepth.value = depth;
      // TODO Is it really ok to just stop the other side here?
      // I'm basing that decision on that it was the other side
      // which found the deeper paths (correct assumption?)
      otherSide.stop = true;
    }

    // Record the hit with the start side first and the end side second.
    boolean advancingStartSide = directionData == startSide;
    DirectionData startSideData = advancingStartSide ? directionData : otherSide;
    DirectionData endSideData = advancingStartSide ? otherSide : directionData;
    boolean limitReached =
        hits.add(new Hit(startSideData, endSideData, candidate), depth) >= maxResultCount;

    // This side stops either because enough results were collected or because stopAsap asks it
    // to stop and let the other side finish its current depth, in case that yields a shorter
    // path.
    if (limitReached || stopAsap) {
      directionData.stop = true;
    }
    if (limitReached) {
      otherSide.stop = true;
    }
  }
Пример #11
0
  /**
   * Search for pages matching a query, eliminating excessive hits with matching values for a named
   * field. Hits after the first <code>maxHitsPerDup</code> are removed from results. The remaining
   * hits have {@link Hit#moreFromDupExcluded()} set.
   *
   * <p>If maxHitsPerDup is zero or negative, duplicate checking is disabled and the call is
   * delegated to the overload without <code>maxHitsPerDup</code>.
   *
   * <p>More raw hits than requested are fetched (<code>numHits</code> times the
   * <code>searcher.hostgrouping.rawhits.factor</code> setting, default 2.0). When the fetched raw
   * hits run out before enough results have been collected, the search is repeated with a larger
   * raw hit count and with some of the already-excluded values prohibited in the query.
   *
   * <p>Collection stops once one more hit than <code>numHits</code> has been gathered, so the
   * result may contain <code>numHits + 1</code> hits.
   *
   * @param query query
   * @param numHits number of requested hits
   * @param maxHitsPerDup the maximum hits returned with matching values, or zero
   * @param dedupField field name to check for duplicates
   * @param sortField Field to sort on (or null if no sorting).
   * @param reverse True if we are to reverse sort by <code>sortField</code>.
   * @return Hits the matching hits; its total is the total reported by the first raw search, and
   *     it is flagged as not exact when at least one hit was dropped as a duplicate
   * @throws IOException presumably propagated from the underlying search calls
   */
  public Hits search(
      Query query,
      int numHits,
      int maxHitsPerDup,
      String dedupField,
      String sortField,
      boolean reverse)
      throws IOException {
    if (maxHitsPerDup <= 0) // disable dup checking
    return search(query, numHits, dedupField, sortField, reverse);

    // over-fetch so that enough hits remain after duplicates have been dropped
    final float rawHitsFactor = this.conf.getFloat("searcher.hostgrouping.rawhits.factor", 2.0f);
    int numHitsRaw = (int) (numHits * rawHitsFactor);
    if (LOG.isInfoEnabled()) {
      LOG.info("searching for " + numHitsRaw + " raw hits");
    }
    Hits hits = searchBean.search(query, numHitsRaw, dedupField, sortField, reverse);
    // the reported total comes from this first search only; re-searches do not update it
    final long total = hits.getTotal();
    // dedup value -> hits collected so far for that value
    final Map<String, DupHits> dupToHits = new HashMap<String, DupHits>();
    final List<Hit> resultList = new ArrayList<Hit>();
    // hits already examined, so that a rescan after a re-search skips them
    final Set<Hit> seen = new HashSet<Hit>();
    // dedup values that have hit the limit; used to narrow later re-searches
    final List<String> excludedValues = new ArrayList<String>();
    boolean totalIsExact = true;
    for (int rawHitNum = 0; rawHitNum < hits.getTotal(); rawHitNum++) {
      // get the next raw hit
      if (rawHitNum >= hits.getLength()) {
        // the fetched raw hits are exhausted although the total says more exist:
        // optimize query by prohibiting more matches on some excluded values
        final Query optQuery = (Query) query.clone();
        for (int i = 0; i < excludedValues.size(); i++) {
          if (i == MAX_PROHIBITED_TERMS) break;
          optQuery.addProhibitedTerm(excludedValues.get(i), dedupField);
        }
        // grow the raw hit count by the same factor on every re-search
        numHitsRaw = (int) (numHitsRaw * rawHitsFactor);
        if (LOG.isInfoEnabled()) {
          LOG.info("re-searching for " + numHitsRaw + " raw hits, query: " + optQuery);
        }
        hits = searchBean.search(optQuery, numHitsRaw, dedupField, sortField, reverse);
        if (LOG.isInfoEnabled()) {
          LOG.info("found " + hits.getTotal() + " raw hits");
        }
        // restart the scan at the first hit of the new result (the loop increment makes this 0)
        rawHitNum = -1;
        continue;
      }

      final Hit hit = hits.getHit(rawHitNum);
      // skip hits that were already examined in an earlier pass
      if (seen.contains(hit)) continue;
      seen.add(hit);

      // get dup hits for its value
      final String value = hit.getDedupValue();
      DupHits dupHits = dupToHits.get(value);
      if (dupHits == null) dupToHits.put(value, dupHits = new DupHits());

      // does this hit exceed maxHitsPerDup?
      if (dupHits.size() == maxHitsPerDup) { // yes -- ignore the hit
        if (!dupHits.maxSizeExceeded) {

          // mark prior hits with moreFromDupExcluded
          for (int i = 0; i < dupHits.size(); i++) {
            dupHits.get(i).setMoreFromDupExcluded(true);
          }
          dupHits.maxSizeExceeded = true;

          excludedValues.add(value); // exclude dup
        }
        // at least one hit was dropped, so the reported total no longer matches what is shown
        totalIsExact = false;
      } else { // no -- collect the hit
        resultList.add(hit);
        dupHits.add(hit);

        // are we done?
        // we need to find one more than asked for, so that we can tell if
        // there are more hits to be shown
        if (resultList.size() > numHits) break;
      }
    }

    final Hits results = new Hits(total, resultList.toArray(new Hit[resultList.size()]));
    results.setTotalIsExact(totalIsExact);
    return results;
  }
 /**
  * Returns the individual hits of this response.
  *
  * @return the collection obtained from the {@code hits} field's own {@code getHits()}
  */
 public Collection<ElasticSearchResponse<T>> getHits() {
   return hits.getHits();
 }
Пример #13
0
 /**
  * Program entry point: creates a {@code Hits} instance and runs its hit computation.
  *
  * @param args command-line arguments (not used)
  * @throws IOException if the computation fails with an I/O error
  * @throws ParseException if the computation fails with a parse error
  */
 public static void main(String[] args) throws IOException, ParseException {
   new Hits().computeHits();
 }