예제 #1
0
 /**
  * Collects the best corrections found for the given candidate sets.
  *
  * <p>The number of misspellings handed to {@code findCandidates} is derived from {@code
  * errorFraction}: a value of 1.0 or more is truncated to an int and used as an absolute count,
  * while a smaller value is multiplied by the number of candidate sets and rounded. At least one
  * misspelling is always allowed.
  *
  * @param sets candidate sets to combine; an empty array yields {@code Correction.EMPTY}
  * @param errorFraction absolute error count when at least 1.0, otherwise a fraction of {@code
  *     sets.length}
  * @param cutoffScore passed through unchanged to {@code findCandidates}
  * @return the queued corrections; successive pops fill the array from its last slot to its first
  * @throws IOException propagated from the candidate search
  */
 public Correction[] findBestCandiates(
     CandidateSet[] sets, float errorFraction, double cutoffScore) throws IOException {
   if (sets.length == 0) {
     return Correction.EMPTY;
   }
   final PriorityQueue<Correction> queue =
       new PriorityQueue<Correction>(maxNumCorrections) {
         @Override
         protected boolean lessThan(Correction a, Correction b) {
           return a.compareTo(b) < 0;
         }
       };
   // Either an absolute count (>= 1.0) or a share of the number of candidate sets.
   final int requestedErrors =
       errorFraction >= 1.0 ? (int) errorFraction : Math.round(errorFraction * sets.length);
   final int allowedErrors = Math.max(1, requestedErrors);
   findCandidates(sets, new Candidate[sets.length], 0, allowedErrors, queue, cutoffScore, 0.0);

   // Drain the queue back to front so the first popped element lands in the last slot.
   final Correction[] result = new Correction[queue.size()];
   int slot = result.length;
   while (slot > 0) {
     result[--slot] = queue.pop();
   }
   assert queue.size() == 0;
   return result;
 }
예제 #2
0
 /**
  * Sums the costs of the scorers that remain in a bounded queue holding at most {@code
  * scorers.size() - minShouldMatch + 1} entries. The queue's ordering treats the scorer with the
  * higher cost as the lesser element. See MinShouldMatchSumScorer for an explanation.
  */
 private static long cost(Collection<BulkScorer> scorers, int minShouldMatch) {
   final int capacity = scorers.size() - minShouldMatch + 1;
   final PriorityQueue<BulkScorer> retained =
       new PriorityQueue<BulkScorer>(capacity) {
         @Override
         protected boolean lessThan(BulkScorer a, BulkScorer b) {
           return a.cost() > b.cost();
         }
       };
   for (BulkScorer candidate : scorers) {
     retained.insertWithOverflow(candidate);
   }

   // Drain whatever survived the bounded inserts and add up its cost.
   long total = 0;
   BulkScorer head;
   while ((head = retained.pop()) != null) {
     total += head.cost();
   }
   return total;
 }
 /**
  * Skips ahead to the first requested result. Since it is not known in which node the first
  * result lives, results are popped and compared one by one until {@code firstResult} of them
  * have been consumed or the queue runs dry.
  */
 private void goToFirstResult() {
   int skipped = 0;
   while (skipped < firstResult) {
     // A null head means there are no more results to skip over.
     final ClusteredDoc head = (ClusteredDoc) hq.pop();
     if (head == null) {
       return;
     }
     rechargeQueue(head);
     skipped++;
   }
 }
예제 #4
0
 /**
  * Returns the current value of the node at the top of the queue and then advances that node: if
  * it fetches another value the queue's top is updated, otherwise the node is popped.
  */
 @SuppressWarnings("unchecked")
 public T next() {
   final IteratorNode head = (IteratorNode) _queue.top();
   // Capture the value before fetch() moves the node forward.
   final T current = head._curVal;
   final boolean advanced = head.fetch();
   if (!advanced) {
     _queue.pop();
   } else {
     _queue.updateTop();
   }
   return current;
 }
예제 #5
0
 /**
  * Convenience shortcut that returns the most interesting words of a document as plain strings.
  * Callers needing more control can use {@link #retrieveTerms(Reader, String) retrieveTerms()}
  * directly.
  *
  * @param r the source document
  * @param fieldName field passed to analyzer to use when analyzing the content
  * @return the most interesting words in the document, at most {@code maxQueryTerms} of them
  * @see #retrieveTerms(java.io.Reader, String)
  * @see #setMaxQueryTerms
  */
 public String[] retrieveInterestingTerms(Reader r, String fieldName) throws IOException {
   final ArrayList<Object> words = new ArrayList<>(maxQueryTerms);
   final PriorityQueue<ScoreTerm> ranked = retrieveTerms(r, fieldName);
   // retrieveTerms returns all words, which is probably not useful to the caller,
   // so only the top maxQueryTerms entries are kept.
   int remaining = maxQueryTerms;
   for (ScoreTerm term = ranked.pop(); term != null && remaining-- > 0; term = ranked.pop()) {
     words.add(term.word); // the word itself is the interesting part of the entry
   }
   return words.toArray(new String[words.size()]);
 }
  /**
   * Pops documents from the queue and appends their fetched values to {@code orderedValues}. The
   * loop keeps going while {@code orderedValues} does not yet reach past {@code index} or fewer
   * than {@code fetchSize} values were loaded by this call; it ends early when the queue is empty.
   */
  private void loadTo(int index) {
    for (int loaded = 0; orderedValues.size() <= index || loaded < fetchSize; loaded++) {
      // A null head means there are no more results.
      final ClusteredDoc head = (ClusteredDoc) hq.pop();
      if (head == null) {
        return;
      }

      final ClusteredTopDocs origin = rechargeQueue(head);
      orderedValues.add(fetchValue(head, origin));
    }
  }
예제 #7
0
  /**
   * Drains the given queue into {@code query}, adding one optional (SHOULD) term clause per entry.
   * When boosting is enabled, each clause is boosted by its score relative to the score of the
   * first entry popped. Stops quietly once the query refuses further clauses.
   */
  private void addToQuery(PriorityQueue<ScoreTerm> q, BooleanQuery query) {
    float topScore = -1; // -1 marks "no score seen yet"

    for (ScoreTerm term = q.pop(); term != null; term = q.pop()) {
      final TermQuery clause = new TermQuery(new Term(term.topField, term.word));

      if (boost) {
        final float score = term.score;
        if (topScore == -1) {
          topScore = score;
        }
        clause.setBoost(boostFactor * score / topScore);
      }

      try {
        query.add(clause, BooleanClause.Occur.SHOULD);
      } catch (BooleanQuery.TooManyClauses ignore) {
        // The query is full; the remaining terms are dropped.
        break;
      }
    }
  }
예제 #8
0
  /**
   * Builds the More Like This query by draining the given queue into a boolean query with one
   * optional (SHOULD) clause per entry. When boosting is enabled, each term query is wrapped in a
   * {@code BoostQuery} whose boost is its score relative to the score of the first entry popped.
   * Stops adding clauses once the builder refuses more.
   */
  private Query createQuery(PriorityQueue<ScoreTerm> q) {
    final BooleanQuery.Builder builder = new BooleanQuery.Builder();
    float topScore = -1; // -1 marks "no score seen yet"

    for (ScoreTerm term = q.pop(); term != null; term = q.pop()) {
      Query clause = new TermQuery(new Term(term.topField, term.word));

      if (boost) {
        final float score = term.score;
        if (topScore == -1) {
          topScore = score;
        }
        clause = new BoostQuery(clause, boostFactor * score / topScore);
      }

      try {
        builder.add(clause, BooleanClause.Occur.SHOULD);
      } catch (BooleanQuery.TooManyClauses ignore) {
        // The query is full; the remaining terms are dropped.
        break;
      }
    }
    return builder.build();
  }
예제 #9
0
  /**
   * Fetch upcoming vehicle departures from a stop. It goes through all patterns passing the stop
   * for the previous, current and next service date relative to the search start time. It uses a
   * priority queue to keep track of the next departures. The queue is shared between all dates, as
   * services from the previous service date can visit the stop later than the current service
   * date's services. This happens eg. with sleeper trains.
   *
   * <p>TODO: Add frequency based trips
   *
   * @param stop Stop object to perform the search for
   * @param startTime Start time for the search. Seconds from UNIX epoch; 0 means the current time
   * @param timeRange Searches forward for timeRange seconds from startTime
   * @param numberOfDepartures Number of departures to fetch per pattern
   * @return one {@link StopTimesInPattern} for every pattern through the stop that yielded at
   *     least one departure; patterns without any matching departure are omitted
   */
  public List<StopTimesInPattern> stopTimesForStop(
      Stop stop, long startTime, int timeRange, int numberOfDepartures) {

    if (startTime == 0) {
      startTime = System.currentTimeMillis() / 1000;
    }
    List<StopTimesInPattern> ret = new ArrayList<>();
    TimetableSnapshot snapshot = null;
    if (graph.timetableSnapshotSource != null) {
      snapshot = graph.timetableSnapshotSource.getTimetableSnapshot();
    }
    // Anchor the service dates on the requested start time rather than on today, so that a
    // search starting on another day examines the service dates surrounding that day.
    java.util.Date searchDate = new java.util.Date(startTime * 1000);
    ServiceDate[] serviceDates = {
      new ServiceDate(searchDate).previous(),
      new ServiceDate(searchDate),
      new ServiceDate(searchDate).next()
    };

    for (TripPattern pattern : patternsForStop.get(stop)) {

      // Use the Lucene PriorityQueue, which has a fixed size. The comparison is inverted so that
      // the departure with the latest timestamp is the queue's "least" element.
      PriorityQueue<TripTimeShort> pq =
          new PriorityQueue<TripTimeShort>(numberOfDepartures) {
            @Override
            protected boolean lessThan(TripTimeShort tripTimeShort, TripTimeShort t1) {
              // Calculate exact timestamp
              return (tripTimeShort.serviceDay + tripTimeShort.realtimeDeparture)
                  > (t1.serviceDay + t1.realtimeDeparture);
            }
          };

      // Loop through all possible days
      for (ServiceDate serviceDate : serviceDates) {
        ServiceDay sd =
            new ServiceDay(graph, serviceDate, calendarService, pattern.route.getAgency().getId());
        Timetable tt;
        if (snapshot != null) {
          tt = snapshot.resolve(pattern, serviceDate);
        } else {
          tt = pattern.scheduledTimetable;
        }

        if (!tt.temporallyViable(sd, startTime, timeRange, true)) continue;

        int secondsSinceMidnight = sd.secondsSinceMidnight(startTime);
        // sidx tracks the position of currStop within the pattern; a stop may occur several times.
        int sidx = 0;
        for (Stop currStop : pattern.stopPattern.stops) {
          if (currStop == stop) {
            // Scheduled trips: keep departures at or after the start time (-1 means no departure).
            for (TripTimes t : tt.tripTimes) {
              if (!sd.serviceRunning(t.serviceCode)) continue;
              if (t.getDepartureTime(sidx) != -1
                  && t.getDepartureTime(sidx) >= secondsSinceMidnight) {
                pq.insertWithOverflow(new TripTimeShort(t, sidx, stop, sd));
              }
            }

            // Frequency-based trips: materialize up to numberOfDepartures departures per entry.
            // TODO: This needs to be adapted after #1647 is merged
            for (FrequencyEntry freq : tt.frequencyEntries) {
              if (!sd.serviceRunning(freq.tripTimes.serviceCode)) continue;
              int departureTime = freq.nextDepartureTime(sidx, secondsSinceMidnight);
              if (departureTime == -1) continue;
              int lastDeparture =
                  freq.endTime
                      + freq.tripTimes.getArrivalTime(sidx)
                      - freq.tripTimes.getDepartureTime(0);
              int i = 0;
              while (departureTime <= lastDeparture && i < numberOfDepartures) {
                pq.insertWithOverflow(
                    new TripTimeShort(freq.materialize(sidx, departureTime, true), sidx, stop, sd));
                departureTime += freq.headway;
                i++;
              }
            }
          }
          sidx++;
        }
      }

      if (pq.size() != 0) {
        StopTimesInPattern stopTimes = new StopTimesInPattern(pattern);
        // Each popped element is inserted at the front, so the list ends up in the reverse of
        // the pop order.
        while (pq.size() != 0) {
          stopTimes.times.add(0, pq.pop());
        }
        ret.add(stopTimes);
      }
    }
    return ret;
  }
예제 #10
0
  /**
   * Auxiliary method used by the {@link #merge} impls. A sort value of null is used to indicate
   * that docs should be sorted by score.
   *
   * @param sort sort to merge by, or null to merge by score
   * @param start number of leading merged hits to skip
   * @param size maximum number of hits to return after skipping {@code start}
   * @param shardHits per-shard results; each returned hit has its {@code shardIndex} set to the
   *     index of the shard it came from
   * @return the merged hits with the summed total hit count and the largest max score among the
   *     shards that contributed hits (NaN when no shard contributed any)
   * @throws IOException declared for the merge queue construction
   */
  private static TopDocs mergeAux(Sort sort, int start, int size, TopDocs[] shardHits)
      throws IOException {
    final PriorityQueue<ShardRef> queue;
    if (sort == null) {
      queue = new ScoreMergeSortQueue(shardHits);
    } else {
      queue = new MergeSortQueue(sort, shardHits);
    }

    int totalHitCount = 0;
    int availHitCount = 0;
    // Float.MIN_VALUE is the smallest POSITIVE float, so seeding with it would report 1.4e-45
    // whenever every shard's max score is zero or negative. Start below every real score instead.
    float maxScore = Float.NEGATIVE_INFINITY;
    for (int shardIDX = 0; shardIDX < shardHits.length; shardIDX++) {
      final TopDocs shard = shardHits[shardIDX];
      // totalHits can be non-zero even if no hits were
      // collected, when searchAfter was used:
      totalHitCount += shard.totalHits;
      if (shard.scoreDocs != null && shard.scoreDocs.length > 0) {
        availHitCount += shard.scoreDocs.length;
        queue.add(new ShardRef(shardIDX));
        maxScore = Math.max(maxScore, shard.getMaxScore());
      }
    }

    if (availHitCount == 0) {
      maxScore = Float.NaN;
    }

    final ScoreDoc[] hits;
    if (availHitCount <= start) {
      hits = new ScoreDoc[0];
    } else {
      hits = new ScoreDoc[Math.min(size, availHitCount - start)];
      int requestedResultWindow = start + size;
      int numIterOnHits = Math.min(availHitCount, requestedResultWindow);
      int hitUpto = 0;
      while (hitUpto < numIterOnHits) {
        assert queue.size() > 0;
        // Take the shard whose current hit sorts first and consume that hit.
        ShardRef ref = queue.pop();
        final ScoreDoc hit = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++];
        hit.shardIndex = ref.shardIndex;
        // Hits before the requested offset are consumed but not returned.
        if (hitUpto >= start) {
          hits[hitUpto - start] = hit;
        }

        hitUpto++;

        if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) {
          // Not done with these TopDocs yet:
          queue.add(ref);
        }
      }
    }

    if (sort == null) {
      return new TopDocs(totalHitCount, hits, maxScore);
    } else {
      return new TopFieldDocs(totalHitCount, hits, sort.getSort(), maxScore);
    }
  }