/**
 * Searches the candidate sets for the highest-scoring corrections.
 *
 * <p>An {@code errorFraction} of 1.0 or more is taken as an absolute number of misspellings
 * (truncated to an int); a smaller value is taken as a fraction of {@code sets.length} and
 * rounded. At least one misspelling is always allowed.
 *
 * @param sets the candidate sets to combine
 * @param errorFraction absolute count (&gt;= 1.0) or fraction (&lt; 1.0) of allowed misspellings
 * @param cutoffScore score threshold handed on to the candidate search
 * @return the collected corrections, stored in reverse order of how the queue pops them;
 *     {@code Correction.EMPTY} when {@code sets} is empty
 * @throws IOException if the candidate search fails
 */
public Correction[] findBestCandiates(
    CandidateSet[] sets, float errorFraction, double cutoffScore) throws IOException {
  if (sets.length == 0) {
    return Correction.EMPTY;
  }

  final PriorityQueue<Correction> best =
      new PriorityQueue<Correction>(maxNumCorrections) {
        @Override
        protected boolean lessThan(Correction a, Correction b) {
          return a.compareTo(b) < 0;
        }
      };

  final int numMisspellings =
      errorFraction >= 1.0 ? (int) errorFraction : Math.round(errorFraction * sets.length);

  findCandidates(
      sets,
      new Candidate[sets.length],
      0,
      Math.max(1, numMisspellings),
      best,
      cutoffScore,
      0.0);

  // Fill the array back to front so the first element popped ends up last.
  final Correction[] result = new Correction[best.size()];
  int slot = result.length;
  while (slot > 0) {
    slot--;
    result[slot] = best.pop();
  }
  assert best.size() == 0;
  return result;
}
/**
 * Returns the value currently held by the node at the top of the queue and advances that node.
 *
 * <p>If the node can fetch another value it stays in the queue and its position is refreshed;
 * otherwise it is removed.
 *
 * @return the value that was at the top of the queue
 * @throws java.util.NoSuchElementException if the queue is empty
 */
@SuppressWarnings("unchecked")
public T next() {
  // Without this guard an exhausted iterator would dereference a null top() result and fail
  // with a NullPointerException instead of the exception the Iterator contract prescribes.
  if (_queue.size() <= 0) {
    throw new java.util.NoSuchElementException();
  }
  IteratorNode ctx = (IteratorNode) _queue.top();
  T val = ctx._curVal;
  if (ctx.fetch()) {
    // The node has a new current value, so its position in the queue must be re-evaluated.
    _queue.updateTop();
  } else {
    // The node could not fetch another value; drop it from the merge.
    _queue.pop();
  }
  return val;
}
/** * Convenience routine to make it easy to return the most interesting words in a document. More * advanced users will call {@link #retrieveTerms(Reader, String) retrieveTerms()} directly. * * @param r the source document * @param fieldName field passed to analyzer to use when analyzing the content * @return the most interesting words in the document * @see #retrieveTerms(java.io.Reader, String) * @see #setMaxQueryTerms */ public String[] retrieveInterestingTerms(Reader r, String fieldName) throws IOException { ArrayList<Object> al = new ArrayList<>(maxQueryTerms); PriorityQueue<ScoreTerm> pq = retrieveTerms(r, fieldName); ScoreTerm scoreTerm; int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably // not useful to our caller... // we just want to return the top words while (((scoreTerm = pq.pop()) != null) && lim-- > 0) { al.add(scoreTerm.word); // the 1st entry is the interesting word } String[] res = new String[al.size()]; return al.toArray(res); }
public MergedIterator(final Iterator<T>[] iterators, final Comparator<T> comparator) { this(iterators.length, comparator); for (Iterator<T> iterator : iterators) { IteratorNode ctx = new IteratorNode(iterator); if (ctx.fetch()) _queue.add(ctx); } }
public MergedIterator(final List<Iterator<T>> iterators, final Comparator<T> comparator) { this(iterators.size(), comparator); for (Iterator<T> iterator : iterators) { IteratorNode ctx = new IteratorNode(iterator); if (ctx.fetch()) _queue.add(ctx); } }
// See MinShouldMatchSumScorer for an explanation private static long cost(Collection<BulkScorer> scorers, int minShouldMatch) { final PriorityQueue<BulkScorer> pq = new PriorityQueue<BulkScorer>(scorers.size() - minShouldMatch + 1) { @Override protected boolean lessThan(BulkScorer a, BulkScorer b) { return a.cost() > b.cost(); } }; for (BulkScorer scorer : scorers) { pq.insertWithOverflow(scorer); } long cost = 0; for (BulkScorer scorer = pq.pop(); scorer != null; scorer = pq.pop()) { cost += scorer.cost(); } return cost; }
/** * As we don't know where (in what node) is the first result, we have to compare the results until * we achieve the first result. */ private void goToFirstResult() { for (int i = 0; i < firstResult; i++) { // getting the next scoreDoc. If null, then there is no more results ClusteredDoc scoreDoc = (ClusteredDoc) hq.pop(); if (scoreDoc == null) { return; } rechargeQueue(scoreDoc); } }
/** * As the priority queue have one result for each node, when we fetch a result we have to recharge * the queue (getting the next score doc from the correct node) * * @param scoreDoc * @return */ private ClusteredTopDocs rechargeQueue(ClusteredDoc scoreDoc) { // "recharging" the queue // the queue has a top element of each node. As we removed a element, we have to get the next // element from this node and put on the queue. ClusteredTopDocs topDoc = topDocsResponses.get(scoreDoc.getNodeUuid()); ScoreDoc score = topDoc.getNext(); // if score == null -> this node does not have more results... if (score != null) { hq.add(score); } return topDoc; }
/**
 * Offers the current candidate path to the corrections queue.
 *
 * <p>The incoming {@code score} is exponentiated first. A path whose score does not exceed
 * {@code cutoffScore} is ignored. While the queue holds fewer than {@code maxNumCorrections}
 * entries a copy of the path is added; once it is full, the top entry is overwritten in place
 * when it compares lower than the new score and path.
 *
 * @param candidates the candidate sets; its length sizes the copied path
 * @param path the candidate path being evaluated
 * @param corrections queue collecting the corrections
 * @param cutoffScore score a path must exceed to be considered
 * @param score the path's score before exponentiation
 * @throws IOException declared for the scoring call used in the assertion
 */
private void updateTop(
    CandidateSet[] candidates,
    Candidate[] path,
    PriorityQueue<Correction> corrections,
    double cutoffScore,
    double score)
    throws IOException {
  score = Math.exp(score);
  assert Math.abs(score - score(path, candidates)) < 0.00001;

  // Written as a negated '>' so that a NaN score is rejected exactly like a low score.
  if (!(score > cutoffScore)) {
    return;
  }

  if (corrections.size() < maxNumCorrections) {
    // Still room: store an independent copy, since the caller keeps mutating 'path'.
    final Candidate[] snapshot = new Candidate[candidates.length];
    System.arraycopy(path, 0, snapshot, 0, path.length);
    corrections.add(new Correction(score, snapshot));
    return;
  }

  if (corrections.top().compareTo(score, path) >= 0) {
    return;
  }

  // Queue is full: recycle the top entry instead of allocating a new one.
  final Correction recycled = corrections.top();
  System.arraycopy(path, 0, recycled.candidates, 0, path.length);
  recycled.score = score;
  corrections.updateTop();
}
private void loadTo(int index) { int fetched = 0; while (orderedValues.size() <= index || fetched < fetchSize) { // getting the next scoreDoc. If null, then there is no more results ClusteredDoc scoreDoc = (ClusteredDoc) hq.pop(); if (scoreDoc == null) { return; } ClusteredTopDocs topDoc = rechargeQueue(scoreDoc); // fetching the value Object value = fetchValue(scoreDoc, topDoc); orderedValues.add(value); fetched++; } }
private void setTopDocs(HashMap<UUID, ClusteredTopDocs> topDocsResponses) { this.topDocsResponses = topDocsResponses; if (sort != null) { // reversing sort fields to FieldDocSortedHitQueue work properly for (SortField sf : sort.getSort()) { boolean reverse = (Boolean) ReflectionUtil.getValue(sf, "reverse"); ReflectionUtil.setValue(sf, "reverse", !reverse); } hq = ISPNPriorityQueueFactory.getFieldDocSortedHitQueue( topDocsResponses.size(), sort.getSort()); } else hq = ISPNPriorityQueueFactory.getHitQueue(topDocsResponses.size()); // taking the first value of each queue for (ClusteredTopDocs ctp : topDocsResponses.values()) { if (ctp.hasNext()) hq.add(ctp.getNext()); } }
/**
 * Drains the given term queue into an existing boolean query, adding one optional (SHOULD)
 * term clause per queued term.
 *
 * <p>When boosting is enabled each clause is boosted by
 * {@code boostFactor * score / bestScore}, where {@code bestScore} is the score of the first
 * term popped. Adding stops quietly once the query refuses further clauses.
 *
 * @param q the queue of scored terms; it is consumed by this call
 * @param query the query to extend
 */
private void addToQuery(PriorityQueue<ScoreTerm> q, BooleanQuery query) {
  float bestScore = -1;
  for (ScoreTerm term = q.pop(); term != null; term = q.pop()) {
    final TermQuery clause = new TermQuery(new Term(term.topField, term.word));

    if (boost) {
      if (bestScore == -1) {
        bestScore = term.score;
      }
      final float termScore = term.score;
      clause.setBoost(boostFactor * termScore / bestScore);
    }

    try {
      query.add(clause, BooleanClause.Occur.SHOULD);
    } catch (BooleanQuery.TooManyClauses ignore) {
      // The query is full; the remaining terms are deliberately left out.
      break;
    }
  }
}
/**
 * Builds the "more like this" query from a queue of scored terms, adding one optional (SHOULD)
 * term clause per queued term.
 *
 * <p>When boosting is enabled each clause is wrapped in a boost of
 * {@code boostFactor * score / bestScore}, where {@code bestScore} is the score of the first
 * term popped. Adding stops quietly once the builder refuses further clauses.
 *
 * @param q the queue of scored terms; it is consumed by this call
 * @return the assembled boolean query
 */
private Query createQuery(PriorityQueue<ScoreTerm> q) {
  final BooleanQuery.Builder builder = new BooleanQuery.Builder();
  float bestScore = -1;

  for (ScoreTerm term = q.pop(); term != null; term = q.pop()) {
    Query clause = new TermQuery(new Term(term.topField, term.word));

    if (boost) {
      if (bestScore == -1) {
        bestScore = term.score;
      }
      final float termScore = term.score;
      clause = new BoostQuery(clause, boostFactor * termScore / bestScore);
    }

    try {
      builder.add(clause, BooleanClause.Occur.SHOULD);
    } catch (BooleanQuery.TooManyClauses ignore) {
      // The query is full; the remaining terms are deliberately left out.
      break;
    }
  }

  return builder.build();
}
/**
 * Reports whether any node is still queued.
 *
 * @return {@code true} if the queue holds at least one entry
 */
public boolean hasNext() {
  final int pending = _queue.size();
  return pending > 0;
}
/** * Auxiliary method used by the {@link #merge} impls. A sort value of null is used to indicate * that docs should be sorted by score. */ private static TopDocs mergeAux(Sort sort, int start, int size, TopDocs[] shardHits) throws IOException { final PriorityQueue<ShardRef> queue; if (sort == null) { queue = new ScoreMergeSortQueue(shardHits); } else { queue = new MergeSortQueue(sort, shardHits); } int totalHitCount = 0; int availHitCount = 0; float maxScore = Float.MIN_VALUE; for (int shardIDX = 0; shardIDX < shardHits.length; shardIDX++) { final TopDocs shard = shardHits[shardIDX]; // totalHits can be non-zero even if no hits were // collected, when searchAfter was used: totalHitCount += shard.totalHits; if (shard.scoreDocs != null && shard.scoreDocs.length > 0) { availHitCount += shard.scoreDocs.length; queue.add(new ShardRef(shardIDX)); maxScore = Math.max(maxScore, shard.getMaxScore()); // System.out.println(" maxScore now " + maxScore + " vs " + shard.getMaxScore()); } } if (availHitCount == 0) { maxScore = Float.NaN; } final ScoreDoc[] hits; if (availHitCount <= start) { hits = new ScoreDoc[0]; } else { hits = new ScoreDoc[Math.min(size, availHitCount - start)]; int requestedResultWindow = start + size; int numIterOnHits = Math.min(availHitCount, requestedResultWindow); int hitUpto = 0; while (hitUpto < numIterOnHits) { assert queue.size() > 0; ShardRef ref = queue.pop(); final ScoreDoc hit = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++]; hit.shardIndex = ref.shardIndex; if (hitUpto >= start) { hits[hitUpto - start] = hit; } // System.out.println(" hitUpto=" + hitUpto); // System.out.println(" doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score); hitUpto++; if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) { // Not done with this these TopDocs yet: queue.add(ref); } } } if (sort == null) { return new TopDocs(totalHitCount, hits, maxScore); } else { return new TopFieldDocs(totalHitCount, hits, sort.getSort(), maxScore); } }
/** * Fetch upcoming vehicle departures from a stop. It goes though all patterns passing the stop for * the previous, current and next service date. It uses a priority queue to keep track of the next * departures. The queue is shared between all dates, as services from the previous service date * can visit the stop later than the current service date's services. This happens eg. with * sleeper trains. * * <p>TODO: Add frequency based trips * * @param stop Stop object to perform the search for * @param startTime Start time for the search. Seconds from UNIX epoch * @param timeRange Searches forward for timeRange seconds from startTime * @param numberOfDepartures Number of departures to fetch per pattern * @return */ public List<StopTimesInPattern> stopTimesForStop( Stop stop, long startTime, int timeRange, int numberOfDepartures) { if (startTime == 0) { startTime = System.currentTimeMillis() / 1000; } List<StopTimesInPattern> ret = new ArrayList<>(); TimetableSnapshot snapshot = null; if (graph.timetableSnapshotSource != null) { snapshot = graph.timetableSnapshotSource.getTimetableSnapshot(); } ServiceDate[] serviceDates = { new ServiceDate().previous(), new ServiceDate(), new ServiceDate().next() }; for (TripPattern pattern : patternsForStop.get(stop)) { // Use the Lucene PriorityQueue, which has a fixed size PriorityQueue<TripTimeShort> pq = new PriorityQueue<TripTimeShort>(numberOfDepartures) { @Override protected boolean lessThan(TripTimeShort tripTimeShort, TripTimeShort t1) { // Calculate exact timestamp return (tripTimeShort.serviceDay + tripTimeShort.realtimeDeparture) > (t1.serviceDay + t1.realtimeDeparture); } }; // Loop through all possible days for (ServiceDate serviceDate : serviceDates) { ServiceDay sd = new ServiceDay(graph, serviceDate, calendarService, pattern.route.getAgency().getId()); Timetable tt; if (snapshot != null) { tt = snapshot.resolve(pattern, serviceDate); } else { tt = pattern.scheduledTimetable; } if (!tt.temporallyViable(sd, 
startTime, timeRange, true)) continue; int secondsSinceMidnight = sd.secondsSinceMidnight(startTime); int sidx = 0; for (Stop currStop : pattern.stopPattern.stops) { if (currStop == stop) { for (TripTimes t : tt.tripTimes) { if (!sd.serviceRunning(t.serviceCode)) continue; if (t.getDepartureTime(sidx) != -1 && t.getDepartureTime(sidx) >= secondsSinceMidnight) { pq.insertWithOverflow(new TripTimeShort(t, sidx, stop, sd)); } } // TODO: This needs to be adapted after #1647 is merged for (FrequencyEntry freq : tt.frequencyEntries) { if (!sd.serviceRunning(freq.tripTimes.serviceCode)) continue; int departureTime = freq.nextDepartureTime(sidx, secondsSinceMidnight); if (departureTime == -1) continue; int lastDeparture = freq.endTime + freq.tripTimes.getArrivalTime(sidx) - freq.tripTimes.getDepartureTime(0); int i = 0; while (departureTime <= lastDeparture && i < numberOfDepartures) { pq.insertWithOverflow( new TripTimeShort(freq.materialize(sidx, departureTime, true), sidx, stop, sd)); departureTime += freq.headway; i++; } } } sidx++; } } if (pq.size() != 0) { StopTimesInPattern stopTimes = new StopTimesInPattern(pattern); while (pq.size() != 0) { stopTimes.times.add(0, pq.pop()); } ret.add(stopTimes); } } return ret; }
/**
 * Creates a queue initialized for the given maximum size.
 *
 * @param maxSz the size passed to the superclass {@code initialize} call and remembered in
 *     {@code myMaxSize}
 */
PQueue(int maxSz) {
  // Initialize the superclass first, then record the size locally.
  super.initialize(maxSz);
  myMaxSize = maxSz;
}