/**
 * Attaches a collated spell-check suggestion to {@code hits} when the feature is enabled in the
 * query configuration and the number of hits is below the configured threshold.
 *
 * <p>If the suggestion equals the keywords already on the search context, a blank string is
 * stored instead.
 *
 * @param searchContext the search context supplying the query configuration and keywords
 * @param hits the search results to annotate
 * @return always {@code true}
 * @throws SearchException declared by the overridden method
 */
@Override
public boolean process(SearchContext searchContext, Hits hits) throws SearchException {
  QueryConfig config = searchContext.getQueryConfig();

  if (!config.isCollatedSpellCheckResultEnabled()) {
    return true;
  }

  int scoresThreshold = config.getCollatedSpellCheckResultScoresThreshold();

  if (hits.getLength() >= scoresThreshold) {
    // Enough results were found; no suggestion is attached.
    return true;
  }

  String suggestion = SearchEngineUtil.spellCheckKeywords(searchContext);
  boolean sameAsOriginal = suggestion.equals(searchContext.getKeywords());

  hits.setCollatedSpellCheckResult(sameAsOriginal ? StringPool.BLANK : suggestion);

  return true;
}
/**
 * For debugging: runs the query given as the first command-line argument and prints the total
 * hit count followed by the details and summary of up to ten hits.
 *
 * <p>Exits with status {@code -1} when no query is supplied, {@code 1} when the search fails and
 * {@code 0} otherwise.
 *
 * @param args command-line arguments; {@code args[0]} is the query string
 * @throws Exception if the configuration or the bean cannot be created
 */
public static void main(String[] args) throws Exception {
  final String usage = "NutchBean query";

  if (args.length == 0) {
    System.err.println(usage);
    System.exit(-1);
  }

  final Configuration conf = NutchConfiguration.create();
  final NutchBean bean = new NutchBean(conf);
  try {
    final Query query = Query.parse(args[0], conf);
    final Hits hits = bean.search(query, 10);
    System.out.println("Total hits: " + hits.getTotal());

    // Bound the display by the hits actually returned as well as by the total: the details and
    // summaries arrays are sized from this value, so the print loop must use the same bound.
    final int length = (int) Math.min(Math.min(hits.getTotal(), hits.getLength()), 10);
    final Hit[] show = hits.getHits(0, length);
    final HitDetails[] details = bean.getDetails(show);
    final Summary[] summaries = bean.getSummary(details, query);

    for (int i = 0; i < length; i++) {
      System.out.println(" " + i + " " + details[i] + "\n" + summaries[i]);
    }
  } catch (Throwable t) {
    LOG.error("Exception occured while executing search: " + t, t);
    System.exit(1);
  }
  System.exit(0);
}
/**
 * Finds paths between {@code start} and {@code end} by stepping alternately from both ends
 * until neither side has anything left to visit.
 *
 * @param start the node to search from
 * @param end the node to search towards
 * @param stopAsap forwarded unchanged to {@code goOneStep} for both directions
 * @return a single one-node path when {@code start} equals {@code end}; otherwise the paths
 *     built from the least hits, or an empty list when there are none
 */
private Iterable<Path> internalPaths(Node start, Node end, boolean stopAsap) {
  if (start.equals(end)) {
    return Arrays.asList(PathImpl.singular(start));
  }

  Hits foundHits = new Hits();

  // State shared by both traversal directions.
  Collection<Long> visitedRels = new HashSet<Long>();
  MutableInteger frozenDepth = new MutableInteger(MutableInteger.NULL);
  MutableBoolean stopFlag = new MutableBoolean();
  MutableInteger currentDepth = new MutableInteger(0);

  final DirectionData fromStart =
      new DirectionData(start, visitedRels, frozenDepth, stopFlag, currentDepth, relExpander);
  final DirectionData fromEnd =
      new DirectionData(
          end, visitedRels, frozenDepth, stopFlag, currentDepth, relExpander.reversed());

  while (fromStart.hasNext() || fromEnd.hasNext()) {
    goOneStep(fromStart, fromEnd, foundHits, fromStart, stopAsap);
    goOneStep(fromEnd, fromStart, foundHits, fromStart, stopAsap);
  }

  Collection<Hit> shortest = foundHits.least();
  if (shortest == null) {
    return Collections.<Path>emptyList();
  }
  return hitsToPaths(shortest, start, end);
}
/**
 * Searches the "contents" field for the English spelling of {@code n} and prints the total
 * number of hits followed by the "id" field of at most the first three.
 *
 * @param n the number whose English form is searched for
 * @param searcher the searcher to run the query against
 * @throws Exception if parsing the query or searching fails
 */
private void searchFor(int n, Searcher searcher) throws Exception {
  System.out.println("Searching for " + n);

  Hits hits = searcher.search(QueryParser.parse(intToEnglish(n), "contents", ANALYZER));
  System.out.println("Search for " + n + ": total=" + hits.length());

  int shown = 0;
  while (shown < Math.min(3, hits.length())) {
    System.out.println("Hit for " + n + ": " + hits.doc(shown).get("id"));
    shown++;
  }
}
/**
 * Returns the top referers, optionally applying an exclusion string.
 *
 * @param excludeContaining value of the {@code exclude} query parameter; when {@code null} the
 *     lookup is made without an exclusion argument
 * @param maxNumber value of the {@code max} query parameter (defaults to 50)
 * @return the referers reported by {@code hits}
 */
@GET
@Produces({MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML})
public List<Referer> topReferers(
    @QueryParam("exclude") String excludeContaining,
    @QueryParam("max") @DefaultValue("50") int maxNumber) {
  if (excludeContaining != null) {
    return hits.topReferers(excludeContaining, maxNumber);
  }
  return hits.topReferers(maxNumber);
}
/**
 * Adds a sorted array of hits to the hits already stored for a chromosome.
 *
 * <p>The first new hit is compared with the last stored hit: if it sorts strictly after it the
 * new hits are appended, otherwise they are merged.
 *
 * <p>NOTE(review): the last stored entry is read at index {@code limit() - 1}, so this appears
 * to assume the existing buffers are non-empty; confirm against callers.
 *
 * @param hits the new hits; nothing happens when the array is empty
 * @param prefix passed through to {@code append} / {@code merge}
 * @param chrom chromosome identifier of the stored and new hits
 * @throws IOException if appending or merging fails
 */
public void appendSingleHits(SingleHit[] hits, String prefix, int chrom) throws IOException {
  if (hits.length == 0) {
    return;
  }

  int lastIndex = getPositionsBuffer().limit() - 1;
  SingleHit lastStored =
      new SingleHit(
          chrom,
          getPositionsBuffer().get(lastIndex),
          getWeightsBuffer().get(lastIndex),
          Hits.getStrandOne(getLASBuffer().get(lastIndex)),
          Hits.getLengthOne(getLASBuffer().get(lastIndex)));

  boolean startsAfterLast = hits[0].compareTo(lastStored) > 0;
  if (!startsAfterLast) {
    merge(hits, prefix, chrom);
    return;
  }
  append(hits, prefix, chrom);
}
/**
 * Reads a {@code Hits} object from the JSON token stream of {@code context}.
 *
 * <p>The fields {@code found}, {@code start}, {@code cursor} and {@code hit} are recognised one
 * level below the depth at which parsing started.
 *
 * @param context the unmarshaller context positioned at the value to read
 * @return the populated object, or {@code null} when the current token is a JSON null
 * @throws Exception if a nested unmarshaller or the context fails
 */
public Hits unmarshall(JsonUnmarshallerContext context) throws Exception {
  Hits result = new Hits();

  int startDepth = context.getCurrentDepth();
  String parentElement = context.getCurrentParentElement();
  int fieldDepth = startDepth + 1;

  JsonToken token = context.getCurrentToken();
  if (token == null) {
    token = context.nextToken();
  }
  if (token == VALUE_NULL) {
    return null;
  }

  // Runs until the stream is exhausted or the enclosing structure is closed (break below).
  for (; token != null; token = context.nextToken()) {
    if (token == FIELD_NAME || token == START_OBJECT) {
      if (context.testExpression("found", fieldDepth)) {
        context.nextToken();
        hits_setFound:
        result.setFound(context.getUnmarshaller(Long.class).unmarshall(context));
      }
      if (context.testExpression("start", fieldDepth)) {
        context.nextToken();
        result.setStart(context.getUnmarshaller(Long.class).unmarshall(context));
      }
      if (context.testExpression("cursor", fieldDepth)) {
        context.nextToken();
        result.setCursor(context.getUnmarshaller(String.class).unmarshall(context));
      }
      if (context.testExpression("hit", fieldDepth)) {
        context.nextToken();
        result.setHit(
            new ListUnmarshaller<Hit>(HitJsonUnmarshaller.getInstance()).unmarshall(context));
      }
    } else if (token == END_ARRAY || token == END_OBJECT) {
      boolean parentMatches =
          context.getLastParsedParentElement() == null
              || context.getLastParsedParentElement().equals(parentElement);
      if (parentMatches && context.getCurrentDepth() <= startDepth) {
        break;
      }
    }
  }

  return result;
}
private void merge(SingleHit[] hits, String prefix, int chrom) throws IOException { String postmp = getPositionsFname(prefix, chrom) + ".tmp"; String weightstmp = getWeightsFname(prefix, chrom) + ".tmp"; String lastmp = getLaSFname(prefix, chrom) + ".tmp"; RandomAccessFile positionsRAF = new RandomAccessFile(postmp, "rw"); RandomAccessFile weightsRAF = new RandomAccessFile(weightstmp, "rw"); RandomAccessFile lasRAF = new RandomAccessFile(lastmp, "rw"); int newsize = getPositionsBuffer().limit() + hits.length; IntBP posfile = new IntBP(positionsRAF.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, newsize * 4)); FloatBP weightfile = new FloatBP(weightsRAF.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, newsize * 4)); IntBP lasfile = new IntBP(lasRAF.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, newsize * 4)); int oldp = 0; int newp = 0; int pos = 0; IntBP oldpositions = getPositionsBuffer(); FloatBP oldweights = getWeightsBuffer(); IntBP oldlas = getLASBuffer(); while (oldp < oldpositions.limit() || newp < hits.length) { while (newp < hits.length && (oldp == oldpositions.limit() || hits[newp].pos <= oldpositions.get(oldp))) { posfile.put(pos, hits[newp].pos); weightfile.put(pos, hits[newp].weight); lasfile.put(pos, Hits.makeLAS(hits[newp].length, hits[newp].strand)); newp++; pos++; } while (oldp < oldpositions.limit() && (newp == hits.length || oldpositions.get(oldp) <= hits[newp].pos)) { posfile.put(pos, oldpositions.get(oldp)); weightfile.put(pos, oldweights.get(oldp)); lasfile.put(pos, oldlas.get(oldp)); oldp++; pos++; } // System.err.println(String.format("%d %d %d", pos, newp, oldp)); } posfile = null; weightfile = null; lasfile = null; oldpositions = null; oldweights = null; oldlas = null; positionsRAF.close(); weightsRAF.close(); lasRAF.close(); /* ideally this part with the renames would atomic... 
*/ (new File(postmp)).renameTo(new File(getPositionsFname(prefix, chrom))); (new File(weightstmp)).renameTo(new File(getWeightsFname(prefix, chrom))); (new File(lastmp)).renameTo(new File(getLaSFname(prefix, chrom))); }
/**
 * When a search produced no hits but did produce spell-check results, re-runs the search with
 * alternative keywords and copies those results into {@code hits}.
 *
 * <p>The context's keywords are first overridden with the collated spell-check result; if
 * keyword query suggestions are available, the first one replaces the keywords. Hits processing
 * is disabled on the query configuration before the second search.
 *
 * @param searchContext the search context whose keywords are replaced
 * @param hits the original results; overwritten with the alternate results
 * @return always {@code true}
 * @throws SearchException if suggesting keywords or the second search fails
 */
@Override
public boolean process(SearchContext searchContext, Hits hits) throws SearchException {
  if (hits.getLength() > 0) {
    return true;
  }

  Map<String, List<String>> spellCheckResults = hits.getSpellCheckResults();

  if (spellCheckResults == null) {
    return true;
  }

  searchContext.overrideKeywords(hits.getCollatedSpellCheckResult());

  String[] suggestions = SearchEngineUtil.suggestKeywordQueries(searchContext, 5);
  boolean hasSuggestion = (suggestions != null) && (suggestions.length > 0);

  if (hasSuggestion) {
    searchContext.setKeywords(suggestions[0]);
  }

  searchContext.getQueryConfig().setHitsProcessingEnabled(false);

  Indexer indexer = FacetedSearcher.getInstance();

  hits.copy(indexer.search(searchContext));

  return true;
}
private void goOneStep( DirectionData directionData, DirectionData otherSide, Hits hits, DirectionData startSide, boolean stopAsap) { if (!directionData.hasNext()) { return; } Node nextNode = directionData.next(); LevelData otherSideHit = otherSide.visitedNodes.get(nextNode); if (otherSideHit != null) { // This is a hit int depth = directionData.currentDepth + otherSideHit.depth; if (!hitDecider.isHit(depth)) { return; } if (directionData.sharedFrozenDepth.value == MutableInteger.NULL) { directionData.sharedFrozenDepth.value = depth; } if (depth <= directionData.sharedFrozenDepth.value) { directionData.haveFoundSomething = true; if (depth < directionData.sharedFrozenDepth.value) { directionData.sharedFrozenDepth.value = depth; // TODO Is it really ok to just stop the other side here? // I'm basing that decision on that it was the other side // which found the deeper paths (correct assumption?) otherSide.stop = true; } // Add it to the list of hits DirectionData startSideData = directionData == startSide ? directionData : otherSide; DirectionData endSideData = directionData == startSide ? otherSide : directionData; if (hits.add(new Hit(startSideData, endSideData, nextNode), depth) >= maxResultCount) { directionData.stop = true; otherSide.stop = true; } else if (stopAsap) { // This side found a hit, but wait for the other side to complete its // current depth // to see if it finds a shorter path. (i.e. stop this side and freeze the depth). directionData.stop = true; } } } }
/** * Search for pages matching a query, eliminating excessive hits with matching values for a named * field. Hits after the first <code>maxHitsPerDup</code> are removed from results. The remaining * hits have {@link Hit#moreFromDupExcluded()} set. * * <p>If maxHitsPerDup is zero then all hits are returned. * * @param query query * @param numHits number of requested hits * @param maxHitsPerDup the maximum hits returned with matching values, or zero * @param dedupField field name to check for duplicates * @param sortField Field to sort on (or null if no sorting). * @param reverse True if we are to reverse sort by <code>sortField</code>. * @return Hits the matching hits * @throws IOException */ public Hits search( Query query, int numHits, int maxHitsPerDup, String dedupField, String sortField, boolean reverse) throws IOException { if (maxHitsPerDup <= 0) // disable dup checking return search(query, numHits, dedupField, sortField, reverse); final float rawHitsFactor = this.conf.getFloat("searcher.hostgrouping.rawhits.factor", 2.0f); int numHitsRaw = (int) (numHits * rawHitsFactor); if (LOG.isInfoEnabled()) { LOG.info("searching for " + numHitsRaw + " raw hits"); } Hits hits = searchBean.search(query, numHitsRaw, dedupField, sortField, reverse); final long total = hits.getTotal(); final Map<String, DupHits> dupToHits = new HashMap<String, DupHits>(); final List<Hit> resultList = new ArrayList<Hit>(); final Set<Hit> seen = new HashSet<Hit>(); final List<String> excludedValues = new ArrayList<String>(); boolean totalIsExact = true; for (int rawHitNum = 0; rawHitNum < hits.getTotal(); rawHitNum++) { // get the next raw hit if (rawHitNum >= hits.getLength()) { // optimize query by prohibiting more matches on some excluded values final Query optQuery = (Query) query.clone(); for (int i = 0; i < excludedValues.size(); i++) { if (i == MAX_PROHIBITED_TERMS) break; optQuery.addProhibitedTerm(excludedValues.get(i), dedupField); } numHitsRaw = (int) (numHitsRaw * rawHitsFactor); 
if (LOG.isInfoEnabled()) { LOG.info("re-searching for " + numHitsRaw + " raw hits, query: " + optQuery); } hits = searchBean.search(optQuery, numHitsRaw, dedupField, sortField, reverse); if (LOG.isInfoEnabled()) { LOG.info("found " + hits.getTotal() + " raw hits"); } rawHitNum = -1; continue; } final Hit hit = hits.getHit(rawHitNum); if (seen.contains(hit)) continue; seen.add(hit); // get dup hits for its value final String value = hit.getDedupValue(); DupHits dupHits = dupToHits.get(value); if (dupHits == null) dupToHits.put(value, dupHits = new DupHits()); // does this hit exceed maxHitsPerDup? if (dupHits.size() == maxHitsPerDup) { // yes -- ignore the hit if (!dupHits.maxSizeExceeded) { // mark prior hits with moreFromDupExcluded for (int i = 0; i < dupHits.size(); i++) { dupHits.get(i).setMoreFromDupExcluded(true); } dupHits.maxSizeExceeded = true; excludedValues.add(value); // exclude dup } totalIsExact = false; } else { // no -- collect the hit resultList.add(hit); dupHits.add(hit); // are we done? // we need to find one more than asked for, so that we can tell if // there are more hits to be shown if (resultList.size() > numHits) break; } } final Hits results = new Hits(total, resultList.toArray(new Hit[resultList.size()])); results.setTotalIsExact(totalIsExact); return results; }
/**
 * Returns the collection of responses held by the nested {@code hits} object.
 *
 * @return the result of {@code hits.getHits()}
 */
public Collection<ElasticSearchResponse<T>> getHits() {
  Collection<ElasticSearchResponse<T>> responses = hits.getHits();
  return responses;
}
/**
 * Entry point: creates a {@code Hits} instance and runs {@code computeHits()} on it.
 *
 * @param args command-line arguments (unused)
 * @throws IOException propagated from {@code computeHits()}
 * @throws ParseException propagated from {@code computeHits()}
 */
public static void main(String[] args) throws IOException, ParseException {
  new Hits().computeHits();
}