@Override
public boolean equals(Object obj) {
  if (this == obj) return true;
  if (!super.equals(obj)) return false;
  if (getClass() != obj.getClass()) return false;
  RegexQuery other = (RegexQuery) obj;
  if (regexImpl == null) {
    if (other.regexImpl != null) return false;
  } else if (!regexImpl.equals(other.regexImpl)) return false;
  if (term == null) {
    if (other.term != null) return false;
  } else if (!term.equals(other.term)) return false;
  return true;
}
@Override
public boolean equals(Object obj) {
  if (this == obj) return true;
  if (!super.equals(obj)) return false;
  if (getClass() != obj.getClass()) return false;
  FuzzyQuery other = (FuzzyQuery) obj;
  if (maxEdits != other.maxEdits) return false;
  if (prefixLength != other.prefixLength) return false;
  if (maxExpansions != other.maxExpansions) return false;
  if (transpositions != other.transpositions) return false;
  if (term == null) {
    if (other.term != null) return false;
  } else if (!term.equals(other.term)) return false;
  return true;
}
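// A hedged companion sketch: classes overriding equals() as above generally
// must also override hashCode() so that equal objects hash alike. This example
// mirrors the FuzzyQuery equals() method; the RegexQuery case would follow the
// same pattern with regexImpl and term. It assumes transpositions is a boolean
// and that super.hashCode() exists, as implied by the super.equals(obj) call.
@Override
public int hashCode() {
  final int prime = 31;
  int result = super.hashCode();
  result = prime * result + maxEdits;
  result = prime * result + prefixLength;
  result = prime * result + maxExpansions;
  result = prime * result + (transpositions ? 1231 : 1237);
  result = prime * result + ((term == null) ? 0 : term.hashCode());
  return result;
}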
public boolean skipTo(Term target) throws IOException {
  // already here
  if (t != null && t.equals(target)) return true;

  int startIdx = tindex.index.search(target.text());

  if (startIdx >= 0) {
    // we hit the term exactly... lucky us!
    if (tenum != null) tenum.close();
    tenum = reader.terms(target);
    pos = startIdx << tindex.intervalBits;
    return setTerm();
  }

  // we didn't hit the term exactly
  startIdx = -startIdx - 1;

  if (startIdx == 0) {
    // our target occurs *before* the first term
    if (tenum != null) tenum.close();
    tenum = reader.terms(target);
    pos = 0;
    return setTerm();
  }

  // back up to the start of the block
  startIdx--;

  if ((pos >> tindex.intervalBits) == startIdx
      && t != null
      && t.text().compareTo(target.text()) <= 0) {
    // we are already in the right block and the current term is before the term we want,
    // so we don't need to seek.
  } else {
    // seek to the right block
    if (tenum != null) tenum.close();
    tenum = reader.terms(target.createTerm(tindex.index.get(startIdx)));
    pos = startIdx << tindex.intervalBits;
    setTerm(); // should be true since it's in the index
  }

  while (t != null && t.text().compareTo(target.text()) < 0) {
    next();
  }

  return t != null;
}
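// Sketch of the lookup convention skipTo() appears to rely on:
// tindex.index.search() seems to follow java.util.Arrays.binarySearch
// semantics, returning a non-negative index on an exact hit and
// (-(insertionPoint) - 1) on a miss. The hypothetical, self-contained snippet
// below shows how a miss is converted back into the containing index block;
// the names indexedTerms and intervalBits are illustrative assumptions.
import java.util.Arrays;

public class IndexBlockLookup {
  public static void main(String[] args) {
    String[] indexedTerms = { "apple", "mango", "zebra" }; // every 2^intervalBits-th term
    int intervalBits = 7;                                  // 128 terms per block

    int idx = Arrays.binarySearch(indexedTerms, "kiwi");   // miss: returns -(1) - 1 = -2
    if (idx < 0) {
      idx = -idx - 1;  // insertion point: first indexed term greater than the target
      idx--;           // back up to the block whose first term is <= target
    }
    // position of the block's first term in the full term list
    int pos = idx << intervalBits;
    System.out.println("block " + idx + ", term position " + pos); // block 0, term position 0
  }
}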
/** add a term, delta-encoded against the previously added term */
public void add(Term term) {
  assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;
  try {
    int prefix = sharedPrefix(lastTerm.bytes, term.bytes);
    int suffix = term.bytes.length - prefix;
    if (term.field.equals(lastTerm.field)) {
      // low bit 0: same field as the previous term
      output.writeVInt(prefix << 1);
    } else {
      // low bit 1: field changed, so write the new field name too
      output.writeVInt(prefix << 1 | 1);
      output.writeString(term.field);
    }
    // write only the suffix bytes not shared with the previous term
    output.writeVInt(suffix);
    output.writeBytes(term.bytes.bytes, term.bytes.offset + prefix, suffix);
    lastTerm.bytes.copyBytes(term.bytes);
    lastTerm.field = term.field;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
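// Hedged sketch of the sharedPrefix() helper the writer above relies on: it
// presumably returns the number of leading bytes the two terms have in common,
// so that only the differing suffix needs to be written. A minimal
// implementation under that assumption, using the org.apache.lucene.util.BytesRef
// fields (bytes, offset, length) exactly as the add() method accesses them:
private int sharedPrefix(BytesRef a, BytesRef b) {
  int limit = Math.min(a.length, b.length);
  int i = 0;
  while (i < limit && a.bytes[a.offset + i] == b.bytes[b.offset + i]) {
    i++;
  }
  return i; // number of common leading bytes
}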
private void loadTerms(Term skipTo) {

  if (initTerm == null) initTerm = skipTo;

  // choose starting term
  String startTerm =
      CassandraUtils.hashKey(
          indexName + CassandraUtils.delimeter + CassandraUtils.createColumnName(skipTo));

  // ending term. for the initial query we don't care, since we only pull 2 terms
  String endTerm = "";

  // the boundary condition for this search. currently the field.
  String boundryTerm =
      CassandraUtils.hashKey(
          indexName
              + CassandraUtils.delimeter
              + CassandraUtils.createColumnName(skipTo.field(), CassandraUtils.finalToken));

  if ((!skipTo.equals(chunkBoundryTerm) || termPosition == 0) && termCache != null) {
    termDocFreqBuffer = termCache.subMap(skipTo, termCache.lastKey());
  } else {
    termDocFreqBuffer = null;
  }

  if (termDocFreqBuffer != null) {
    termBuffer = termDocFreqBuffer.keySet().toArray(new Term[] {});
    termPosition = 0;
    logger.debug("Found " + startTerm + " in cache");
    return;
  } else if (chunkCount > 1 && actualInitSize < maxChunkSize) {
    // include last term
    if (skipTo.equals(chunkBoundryTerm) && termCache.containsKey(skipTo)) {
      termBuffer = new Term[] {skipTo};
      termDocFreqBuffer = termCache.subMap(skipTo, termCache.lastKey());
    } else {
      termBuffer = new Term[] {};
    }
    termPosition = 0;
    return; // done!
  }

  chunkCount++;

  // the first time we grab just a few keys
  int count = maxInitSize;

  // otherwise we grab all the rest of the keys
  if (chunkBoundryTerm != null) {
    count = maxChunkSize;
    startTerm =
        CassandraUtils.hashKey(
            indexName
                + CassandraUtils.delimeter
                + CassandraUtils.createColumnName(chunkBoundryTerm));

    // after the first pass use the boundary term, since we know on pass 2
    // we are using the OPP (OrderPreservingPartitioner)
    endTerm = boundryTerm;
  }

  long start = System.currentTimeMillis();

  termDocFreqBuffer = new TreeMap<Term, List<ColumnOrSuperColumn>>();

  ColumnParent columnParent = new ColumnParent(CassandraUtils.termVecColumnFamily);
  SlicePredicate slicePredicate = new SlicePredicate();

  // get all columns
  SliceRange sliceRange = new SliceRange(new byte[] {}, new byte[] {}, true, Integer.MAX_VALUE);
  slicePredicate.setSlice_range(sliceRange);

  List<KeySlice> columns;
  try {
    columns =
        client.get_range_slice(
            CassandraUtils.keySpace,
            columnParent,
            slicePredicate,
            startTerm,
            endTerm,
            count,
            ConsistencyLevel.ONE);
  } catch (InvalidRequestException e) {
    throw new RuntimeException(e);
  } catch (TException e) {
    throw new RuntimeException(e);
  } catch (UnavailableException e) {
    throw new RuntimeException(e);
  } catch (TimedOutException e) {
    throw new RuntimeException(e);
  }

  // term to start with next time
  actualInitSize = columns.size();
  logger.debug(
      "Found "
          + columns.size()
          + " keys in range:"
          + startTerm
          + " to "
          + endTerm
          + " in "
          + (System.currentTimeMillis() - start)
          + "ms");

  if (actualInitSize > 0) {
    for (KeySlice entry : columns) {

      // term keys look like wikipedia/body/wiki
      String termStr =
          entry
              .getKey()
              .substring(
                  entry.getKey().indexOf(CassandraUtils.delimeter)
                      + CassandraUtils.delimeter.length());
      Term term = CassandraUtils.parseTerm(termStr);

      logger.debug(termStr + " has " + entry.getColumns().size());

      // check for tombstone keys or incorrect keys (from RP, the RandomPartitioner)
      if (entry.getColumns().size() > 0
          && term.field().equals(skipTo.field())
          // from this index
          && entry
              .getKey()
              .equals(
                  CassandraUtils.hashKey(
                      indexName
                          + CassandraUtils.delimeter
                          + term.field()
                          + CassandraUtils.delimeter
                          + term.text())))
        termDocFreqBuffer.put(term, entry.getColumns());
    }

    if (!termDocFreqBuffer.isEmpty()) {
      chunkBoundryTerm = termDocFreqBuffer.lastKey();
    }
  }

  // add a final key (excluded in submap below)
  termDocFreqBuffer.put(finalTerm, null);

  // put in cache
  for (Term termKey : termDocFreqBuffer.keySet()) {

    if (termCache == null) {
      termCache = termDocFreqBuffer;
    } else {
      termCache.putAll(termDocFreqBuffer);
    }

    indexReader.addTermEnumCache(termKey, this);
  }

  // cache the initial term too
  indexReader.addTermEnumCache(skipTo, this);

  termBuffer = termDocFreqBuffer.keySet().toArray(new Term[] {});
  termPosition = 0;

  long end = System.currentTimeMillis();

  logger.debug(
      "loadTerms: " + startTerm + "(" + termBuffer.length + ") took " + (end - start) + "ms");
}
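// Illustrative sketch of the row-key layout loadTerms() assumes: a term key is
// indexName + delimiter + field + delimiter + text (the comment above gives
// "wikipedia/body/wiki" as an example), and the validity check re-hashes that
// composite to reject keys that fall in the scanned range but belong to another
// index. The "/" delimiter and the helper below are assumptions based on the
// usage above, not the actual CassandraUtils implementation.
public class TermKeyLayout {
  static final String DELIMITER = "/"; // assumed; CassandraUtils.delimeter in the real code

  static String termKey(String indexName, String field, String text) {
    return indexName + DELIMITER + field + DELIMITER + text;
  }

  public static void main(String[] args) {
    String key = termKey("wikipedia", "body", "wiki"); // "wikipedia/body/wiki"
    // strip the index-name prefix, as loadTerms() does with substring():
    String termStr = key.substring(key.indexOf(DELIMITER) + DELIMITER.length());
    System.out.println(termStr); // "body/wiki" -> fed to CassandraUtils.parseTerm(termStr)
  }
}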