/**
 * Compares this query with another object for equality.
 *
 * <p>Two instances are equal when the superclass considers them equal, both have the same
 * runtime class, and their {@code regexImpl} and {@code term} fields are equal. A {@code null}
 * field only matches another {@code null} field.
 *
 * @param obj the object to compare against
 * @return {@code true} if {@code obj} is equal to this query
 */
@Override
 public boolean equals(Object obj) {
   if (this == obj) {
     return true;
   }
   // super.equals is consulted before the runtime classes are compared.
   if (!super.equals(obj) || getClass() != obj.getClass()) {
     return false;
   }
   final RegexQuery that = (RegexQuery) obj;
   final boolean sameImpl =
       (regexImpl == null) ? that.regexImpl == null : regexImpl.equals(that.regexImpl);
   if (!sameImpl) {
     return false;
   }
   return (term == null) ? that.term == null : term.equals(that.term);
 }
Example #2
0
 /**
  * Compares this query with another object for equality.
  *
  * <p>Two instances are equal when the superclass considers them equal, both have the same
  * runtime class, their {@code maxEdits}, {@code prefixLength}, {@code maxExpansions} and
  * {@code transpositions} settings match, and their {@code term} fields are equal (a
  * {@code null} term only matches another {@code null} term).
  *
  * @param obj the object to compare against
  * @return {@code true} if {@code obj} is equal to this query
  */
 @Override
 public boolean equals(Object obj) {
   if (this == obj) {
     return true;
   }
   // super.equals is consulted before the runtime classes are compared.
   if (!super.equals(obj) || getClass() != obj.getClass()) {
     return false;
   }
   final FuzzyQuery that = (FuzzyQuery) obj;
   final boolean sameSettings =
       maxEdits == that.maxEdits
           && prefixLength == that.prefixLength
           && maxExpansions == that.maxExpansions
           && transpositions == that.transpositions;
   if (!sameSettings) {
     return false;
   }
   return (term == null) ? that.term == null : term.equals(that.term);
 }
  /**
   * Moves this enumerator forward or backward so that the current term is the first one whose
   * text is not less than the text of {@code target}.
   *
   * <p>The term index ({@code tindex.index}) is searched for the target text. On an exact hit,
   * or when the target sorts before the first indexed term, the underlying enumerator is
   * reopened directly at {@code target}. Otherwise the enumerator is positioned at the indexed
   * term that starts the containing block (unless it is already inside that block at or before
   * the target) and then advanced with {@code next()} until the target text is reached or
   * passed.
   *
   * @param target the term to seek to
   * @return {@code true} if the enumerator is already on {@code target}; the result of
   *     {@code setTerm()} when the enumerator was reopened directly at {@code target};
   *     otherwise whether a current term remains after scanning forward
   * @throws IOException if closing, reopening or advancing the underlying enumerator fails
   */
  public boolean skipTo(Term target) throws IOException {
    // Nothing to do when the current term is already the requested one.
    if (t != null && t.equals(target)) {
      return true;
    }

    final int hit = tindex.index.search(target.text());

    if (hit >= 0) {
      // Exact match on an indexed term: reopen the underlying enum right at the target.
      if (tenum != null) {
        tenum.close();
      }
      tenum = reader.terms(target);
      pos = hit << tindex.intervalBits;
      return setTerm();
    }

    // No exact match: a negative result is decoded as -(insertion point) - 1.
    final int insertionPoint = -hit - 1;

    if (insertionPoint == 0) {
      // The target sorts *before* the first indexed term.
      if (tenum != null) {
        tenum.close();
      }
      tenum = reader.terms(target);
      pos = 0;
      return setTerm();
    }

    // The indexed term just before the insertion point marks the start of the block.
    final int block = insertionPoint - 1;

    // True when we are already inside the right block and the current term does not sort
    // after the target, in which case scanning forward from here is enough.
    final boolean canScanFromHere =
        (pos >> tindex.intervalBits) == block
            && t != null
            && t.text().compareTo(target.text()) <= 0;

    if (!canScanFromHere) {
      // Seek to the start of the right block.
      if (tenum != null) {
        tenum.close();
      }
      tenum = reader.terms(target.createTerm(tindex.index.get(block)));
      pos = block << tindex.intervalBits;
      setTerm(); // should be true since the start term comes from the index
    }

    // Walk forward until the current term is no longer before the target (or we run out).
    while (t != null && t.text().compareTo(target.text()) < 0) {
      next();
    }

    return t != null;
  }
Example #4
0
    /**
     * Appends a term to {@code output}, encoded relative to the previously added term.
     *
     * <p>Written in order: a VInt holding the length of the byte prefix shared with the previous
     * term, shifted left by one, with the low bit set when the field differs from the previous
     * term's field; the field name (only when the low bit is set); a VInt with the number of
     * remaining bytes; and those remaining bytes. Afterwards the term is remembered as the new
     * previous term.
     *
     * @param term the term to add; asserted to compare greater than the previously added term
     *     unless the previous term equals {@code new Term("")}
     * @throws RuntimeException wrapping any {@link IOException} raised while writing
     */
    public void add(Term term) {
      assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;

      try {
        final int shared = sharedPrefix(lastTerm.bytes, term.bytes);
        final int remaining = term.bytes.length - shared;
        final boolean fieldChanged = !term.field.equals(lastTerm.field);

        // Low bit of the header flags a field change; the upper bits carry the prefix length.
        int header = shared << 1;
        if (fieldChanged) {
          header |= 1;
        }
        output.writeVInt(header);
        if (fieldChanged) {
          output.writeString(term.field);
        }

        output.writeVInt(remaining);
        output.writeBytes(term.bytes.bytes, term.bytes.offset + shared, remaining);

        // Remember this term so the next call can compute its shared prefix against it.
        lastTerm.bytes.copyBytes(term.bytes);
        lastTerm.field = term.field;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  /**
   * Fills {@code termBuffer} and {@code termDocFreqBuffer} with terms starting at
   * {@code skipTo} and resets {@code termPosition} to 0.
   *
   * <p>One of three paths is taken:
   *
   * <ol>
   *   <li>If {@code termCache} exists and either {@code skipTo} is not the current chunk
   *       boundary term or {@code termPosition} is 0, the buffers are served from the cache
   *       (the sub-map from {@code skipTo} up to, but excluding, the cache's last key).
   *   <li>Otherwise, if more than one chunk has been fetched and the last fetch returned fewer
   *       than {@code maxChunkSize} keys, no further request is made: the buffer holds only
   *       {@code skipTo} (when it is the cached chunk boundary term) or is empty.
   *   <li>Otherwise a range-slice request is issued through {@code client}; the usable keys are
   *       stored, {@code finalTerm} is appended as an end marker, and the result is merged into
   *       {@code termCache} and registered with {@code indexReader}.
   * </ol>
   *
   * @param skipTo the term to start loading from; remembered as {@code initTerm} on first use
   * @throws RuntimeException wrapping any exception raised by the range-slice request
   */
  private void loadTerms(Term skipTo) {

    if (initTerm == null) initTerm = skipTo;

    // choose starting term
    String startTerm =
        CassandraUtils.hashKey(
            indexName + CassandraUtils.delimeter + CassandraUtils.createColumnName(skipTo));

    // Ending term. Left empty for the initial query, which is limited by the key count
    // (maxInitSize) rather than by an end key.
    String endTerm = "";

    // The boundary condition for this search. currently the field.
    String boundryTerm =
        CassandraUtils.hashKey(
            indexName
                + CassandraUtils.delimeter
                + CassandraUtils.createColumnName(skipTo.field(), CassandraUtils.finalToken));

    // Try the cache first: take the view from skipTo up to (excluding) the cache's last key.
    if ((!skipTo.equals(chunkBoundryTerm) || termPosition == 0) && termCache != null) {
      termDocFreqBuffer = termCache.subMap(skipTo, termCache.lastKey());
    } else {
      termDocFreqBuffer = null;
    }

    if (termDocFreqBuffer != null) {

      termBuffer = termDocFreqBuffer.keySet().toArray(new Term[] {});
      termPosition = 0;

      logger.debug("Found " + startTerm + " in cache");
      return;
    } else if (chunkCount > 1 && actualInitSize < maxChunkSize) {
      // The previous fetch came back short of a full chunk, so no further request is made.

      // include last term
      if (skipTo.equals(chunkBoundryTerm) && termCache.containsKey(skipTo)) {
        termBuffer = new Term[] {skipTo};
        termDocFreqBuffer = termCache.subMap(skipTo, termCache.lastKey());
      } else {
        termBuffer = new Term[] {};
      }

      termPosition = 0;
      return; // done!
    }

    chunkCount++;

    // The first time we grab just a few keys
    int count = maxInitSize;

    // otherwise we grab all the rest of the keys
    if (chunkBoundryTerm != null) {
      count = maxChunkSize;
      startTerm =
          CassandraUtils.hashKey(
              indexName
                  + CassandraUtils.delimeter
                  + CassandraUtils.createColumnName(chunkBoundryTerm));

      // After first pass use the boundary term, since we know on pass 2 we are using the OPP
      endTerm = boundryTerm;
    }

    long start = System.currentTimeMillis();

    termDocFreqBuffer = new TreeMap<Term, List<ColumnOrSuperColumn>>();

    ColumnParent columnParent = new ColumnParent(CassandraUtils.termVecColumnFamily);
    SlicePredicate slicePredicate = new SlicePredicate();

    // Get all columns
    SliceRange sliceRange = new SliceRange(new byte[] {}, new byte[] {}, true, Integer.MAX_VALUE);
    slicePredicate.setSlice_range(sliceRange);

    List<KeySlice> columns;
    try {
      columns =
          client.get_range_slice(
              CassandraUtils.keySpace,
              columnParent,
              slicePredicate,
              startTerm,
              endTerm,
              count,
              ConsistencyLevel.ONE);
    } catch (InvalidRequestException e) {
      throw new RuntimeException(e);
    } catch (TException e) {
      throw new RuntimeException(e);
    } catch (UnavailableException e) {
      throw new RuntimeException(e);
    } catch (TimedOutException e) {
      throw new RuntimeException(e);
    }

    // Remember how many keys came back; a short result stops further fetching on later calls.
    actualInitSize = columns.size();
    logger.debug(
        "Found "
            + columns.size()
            + " keys in range:"
            + startTerm
            + " to "
            + endTerm
            + " in "
            + (System.currentTimeMillis() - start)
            + "ms");

    if (actualInitSize > 0) {
      for (KeySlice entry : columns) {

        // term keys look like wikipedia/body/wiki
        String termStr =
            entry
                .getKey()
                .substring(
                    entry.getKey().indexOf(CassandraUtils.delimeter)
                        + CassandraUtils.delimeter.length());
        Term term = CassandraUtils.parseTerm(termStr);

        logger.debug(termStr + " has " + entry.getColumns().size());

        // check for tombstone keys or incorrect keys (from RP)
        if (entry.getColumns().size() > 0
            && term.field().equals(skipTo.field())
            &&
            // from this index
            entry
                .getKey()
                .equals(
                    CassandraUtils.hashKey(
                        indexName
                            + CassandraUtils.delimeter
                            + term.field()
                            + CassandraUtils.delimeter
                            + term.text()))) {
          termDocFreqBuffer.put(term, entry.getColumns());
        }
      }

      // term to start with next time
      if (!termDocFreqBuffer.isEmpty()) {
        chunkBoundryTerm = termDocFreqBuffer.lastKey();
      }
    }

    // add a final key (excluded in submap below)
    termDocFreqBuffer.put(finalTerm, null);

    // Put in cache. The merge does not depend on which key is being visited, so it is done
    // once here instead of once per key; the buffer always holds at least finalTerm.
    if (termCache == null) {
      termCache = termDocFreqBuffer;
    } else {
      termCache.putAll(termDocFreqBuffer);
    }

    // register this enum with the reader for every term it now holds
    for (Term termKey : termDocFreqBuffer.keySet()) {
      indexReader.addTermEnumCache(termKey, this);
    }

    // cache the initial term too
    indexReader.addTermEnumCache(skipTo, this);

    termBuffer = termDocFreqBuffer.keySet().toArray(new Term[] {});

    termPosition = 0;

    long end = System.currentTimeMillis();

    logger.debug(
        "loadTerms: " + startTerm + "(" + termBuffer.length + ") took " + (end - start) + "ms");
  }