Exemplo n.º 1
0
 /**
  * Counts, for every term of a multivalued field, how many documents of a document set contain
  * that term.
  *
  * <p>The returned array is parallel to {@code stringIndex.lookup}: entry {@code i} holds the
  * number of documents that are set in the bit set of {@code docIdInterface} and for which the
  * term {@code stringIndex.lookup[i]} of field {@code fieldName} is reported with a frequency
  * greater than zero. Slots whose lookup entry is {@code null} stay at zero.
  *
  * @param reader source of the {@link TermDocs} enumerations
  * @param fieldName name of the field whose terms are counted
  * @param stringIndex field cache whose {@code lookup} array lists the terms to count
  * @param docIdInterface document set restricting which documents are counted
  * @return per-term document counts, all zero when the document set is empty
  * @throws IOException if reading the term documents fails
  * @throws SearchLibException declared for the underlying reader calls
  */
 private static final int[] computeMultivaluedTD(
     ReaderAbstract reader,
     String fieldName,
     FieldCacheIndex stringIndex,
     DocIdInterface docIdInterface)
     throws IOException, SearchLibException {
   int[] countIndex = new int[stringIndex.lookup.length];
   if (docIdInterface.getSize() == 0) {
     return countIndex;
   }
   // Scratch buffers filled by TermDocs.read(); reused for every term.
   int[] docs = new int[100];
   int[] freqs = new int[100];
   BitSetInterface bitset = docIdInterface.getBitSet();
   Term oTerm = new Term(fieldName);
   int indexPos = 0;
   for (String term : stringIndex.lookup) {
     if (term != null) {
       TermDocs termDocs = reader.getTermDocs(oTerm.createTerm(term));
       try {
         int l;
         while ((l = termDocs.read(docs, freqs)) > 0) {
           for (int i = 0; i < l; i++) {
             if (freqs[i] > 0 && bitset.get(docs[i])) {
               countIndex[indexPos]++;
             }
           }
         }
       } finally {
         // Release the enumeration even when read() fails part-way through.
         termDocs.close();
       }
     }
     // Advance for null entries too so counts stay aligned with lookup.
     indexPos++;
   }
   return countIndex;
 }
 /**
  * Returns the document frequency of {@code term} in the field of {@code currentTerm}.
  *
  * <p>As a side effect {@code currentTerm} is replaced by a term with the same field and the
  * given text. When the reader has no exactly matching term the default of {@code 1} is
  * returned.
  *
  * @param term text of the term to look up
  * @return the document frequency reported by the reader, or {@code 1} if the term is absent
  * @throws RuntimeException wrapping any {@link IOException} raised by the reader
  */
 private int getDocFreq(String term) {
   int result = 1;
   currentTerm = currentTerm.createTerm(term);
   try {
     TermEnum termEnum = reader.terms(currentTerm);
     if (termEnum != null) {
       try {
         // term() is null once the enumeration is positioned past the last term, so compare
         // from the non-null side.
         if (currentTerm.equals(termEnum.term())) {
           result = termEnum.docFreq();
         }
       } finally {
         termEnum.close();
       }
     }
   } catch (IOException e) {
     throw new RuntimeException(e);
   }
   return result;
 }
Exemplo n.º 3
0
  /**
   * Positions this enumeration on the first term that is not smaller than {@code target}.
   *
   * <p>The term index ({@code tindex.index}) is searched for the target text. A non-negative
   * result, or an insertion point of zero, leads to a direct reposition on {@code target}.
   * Otherwise the enumeration is moved to the indexed term that starts the preceding block
   * (unless it is already inside that block at or before the target) and then stepped forward
   * with {@code next()}.
   *
   * @param target the term to move to
   * @return {@code true} if the enumeration is already on {@code target}; otherwise the result
   *     of {@code setTerm()} for direct repositions, or whether a current term exists after
   *     scanning forward
   * @throws IOException if the underlying reader or enumeration fails
   */
  public boolean skipTo(Term target) throws IOException {
    // Fast path: the enumeration already sits on the requested term.
    if (t != null && t.equals(target)) {
      return true;
    }

    final String wanted = target.text();
    final int searchResult = tindex.index.search(wanted);

    // A non-negative result is an exact hit on an indexed term; -1 encodes insertion point 0,
    // i.e. the target sorts before the first indexed term. Both reposition directly on target.
    final boolean exactHit = searchResult >= 0;
    if (exactHit || searchResult == -1) {
      if (tenum != null) {
        tenum.close();
      }
      tenum = reader.terms(target);
      if (exactHit) {
        pos = searchResult << tindex.intervalBits;
      } else {
        pos = 0;
      }
      return setTerm();
    }

    // Insertion point is (-searchResult - 1); the block holding the target starts one
    // index entry earlier.
    final int block = -searchResult - 2;

    // Reposition unless we are already inside that block, at or before the target.
    final boolean mustSeek =
        (pos >> tindex.intervalBits) != block || t == null || t.text().compareTo(wanted) > 0;
    if (mustSeek) {
      if (tenum != null) {
        tenum.close();
      }
      tenum = reader.terms(target.createTerm(tindex.index.get(block)));
      pos = block << tindex.intervalBits;
      setTerm(); // expected to succeed because the term comes from the index
    }

    // Walk forward until we reach or pass the target, or run out of terms.
    while (t != null && t.text().compareTo(wanted) < 0) {
      next();
    }

    return t != null;
  }
Exemplo n.º 4
0
  /**
   * Adds {@code query} to {@code booleanQuery} unless an equal clause is already present.
   *
   * <ul>
   *   <li>A {@link TermQuery} is rebuilt per extracted term: as a {@link WildcardQuery} on the
   *       lower-cased text wrapped in {@code StringPool.STAR} when {@code like} is set,
   *       otherwise as a plain {@link TermQuery}. The original boost is carried over.
   *   <li>A {@link BooleanQuery} is processed recursively into a fresh container, which is added
   *       only when it ended up with at least one clause.
   *   <li>Any other query is added as-is when no equal clause exists yet.
   * </ul>
   *
   * @param booleanQuery the query receiving the clauses
   * @param like whether term queries are turned into wildcard queries
   * @param queryParser supplies the locale used for lower-casing
   * @param query the query to merge in
   * @param occur the occurrence flag used when a clause is added
   */
  private void _includeIfUnique(
      BooleanQuery booleanQuery,
      boolean like,
      QueryParser queryParser,
      Query query,
      BooleanClause.Occur occur) {

    if (query instanceof TermQuery) {
      TermQuery termQuery = (TermQuery) query;

      Set<Term> terms = new HashSet<Term>();

      termQuery.extractTerms(terms);

      float boost = termQuery.getBoost();

      for (Term term : terms) {
        Query candidate;

        if (like) {
          String termValue = term.text().toLowerCase(queryParser.getLocale());

          Term wildcardTerm =
              term.createTerm(StringPool.STAR.concat(termValue).concat(StringPool.STAR));

          candidate = new WildcardQuery(wildcardTerm);
        } else {
          candidate = new TermQuery(term);
        }

        candidate.setBoost(boost);

        _addIfAbsent(booleanQuery, candidate, occur);
      }
    } else if (query instanceof BooleanQuery) {
      BooleanQuery curBooleanQuery = (BooleanQuery) query;

      BooleanQuery containerBooleanQuery = new BooleanQuery();

      for (BooleanClause booleanClause : curBooleanQuery.getClauses()) {
        _includeIfUnique(
            containerBooleanQuery,
            like,
            queryParser,
            booleanClause.getQuery(),
            booleanClause.getOccur());
      }

      if (containerBooleanQuery.getClauses().length > 0) {
        booleanQuery.add(containerBooleanQuery, occur);
      }
    } else {
      _addIfAbsent(booleanQuery, query, occur);
    }
  }

  /** Adds {@code query} to {@code booleanQuery} unless one of its clauses already equals it. */
  private void _addIfAbsent(BooleanQuery booleanQuery, Query query, BooleanClause.Occur occur) {
    for (BooleanClause booleanClause : booleanQuery.getClauses()) {
      if (query.equals(booleanClause.getQuery())) {
        // Already present; no need to inspect the remaining clauses.
        return;
      }
    }

    booleanQuery.add(query, occur);
  }
  /**
   * Loads per-document float values from the latest {@code external_<field>} file.
   *
   * <p>Each line of the file has the form {@code key=value}. The key is converted to its
   * internal form with the key field's type and looked up as a term of the key field; every
   * document containing that term receives the parsed float. Lines without a {@code '='} are
   * skipped, and lines whose value does not parse as a float are skipped after logging (only
   * the first ten such errors are logged). Documents not mentioned keep {@code ffs.defVal}.
   *
   * <p>While the keys arrive in ascending internal order a single term enumeration is walked
   * forward alongside the file; after the first out-of-order key every remaining key is looked
   * up with a fresh enumeration.
   *
   * @param ffs describes the value field, key field, data directory and default value
   * @param reader index reader used to resolve keys to documents
   * @return an array of {@code reader.maxDoc()} values; only defaults if the file cannot be
   *     opened, and whatever was loaded so far if reading fails part-way
   */
  private static float[] getFloats(FileFloatSource ffs, IndexReader reader) {
    float[] vals = new float[reader.maxDoc()];
    if (ffs.defVal != 0) {
      Arrays.fill(vals, ffs.defVal);
    }
    InputStream is;
    String fname = "external_" + ffs.field.getName();
    try {
      is = VersionedFile.getLatestFile(ffs.dataDir, fname);
    } catch (IOException e) {
      // log, use defaults
      SolrCore.log.error("Error opening external value source file: " + e);
      return vals;
    }

    // NOTE(review): the file is decoded with the platform default charset.
    BufferedReader r = new BufferedReader(new InputStreamReader(is));

    String idName = StringHelper.intern(ffs.keyField.getName());
    FieldType idType = ffs.keyField.getType();
    boolean sorted = true; // assume sorted until we discover it's not

    // warning: lucene's termEnum.skipTo() is not optimized... it simply does a next()
    // because of this, simply ask the reader for a new termEnum rather than
    // trying to use skipTo()

    List<String> notFound = new ArrayList<String>();
    int notFoundCount = 0;
    int otherErrors = 0;

    TermDocs termDocs = null;
    Term protoTerm = new Term(idName, "");
    TermEnum termEnum = null;
    // Number of times to try termEnum.next() before resorting to skip
    int numTimesNext = 10;

    char delimiter = '=';
    String termVal;
    boolean hasNext = true;
    String prevKey = "";

    // Sentinel used as the "current term" once the enumeration has left the key field.
    String lastVal = "\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF";

    try {
      termDocs = reader.termDocs();
      termEnum = reader.terms(protoTerm);
      Term t = termEnum.term();
      if (t != null && t.field() == idName) { // intern'd comparison
        termVal = t.text();
      } else {
        termVal = lastVal;
      }

      for (String line; (line = r.readLine()) != null; ) {
        int delimIndex = line.indexOf(delimiter);
        if (delimIndex < 0) continue;

        // readLine() has already stripped the line terminator.
        String key = line.substring(0, delimIndex);
        String val = line.substring(delimIndex + 1);

        String internalKey = idType.toInternal(key);
        float fval;
        try {
          fval = Float.parseFloat(val);
        } catch (Exception e) {
          if (++otherErrors <= 10) {
            SolrCore.log.error(
                "Error loading external value source "
                    + fname
                    + " "
                    + e
                    + (otherErrors < 10 ? "" : "\tSkipping future errors for this file."));
          }
          continue; // go to next line in file.. leave values as default.
        }

        if (sorted) {
          // make sure this key is greater than the previous key
          sorted = internalKey.compareTo(prevKey) >= 0;
          prevKey = internalKey;

          if (sorted) {
            int countNext = 0;
            for (; ; ) {
              int cmp = internalKey.compareTo(termVal);
              if (cmp == 0) {
                termDocs.seek(termEnum);
                while (termDocs.next()) {
                  vals[termDocs.doc()] = fval;
                }
                break;
              } else if (cmp < 0) {
                // term enum has already advanced past current key... we didn't find it.
                if (notFoundCount < 10) { // collect first 10 not found for logging
                  notFound.add(key);
                }
                notFoundCount++;
                break;
              } else {
                // termEnum is less than our current key, so skip ahead

                // try next() a few times to see if we hit or pass the target.
                // Lucene's termEnum.skipTo() is currently unoptimized (it just does next())
                // so the best thing is to simply ask the reader for a new termEnum(target)
                // if we really need to skip.
                if (++countNext > numTimesNext) {
                  // Open the replacement first so termEnum is always valid for the finally
                  // block, then release the enumeration it supersedes.
                  TermEnum superseded = termEnum;
                  termEnum = reader.terms(protoTerm.createTerm(internalKey));
                  superseded.close();
                  t = termEnum.term();
                } else {
                  hasNext = termEnum.next();
                  t = hasNext ? termEnum.term() : null;
                }

                if (t != null && t.field() == idName) { // intern'd comparison
                  termVal = t.text();
                } else {
                  termVal = lastVal;
                }
              }
            } // end for(;;)
          }
        }

        if (!sorted) {
          // Out-of-order input: look each key up with its own enumeration, closing the
          // previous one so enumerations do not pile up.
          TermEnum superseded = termEnum;
          termEnum = reader.terms(protoTerm.createTerm(internalKey));
          superseded.close();
          t = termEnum.term();
          if (t != null
              && t.field() == idName // intern'd comparison
              && internalKey.equals(t.text())) {
            termDocs.seek(termEnum);
            while (termDocs.next()) {
              vals[termDocs.doc()] = fval;
            }
          } else {
            if (notFoundCount < 10) { // collect first 10 not found for logging
              notFound.add(key);
            }
            notFoundCount++;
          }
        }
      }
    } catch (IOException e) {
      // log, use defaults
      SolrCore.log.error("Error loading external value source: " + e);
    } finally {
      // swallow exceptions on close so we don't override any
      // exceptions that happened in the loop
      if (termDocs != null) {
        try {
          termDocs.close();
        } catch (Exception ignored) {
          // best-effort close
        }
      }
      if (termEnum != null) {
        try {
          termEnum.close();
        } catch (Exception ignored) {
          // best-effort close
        }
      }
      try {
        r.close();
      } catch (Exception ignored) {
        // best-effort close
      }
    }

    SolrCore.log.info(
        "Loaded external value source "
            + fname
            + (notFoundCount == 0 ? "" : " :" + notFoundCount + " missing keys " + notFound));

    return vals;
  }