/**
 * Counts, for every term of a multivalued field, how many of the selected documents contain it.
 *
 * <p>The returned array is parallel to {@code stringIndex.lookup}: slot {@code i} holds the
 * number of documents that are set in the bit set of {@code docIdInterface} and in which the
 * term {@code stringIndex.lookup[i]} occurs with a frequency greater than zero. Slots whose
 * lookup entry is {@code null} stay at zero.
 *
 * @param reader the reader used to obtain the postings of each term
 * @param fieldName the field the terms belong to
 * @param stringIndex the cache whose {@code lookup} array lists the terms to count
 * @param docIdInterface the document selection; only documents set in its bit set are counted
 * @return the per-term document counts (all zeros when the selection is empty)
 * @throws IOException if reading the postings fails
 * @throws SearchLibException if the reader cannot provide the postings
 */
private static final int[] computeMultivaluedTD(
    ReaderAbstract reader,
    String fieldName,
    FieldCacheIndex stringIndex,
    DocIdInterface docIdInterface)
    throws IOException, SearchLibException {
  String[] lookup = stringIndex.lookup;
  int[] countIndex = new int[lookup.length];
  if (docIdInterface.getSize() == 0) {
    return countIndex;
  }

  // Reusable buffers for bulk reads of (document, frequency) pairs.
  int[] docs = new int[100];
  int[] freqs = new int[100];
  BitSetInterface bitset = docIdInterface.getBitSet();
  Term oTerm = new Term(fieldName);

  for (int indexPos = 0; indexPos < lookup.length; indexPos++) {
    String term = lookup[indexPos];
    if (term == null) {
      continue;
    }
    TermDocs termDocs = reader.getTermDocs(oTerm.createTerm(term));
    try {
      int l;
      while ((l = termDocs.read(docs, freqs)) > 0) {
        for (int i = 0; i < l; i++) {
          if (freqs[i] > 0 && bitset.get(docs[i])) {
            countIndex[indexPos]++;
          }
        }
      }
    } finally {
      // Release the postings even when read() fails; previously they leaked on exception.
      termDocs.close();
    }
  }
  return countIndex;
}
/**
 * Returns the document frequency of {@code term} in the field of {@code currentTerm}.
 *
 * <p>As a side effect, {@code currentTerm} is replaced by a term with the same field and the
 * given text. When the index does not contain exactly that term, the default of {@code 1} is
 * returned.
 *
 * @param term the term text to look up
 * @return the document frequency reported by the term enumeration, or {@code 1} when the term
 *     is not found
 * @throws RuntimeException wrapping any {@link IOException} raised while reading the index
 */
private int getDocFreq(String term) {
  int result = 1;
  currentTerm = currentTerm.createTerm(term);
  try {
    TermEnum termEnum = reader.terms(currentTerm);
    if (termEnum != null) {
      try {
        // term() is null when the enumeration is positioned past the last term, so compare
        // from the known non-null side to avoid a NullPointerException.
        if (currentTerm.equals(termEnum.term())) {
          result = termEnum.docFreq();
        }
      } finally {
        // The enumeration was previously never closed.
        termEnum.close();
      }
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  return result;
}
public boolean skipTo(Term target) throws IOException { // already here if (t != null && t.equals(target)) return true; int startIdx = tindex.index.search(target.text()); if (startIdx >= 0) { // we hit the term exactly... lucky us! if (tenum != null) tenum.close(); tenum = reader.terms(target); pos = startIdx << tindex.intervalBits; return setTerm(); } // we didn't hit the term exactly startIdx = -startIdx - 1; if (startIdx == 0) { // our target occurs *before* the first term if (tenum != null) tenum.close(); tenum = reader.terms(target); pos = 0; return setTerm(); } // back up to the start of the block startIdx--; if ((pos >> tindex.intervalBits) == startIdx && t != null && t.text().compareTo(target.text()) <= 0) { // we are already in the right block and the current term is before the term we want, // so we don't need to seek. } else { // seek to the right block if (tenum != null) tenum.close(); tenum = reader.terms(target.createTerm(tindex.index.get(startIdx))); pos = startIdx << tindex.intervalBits; setTerm(); // should be true since it's in the index } while (t != null && t.text().compareTo(target.text()) < 0) { next(); } return t != null; }
/**
 * Adds {@code query} to {@code booleanQuery} unless an equal query is already one of its
 * clauses.
 *
 * <ul>
 *   <li>A {@link TermQuery} is rebuilt from its extracted terms, keeping its boost. When
 *       {@code like} is set, the term text is lower-cased with the parser's locale and wrapped
 *       in {@code StringPool.STAR} on both sides to form a {@link WildcardQuery}.
 *   <li>A {@link BooleanQuery} is rebuilt recursively into a fresh container, which is added
 *       only if it ends up with at least one clause.
 *   <li>Any other query is added unchanged.
 * </ul>
 *
 * @param booleanQuery the query that receives the clauses
 * @param like whether term queries are turned into wildcard queries
 * @param queryParser supplies the locale used for lower-casing
 * @param query the query to add
 * @param occur the occurrence flag used for added clauses
 */
private void _includeIfUnique(
    BooleanQuery booleanQuery,
    boolean like,
    QueryParser queryParser,
    Query query,
    BooleanClause.Occur occur) {

  if (query instanceof BooleanQuery) {
    // Rebuild nested boolean queries clause by clause so duplicates inside them are dropped.
    BooleanQuery rebuilt = new BooleanQuery();
    BooleanClause[] nestedClauses = ((BooleanQuery) query).getClauses();

    for (BooleanClause nestedClause : nestedClauses) {
      _includeIfUnique(
          rebuilt, like, queryParser, nestedClause.getQuery(), nestedClause.getOccur());
    }

    if (rebuilt.getClauses().length != 0) {
      booleanQuery.add(rebuilt, occur);
    }
    return;
  }

  if (!(query instanceof TermQuery)) {
    // Opaque query type: add it as-is when no equal clause exists yet.
    boolean alreadyPresent = false;
    for (BooleanClause existing : booleanQuery.getClauses()) {
      if (query.equals(existing.getQuery())) {
        alreadyPresent = true;
      }
    }
    if (!alreadyPresent) {
      booleanQuery.add(query, occur);
    }
    return;
  }

  TermQuery sourceQuery = (TermQuery) query;
  Set<Term> extracted = new HashSet<Term>();
  sourceQuery.extractTerms(extracted);
  float sourceBoost = sourceQuery.getBoost();

  for (Term extractedTerm : extracted) {
    Query candidate;
    if (like) {
      String lowered = extractedTerm.text().toLowerCase(queryParser.getLocale());
      Term wildcardTerm =
          extractedTerm.createTerm(StringPool.STAR.concat(lowered).concat(StringPool.STAR));
      candidate = new WildcardQuery(wildcardTerm);
    } else {
      candidate = new TermQuery(extractedTerm);
    }
    candidate.setBoost(sourceBoost);

    boolean alreadyPresent = false;
    for (BooleanClause existing : booleanQuery.getClauses()) {
      if (candidate.equals(existing.getQuery())) {
        alreadyPresent = true;
      }
    }
    if (!alreadyPresent) {
      booleanQuery.add(candidate, occur);
    }
  }
}
private static float[] getFloats(FileFloatSource ffs, IndexReader reader) { float[] vals = new float[reader.maxDoc()]; if (ffs.defVal != 0) { Arrays.fill(vals, ffs.defVal); } InputStream is; String fname = "external_" + ffs.field.getName(); try { is = VersionedFile.getLatestFile(ffs.dataDir, fname); } catch (IOException e) { // log, use defaults SolrCore.log.error("Error opening external value source file: " + e); return vals; } BufferedReader r = new BufferedReader(new InputStreamReader(is)); String idName = StringHelper.intern(ffs.keyField.getName()); FieldType idType = ffs.keyField.getType(); boolean sorted = true; // assume sorted until we discover it's not // warning: lucene's termEnum.skipTo() is not optimized... it simply does a next() // because of this, simply ask the reader for a new termEnum rather than // trying to use skipTo() List<String> notFound = new ArrayList<String>(); int notFoundCount = 0; int otherErrors = 0; TermDocs termDocs = null; Term protoTerm = new Term(idName, ""); TermEnum termEnum = null; // Number of times to try termEnum.next() before resorting to skip int numTimesNext = 10; char delimiter = '='; String termVal; boolean hasNext = true; String prevKey = ""; String lastVal = "\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF\uFFFF"; try { termDocs = reader.termDocs(); termEnum = reader.terms(protoTerm); Term t = termEnum.term(); if (t != null && t.field() == idName) { // intern'd comparison termVal = t.text(); } else { termVal = lastVal; } for (String line; (line = r.readLine()) != null; ) { int delimIndex = line.indexOf(delimiter); if (delimIndex < 0) continue; int endIndex = line.length(); /* EOLs should already be removed for BufferedReader.readLine() for(int endIndex = line.length();endIndex>delimIndex+1; endIndex--) { char ch = line.charAt(endIndex-1); if (ch!='\n' && ch!='\r') break; } */ String key = line.substring(0, delimIndex); String val = line.substring(delimIndex + 1, endIndex); String internalKey = idType.toInternal(key); 
float fval; try { fval = Float.parseFloat(val); } catch (Exception e) { if (++otherErrors <= 10) { SolrCore.log.error( "Error loading external value source + fileName + " + e + (otherErrors < 10 ? "" : "\tSkipping future errors for this file.")); } continue; // go to next line in file.. leave values as default. } if (sorted) { // make sure this key is greater than the previous key sorted = internalKey.compareTo(prevKey) >= 0; prevKey = internalKey; if (sorted) { int countNext = 0; for (; ; ) { int cmp = internalKey.compareTo(termVal); if (cmp == 0) { termDocs.seek(termEnum); while (termDocs.next()) { vals[termDocs.doc()] = fval; } break; } else if (cmp < 0) { // term enum has already advanced past current key... we didn't find it. if (notFoundCount < 10) { // collect first 10 not found for logging notFound.add(key); } notFoundCount++; break; } else { // termEnum is less than our current key, so skip ahead // try next() a few times to see if we hit or pass the target. // Lucene's termEnum.skipTo() is currently unoptimized (it just does next()) // so the best thing is to simply ask the reader for a new termEnum(target) // if we really need to skip. if (++countNext > numTimesNext) { termEnum = reader.terms(protoTerm.createTerm(internalKey)); t = termEnum.term(); } else { hasNext = termEnum.next(); t = hasNext ? 
termEnum.term() : null; } if (t != null && t.field() == idName) { // intern'd comparison termVal = t.text(); } else { termVal = lastVal; } } } // end for(;;) } } if (!sorted) { termEnum = reader.terms(protoTerm.createTerm(internalKey)); t = termEnum.term(); if (t != null && t.field() == idName // intern'd comparison && internalKey.equals(t.text())) { termDocs.seek(termEnum); while (termDocs.next()) { vals[termDocs.doc()] = fval; } } else { if (notFoundCount < 10) { // collect first 10 not found for logging notFound.add(key); } notFoundCount++; } } } } catch (IOException e) { // log, use defaults SolrCore.log.error("Error loading external value source: " + e); } finally { // swallow exceptions on close so we don't override any // exceptions that happened in the loop if (termDocs != null) try { termDocs.close(); } catch (Exception e) { } if (termEnum != null) try { termEnum.close(); } catch (Exception e) { } try { r.close(); } catch (Exception e) { } } SolrCore.log.info( "Loaded external value source " + fname + (notFoundCount == 0 ? "" : " :" + notFoundCount + " missing keys " + notFound)); return vals; }