public static TermsEnum filter( TermsEnum toFilter, Terms terms, AtomicReader reader, Settings settings) throws IOException { int docCount = terms.getDocCount(); if (docCount == -1) { docCount = reader.maxDoc(); } final double minFrequency = settings.getAsDouble("min", 0d); final double maxFrequency = settings.getAsDouble("max", docCount + 1d); final double minSegmentSize = settings.getAsInt("min_segment_size", 0); if (minSegmentSize < docCount) { final int minFreq = minFrequency >= 1.0 ? (int) minFrequency : (int) (docCount * minFrequency); final int maxFreq = maxFrequency >= 1.0 ? (int) maxFrequency : (int) (docCount * maxFrequency); assert minFreq < maxFreq; return new FrequencyFilter(toFilter, minFreq, maxFreq); } return toFilter; }
@Override public DoubleArrayAtomicFieldData loadDirect(AtomicReaderContext context) throws Exception { AtomicReader reader = context.reader(); Terms terms = reader.terms(getFieldNames().indexName()); if (terms == null) { return DoubleArrayAtomicFieldData.EMPTY; } // TODO: how can we guess the number of terms? numerics end up creating more terms per value... final TDoubleArrayList values = new TDoubleArrayList(); ArrayList<int[]> ordinals = new ArrayList<int[]>(); int[] idx = new int[reader.maxDoc()]; ordinals.add(new int[reader.maxDoc()]); values.add(0); // first "t" indicates null value int termOrd = 1; // current term number TermsEnum termsEnum = terms.iterator(null); try { DocsEnum docsEnum = null; for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { values.add(FieldCache.NUMERIC_UTILS_DOUBLE_PARSER.parseDouble(term)); docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0); for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { int[] ordinal; if (idx[docId] >= ordinals.size()) { ordinal = new int[reader.maxDoc()]; ordinals.add(ordinal); } else { ordinal = ordinals.get(idx[docId]); } ordinal[docId] = termOrd; idx[docId]++; } termOrd++; } } catch (RuntimeException e) { if (e.getClass().getName().endsWith("StopFillCacheException")) { // all is well, in case numeric parsers are used. } else { throw e; } } if (ordinals.size() == 1) { int[] nativeOrdinals = ordinals.get(0); FixedBitSet set = new FixedBitSet(reader.maxDoc()); double[] sValues = new double[reader.maxDoc()]; boolean allHaveValue = true; for (int i = 0; i < nativeOrdinals.length; i++) { int nativeOrdinal = nativeOrdinals[i]; if (nativeOrdinal == 0) { allHaveValue = false; } else { set.set(i); sValues[i] = values.get(nativeOrdinal); } } if (allHaveValue) { return new DoubleArrayAtomicFieldData.Single(sValues, reader.maxDoc()); } else { return new DoubleArrayAtomicFieldData.SingleFixedSet(sValues, reader.maxDoc(), set); } } else { int[][] nativeOrdinals = new int[ordinals.size()][]; for (int i = 0; i < nativeOrdinals.length; i++) { nativeOrdinals[i] = ordinals.get(i); } return new DoubleArrayAtomicFieldData.WithOrdinals( values.toArray(new double[values.size()]), reader.maxDoc(), Ordinals.Factories.createFromFlatOrdinals( nativeOrdinals, termOrd, fieldDataType.getSettings())); } }