/**
 * Collects term frequencies for one property of a complex field, restricted to the documents that
 * match a filter query.
 *
 * <p>Each index segment (leaf) is visited in turn. For every matching, live document the global
 * document id (leaf-local id plus the leaf's {@code docBase}) is handed to
 * {@code getFrequenciesFromTermVector}, together with the map that is eventually returned
 * (presumably that helper accumulates counts into the map; verify against its implementation).
 *
 * @param indexSearcher searcher providing both the index reader and the query weight
 * @param documentFilterQuery query selecting the documents to include
 * @param fieldName complex field name
 * @param propName property name within the complex field
 * @param altName alternative name within the property
 * @return the frequency map that was passed to the term-vector helper for every matching document
 * @throws RuntimeException if the weight, a leaf context or a leaf reader is unexpectedly null, or
 *     (wrapped via {@code ExUtil.wrapRuntimeException}) if an {@link IOException} occurs
 */
public static Map<String, Integer> termFrequencies(
    IndexSearcher indexSearcher,
    Query documentFilterQuery,
    String fieldName,
    String propName,
    String altName) {
  try {
    final String targetField = ComplexFieldUtil.propertyField(fieldName, propName, altName);
    final Weight filterWeight = indexSearcher.createNormalizedWeight(documentFilterQuery, false);
    final Map<String, Integer> frequencies = new HashMap<>();
    final IndexReader reader = indexSearcher.getIndexReader();

    for (LeafReaderContext leaf : reader.leaves()) {
      // Sanity checks; deliberately performed per leaf, so nothing is thrown for an empty index.
      if (filterWeight == null) {
        throw new RuntimeException("weight == null");
      }
      if (leaf == null) {
        throw new RuntimeException("arc == null");
      }
      if (leaf.reader() == null) {
        throw new RuntimeException("arc.reader() == null");
      }

      Scorer matches = filterWeight.scorer(leaf, leaf.reader().getLiveDocs());
      if (matches == null) {
        // No scorer for this leaf; nothing to count here.
        continue;
      }
      while (matches.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        // Convert the leaf-local document id to an id valid for the top-level reader.
        int globalDocId = matches.docID() + leaf.docBase;
        getFrequenciesFromTermVector(reader, globalDocId, targetField, frequencies);
      }
    }
    return frequencies;
  } catch (IOException e) {
    throw ExUtil.wrapRuntimeException(e);
  }
}
public String optDesensitize(String value) { final String s = ComplexFieldUtil.SENSITIVE_ALT_NAME; final String i = ComplexFieldUtil.INSENSITIVE_ALT_NAME; final String ci = ComplexFieldUtil.CASE_INSENSITIVE_ALT_NAME; final String di = ComplexFieldUtil.DIACRITICS_INSENSITIVE_ALT_NAME; String[] parts = ComplexFieldUtil.getNameComponents(luceneField()); String alt = parts.length >= 3 ? parts[2] : ""; if (alt.equals(s)) { // Don't desensitize return value; } if (alt.equals(i)) { // Fully desensitize; return StringUtil.removeAccents(value).toLowerCase(); } if (alt.equals(ci)) { // Only case-insensitive return value.toLowerCase(); } if (alt.equals(di)) { // Only diacritics-insensitive return StringUtil.removeAccents(value); } // Unknown alternative; don't change value return value; }
/** * Returns the correct current Lucene field name to use, based on the complex field name, property * name and list of alternatives. * * @param includeAlternative if true, also includes the default alternative at the end of the * field name (alternatives determine stuff like case-/diacritics-sensitivity). * @return null if field, property or alternative not found; valid Lucene field name otherwise */ public String luceneField(boolean includeAlternative) { // Determine available alternatives based on sensitivity preferences. String[] alternatives = includeAlternative ? getAlternatives() : null; if (searcher == null) { // Mostly for testing. Don't check, just combine field parts. if (alternatives == null || alternatives.length == 0) return ComplexFieldUtil.propertyField(fieldName, propName); return ComplexFieldUtil.propertyField(fieldName, propName, alternatives[0]); } // Find the field and the property. ComplexFieldDesc cfd = searcher.getIndexStructure().getComplexFieldDesc(fieldName); if (cfd == null) return null; if (ComplexFieldUtil.isBookkeepingSubfield(propName)) { // Not a property but a bookkeeping subfield (prob. starttag/endtag); ok, return it // (can be removed when old field naming scheme is removed) return ComplexFieldUtil.bookkeepingField(fieldName, propName); } // Find the property PropertyDesc pd = cfd.getPropertyDesc(propName); if (pd == null) return ComplexFieldUtil.propertyField( fieldName, propName); // doesn't exist? use plain property name if (alternatives == null || alternatives.length == 0) { // Don't use any alternatives return ComplexFieldUtil.propertyField(fieldName, propName); } // Find the first available alternative to use for (String alt : alternatives) { if (pd.hasAlternative(alt)) { // NOTE: is this loop necessary at all? getAlternatives() only // returns available alternatives, so the first one should always // be okay, right? return ComplexFieldUtil.propertyField(fieldName, propName, alt); } } // No valid alternative found. 
Use plain property. // NOTE: should never happen, and doesn't make sense anymore as there are // no 'plain properties' anymore. return ComplexFieldUtil.propertyField(fieldName, propName); }
/**
 * Creates a minimal execution context for the given field, for testing/debugging purposes.
 *
 * <p>The context has no searcher ({@code null}), uses the default main property name from
 * {@code ComplexFieldUtil.getDefaultMainPropName()}, and passes {@code false} for both boolean
 * constructor arguments.
 *
 * @param fieldName field to get an execution context for
 * @return the context
 */
public static QueryExecutionContext getSimple(String fieldName) {
  return new QueryExecutionContext(
      null, fieldName, ComplexFieldUtil.getDefaultMainPropName(), false, false);
}