private List<Tuple<Text, Integer>> getExpectedFacetEntries(Set<String> fieldValues,
                                                           Map<String, Integer> controlFacetsField,
                                                           int size,
                                                           TermsFacet.ComparatorType sort,
                                                           List<String> excludes,
                                                           String regex,
                                                           boolean allTerms) {
    Pattern pattern = null;
    if (regex != null) {
        pattern = Regex.compile(regex, null);
    }
    List<Tuple<Text, Integer>> entries = new ArrayList<Tuple<Text, Integer>>();
    for (Map.Entry<String, Integer> e : controlFacetsField.entrySet()) {
        if (excludes.contains(e.getKey())) {
            continue;
        }
        if (pattern != null && !pattern.matcher(e.getKey()).matches()) {
            continue;
        }
        entries.add(new Tuple<Text, Integer>(new StringText(e.getKey()), e.getValue()));
    }
    if (allTerms) {
        for (String fieldValue : fieldValues) {
            if (!controlFacetsField.containsKey(fieldValue)) {
                if (excludes.contains(fieldValue)) {
                    continue;
                }
                if (pattern != null && !pattern.matcher(fieldValue).matches()) {
                    continue;
                }
                entries.add(new Tuple<Text, Integer>(new StringText(fieldValue), 0));
            }
        }
    }
    switch (sort) {
        case COUNT:
            Collections.sort(entries, count);
            break;
        case REVERSE_COUNT:
            Collections.sort(entries, count_reverse);
            break;
        case TERM:
            Collections.sort(entries, term);
            break;
        case REVERSE_TERM:
            Collections.sort(entries, term_reverse);
            break;
    }
    return size >= entries.size() ? entries : entries.subList(0, size);
}
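// The switch above relies on comparator fields (count, count_reverse, term, term_reverse) declared
// elsewhere in this test class. As a rough, assumed sketch (not the original fields), a count
// comparator could look like this: descending by count, ties broken by ascending term.
private static final Comparator<Tuple<Text, Integer>> exampleCountComparator = new Comparator<Tuple<Text, Integer>>() {
    @Override
    public int compare(Tuple<Text, Integer> o1, Tuple<Text, Integer> o2) {
        int cmp = Integer.compare(o2.v2(), o1.v2()); // higher counts first
        if (cmp != 0) {
            return cmp;
        }
        return o1.v1().string().compareTo(o2.v1().string()); // then ascending by term
    }
};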
@Inject
public PatternTokenizerFactory(IndexSettings indexSettings, @Assisted String name, @Assisted Settings settings) {
    super(indexSettings, name, settings);

    String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
    if (sPattern == null) {
        throw new IllegalArgumentException("pattern is missing for [" + name + "] tokenizer of type 'pattern'");
    }

    this.pattern = Regex.compile(sPattern, settings.get("flags"));
    this.group = settings.getAsInt("group", -1);
}
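// A hedged usage sketch (illustrative values, not taken from this file): the settings keys read by
// the constructor above ("pattern", "flags", "group") could be supplied through the standard
// Elasticsearch 1.x ImmutableSettings builder (org.elasticsearch.common.settings.ImmutableSettings).
static Settings examplePatternTokenizerSettings() {
    return ImmutableSettings.settingsBuilder()
            .put("pattern", ",")               // split on commas instead of the \W+ default
            .put("flags", "CASE_INSENSITIVE")  // passed to Regex.compile(sPattern, flags)
            .put("group", -1)                  // -1 splits on matches rather than emitting a capture group
            .build();
}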
@Override
public FacetExecutor parse(String facetName, XContentParser parser, SearchContext context) throws IOException {
    String field = null;
    int size = 10;
    int shardSize = -1;

    String[] fieldsNames = null;
    ImmutableSet<BytesRef> excluded = ImmutableSet.of();
    String regex = null;
    String regexFlags = null;
    TermsFacet.ComparatorType comparatorType = TermsFacet.ComparatorType.COUNT;
    String scriptLang = null;
    String script = null;
    ScriptService.ScriptType scriptType = null;
    Map<String, Object> params = null;
    boolean allTerms = false;
    String executionHint = null;

    String currentFieldName = null;
    XContentParser.Token token;
    while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
        if (token == XContentParser.Token.FIELD_NAME) {
            currentFieldName = parser.currentName();
        } else if (token == XContentParser.Token.START_OBJECT) {
            if ("params".equals(currentFieldName)) {
                params = parser.map();
            } else {
                throw new ElasticsearchParseException("unknown parameter [" + currentFieldName + "] while parsing terms facet [" + facetName + "]");
            }
        } else if (token == XContentParser.Token.START_ARRAY) {
            if ("exclude".equals(currentFieldName)) {
                ImmutableSet.Builder<BytesRef> builder = ImmutableSet.builder();
                while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                    builder.add(parser.bytes());
                }
                excluded = builder.build();
            } else if ("fields".equals(currentFieldName)) {
                List<String> fields = Lists.newArrayListWithCapacity(4);
                while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                    fields.add(parser.text());
                }
                fieldsNames = fields.toArray(new String[fields.size()]);
            } else {
                throw new ElasticsearchParseException("unknown parameter [" + currentFieldName + "] while parsing terms facet [" + facetName + "]");
            }
        } else if (token.isValue()) {
            if ("field".equals(currentFieldName)) {
                field = parser.text();
            } else if (ScriptService.SCRIPT_INLINE.match(currentFieldName)) {
                script = parser.text();
                scriptType = ScriptService.ScriptType.INLINE;
            } else if (ScriptService.SCRIPT_ID.match(currentFieldName)) {
                script = parser.text();
                scriptType = ScriptService.ScriptType.INDEXED;
            } else if (ScriptService.SCRIPT_FILE.match(currentFieldName)) {
                script = parser.text();
                scriptType = ScriptService.ScriptType.FILE;
            } else if (ScriptService.SCRIPT_LANG.match(currentFieldName)) {
                scriptLang = parser.text();
            } else if ("size".equals(currentFieldName)) {
                size = parser.intValue();
            } else if ("shard_size".equals(currentFieldName) || "shardSize".equals(currentFieldName)) {
                shardSize = parser.intValue();
            } else if ("all_terms".equals(currentFieldName) || "allTerms".equals(currentFieldName)) {
                allTerms = parser.booleanValue();
            } else if ("regex".equals(currentFieldName)) {
                regex = parser.text();
            } else if ("regex_flags".equals(currentFieldName) || "regexFlags".equals(currentFieldName)) {
                regexFlags = parser.text();
            } else if ("order".equals(currentFieldName) || "comparator".equals(currentFieldName)) {
                comparatorType = TermsFacet.ComparatorType.fromString(parser.text());
            } else if ("execution_hint".equals(currentFieldName) || "executionHint".equals(currentFieldName)) {
                executionHint = parser.textOrNull();
            } else {
                throw new ElasticsearchParseException("unknown parameter [" + currentFieldName + "] while parsing terms facet [" + facetName + "]");
            }
        }
    }

    if (fieldsNames != null && fieldsNames.length == 1) {
        field = fieldsNames[0];
        fieldsNames = null;
    }

    Pattern pattern = null;
    if (regex != null) {
        pattern = Regex.compile(regex, regexFlags);
    }

    SearchScript searchScript = null;
    if (script != null) {
        searchScript = context.scriptService().search(context.lookup(), scriptLang, script, scriptType, params);
    }

    // shard_size cannot be smaller than size, as we need to fetch at least <size> entries from every
    // shard in order to return <size>
    if (shardSize < size) {
        shardSize = size;
    }

    if (fieldsNames != null) {
        // in case of multiple fields, we only collect the fields that are mapped and facet on them
        ArrayList<FieldMapper> mappers = new ArrayList<>(fieldsNames.length);
        for (int i = 0; i < fieldsNames.length; i++) {
            FieldMapper mapper = context.smartNameFieldMapper(fieldsNames[i]);
            if (mapper != null) {
                mappers.add(mapper);
            }
        }
        if (mappers.isEmpty()) {
            // none of the fields is mapped
            return new UnmappedFieldExecutor(size, comparatorType);
        }
        return new FieldsTermsStringFacetExecutor(mappers.toArray(new FieldMapper[mappers.size()]), size, shardSize, comparatorType, allTerms, context, excluded, pattern, searchScript);
    }

    if (field == null && script != null) {
        return new ScriptTermsStringFieldFacetExecutor(size, shardSize, comparatorType, context, excluded, pattern, scriptLang, script, scriptType, params, context.cacheRecycler());
    }

    if (field == null) {
        throw new ElasticsearchParseException("terms facet [" + facetName + "] must have a field, fields or script parameter");
    }

    FieldMapper fieldMapper = context.smartNameFieldMapper(field);
    if (fieldMapper == null) {
        return new UnmappedFieldExecutor(size, comparatorType);
    }

    IndexFieldData indexFieldData = context.fieldData().getForField(fieldMapper);
    if (indexFieldData instanceof IndexNumericFieldData) {
        IndexNumericFieldData indexNumericFieldData = (IndexNumericFieldData) indexFieldData;
        if (indexNumericFieldData.getNumericType().isFloatingPoint()) {
            return new TermsDoubleFacetExecutor(indexNumericFieldData, size, shardSize, comparatorType, allTerms, context, excluded, searchScript, context.cacheRecycler());
        } else {
            return new TermsLongFacetExecutor(indexNumericFieldData, size, shardSize, comparatorType, allTerms, context, excluded, searchScript, context.cacheRecycler());
        }
    } else {
        if (script != null || "map".equals(executionHint)) {
            return new TermsStringFacetExecutor(indexFieldData, size, shardSize, comparatorType, allTerms, context, excluded, pattern, searchScript);
        } else if (indexFieldData instanceof IndexOrdinalsFieldData) {
            return new TermsStringOrdinalsFacetExecutor((IndexOrdinalsFieldData) indexFieldData, size, shardSize, comparatorType, allTerms, context, excluded, pattern, ordinalsCacheAbove);
        } else {
            return new TermsStringFacetExecutor(indexFieldData, size, shardSize, comparatorType, allTerms, context, excluded, pattern, searchScript);
        }
    }
}
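// A hedged illustration (assumed facet and field names, not part of the original sources): the
// request fragment that parse() above consumes is the inner "terms" object built here; each key
// corresponds to one branch of the token loop. Built with the standard XContentBuilder API
// (org.elasticsearch.common.xcontent.XContentFactory).
static XContentBuilder exampleTermsFacetSource() throws IOException {
    return XContentFactory.jsonBuilder()
            .startObject()
                .startObject("facets")
                    .startObject("tags")                          // facetName
                        .startObject("terms")
                            .field("field", "tag")                // -> field
                            .field("size", 10)                    // -> size
                            .field("shard_size", 50)              // -> shardSize (raised to size if smaller)
                            .field("order", "count")              // -> comparatorType
                            .field("all_terms", false)            // -> allTerms
                            .array("exclude", "foo", "bar")       // -> excluded
                            .field("regex", "[a-z]+")             // -> pattern
                            .field("regex_flags", "CASE_INSENSITIVE")
                            .field("execution_hint", "map")       // -> executionHint ("map" selects TermsStringFacetExecutor for string fields)
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();
}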