private void handleFieldWildcards(TermVectorRequest request) { Set<String> fieldNames = new HashSet<>(); for (String pattern : request.selectedFields()) { fieldNames.addAll(indexShard.mapperService().simpleMatchToIndexNames(pattern)); } request.selectedFields(fieldNames.toArray(Strings.EMPTY_ARRAY)); }
private Set<String> getFieldsToGenerate( Map<String, String> perAnalyzerField, Fields fieldsObject) { Set<String> selectedFields = new HashSet<>(); for (String fieldName : fieldsObject) { if (perAnalyzerField.containsKey(fieldName)) { selectedFields.add(fieldName); } } return selectedFields; }
private Fields addGeneratedTermVectors( Engine.GetResult get, Fields termVectorsByField, TermVectorRequest request, Set<String> selectedFields) throws IOException { /* only keep valid fields */ Set<String> validFields = new HashSet<>(); for (String field : selectedFields) { FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(field); if (!isValidField(fieldMapper)) { continue; } // already retrieved, only if the analyzer hasn't been overridden at the field if (fieldMapper.fieldType().storeTermVectors() && (request.perFieldAnalyzer() == null || !request.perFieldAnalyzer().containsKey(field))) { continue; } validFields.add(field); } if (validFields.isEmpty()) { return termVectorsByField; } /* generate term vectors from fetched document fields */ GetResult getResult = indexShard .getService() .get( get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false); Fields generatedTermVectors = generateTermVectors( getResult.getFields().values(), request.offsets(), request.perFieldAnalyzer()); /* merge with existing Fields */ if (termVectorsByField == null) { return generatedTermVectors; } else { return mergeFields(termVectorsByField, generatedTermVectors); } }
/** * determines if the passed term is likely to be of interest in "more like" comparisons * * @param term The word being considered * @return true if should be ignored, false if should be used in further analysis */ private boolean isNoiseWord(String term) { int len = term.length(); if (minWordLen > 0 && len < minWordLen) { return true; } if (maxWordLen > 0 && len > maxWordLen) { return true; } return stopWords != null && stopWords.contains(term); }
/** * Return a query that will return docs like the passed Fields. * * @return a query that will return docs like the passed Fields. */ public Query like(Fields... likeFields) throws IOException { // get all field names Set<String> fieldNames = new HashSet<>(); for (Fields fields : likeFields) { for (String fieldName : fields) { fieldNames.add(fieldName); } } // term selection is per field, then appended to a single boolean query BooleanQuery bq = new BooleanQuery(); for (String fieldName : fieldNames) { Map<String, Int> termFreqMap = new HashMap<>(); for (Fields fields : likeFields) { Terms vector = fields.terms(fieldName); if (vector != null) { addTermFrequencies(termFreqMap, vector, fieldName); } } addToQuery(createQueue(termFreqMap, fieldName), bq); } return bq; }
@Test public void testDuelDoubles() throws Exception { final String mapping = XContentFactory.jsonBuilder() .startObject() .startObject("type") .startObject("properties") .startObject("float") .field("type", "float") .startObject("fielddata") .field("format", "doc_values") .endObject() .endObject() .startObject("double") .field("type", "double") .startObject("fielddata") .field("format", "doc_values") .endObject() .endObject() .endObject() .endObject() .endObject() .string(); final DocumentMapper mapper = mapperService.documentMapperParser().parse(mapping); Random random = getRandom(); int atLeast = scaledRandomIntBetween(1000, 1500); final int maxNumValues = randomBoolean() ? 1 : randomIntBetween(2, 40); float[] values = new float[maxNumValues]; for (int i = 0; i < atLeast; i++) { int numValues = randomInt(maxNumValues); float def = randomBoolean() ? randomFloat() : Float.NaN; // FD loses values if they are duplicated, so we must deduplicate for this test Set<Float> vals = new HashSet<Float>(); for (int j = 0; j < numValues; ++j) { if (randomBoolean()) { vals.add(def); } else { vals.add(randomFloat()); } } numValues = vals.size(); int upto = 0; for (Float f : vals) { values[upto++] = f.floatValue(); } XContentBuilder doc = XContentFactory.jsonBuilder().startObject().startArray("float"); for (int j = 0; j < numValues; ++j) { doc = doc.value(values[j]); } doc = doc.endArray().startArray("double"); for (int j = 0; j < numValues; ++j) { doc = doc.value(values[j]); } doc = doc.endArray().endObject(); final ParsedDocument d = mapper.parse("type", Integer.toString(i), doc.bytes()); writer.addDocument(d.rootDoc()); if (random.nextInt(10) == 0) { refreshReader(); } } AtomicReaderContext context = refreshReader(); Map<FieldDataType, Type> typeMap = new HashMap<>(); typeMap.put( new FieldDataType("double", ImmutableSettings.builder().put("format", "array")), Type.Double); typeMap.put( new FieldDataType("float", ImmutableSettings.builder().put("format", "array")), Type.Float); typeMap.put( new FieldDataType("double", ImmutableSettings.builder().put("format", "doc_values")), Type.Double); typeMap.put( new FieldDataType("float", ImmutableSettings.builder().put("format", "doc_values")), Type.Float); ArrayList<Entry<FieldDataType, Type>> list = new ArrayList<>(typeMap.entrySet()); while (!list.isEmpty()) { Entry<FieldDataType, Type> left; Entry<FieldDataType, Type> right; if (list.size() > 1) { left = list.remove(random.nextInt(list.size())); right = list.remove(random.nextInt(list.size())); } else { right = left = list.remove(0); } ifdService.clear(); IndexNumericFieldData leftFieldData = getForField(left.getKey(), left.getValue().name().toLowerCase(Locale.ROOT)); ifdService.clear(); IndexNumericFieldData rightFieldData = getForField(right.getKey(), right.getValue().name().toLowerCase(Locale.ROOT)); duelFieldDataDouble(random, context, leftFieldData, rightFieldData); duelFieldDataDouble(random, context, rightFieldData, leftFieldData); DirectoryReader perSegment = DirectoryReader.open(writer, true); CompositeReaderContext composite = perSegment.getContext(); List<AtomicReaderContext> leaves = composite.leaves(); for (AtomicReaderContext atomicReaderContext : leaves) { duelFieldDataDouble(random, atomicReaderContext, leftFieldData, rightFieldData); } } }
/** determines if the passed term is to be skipped all together */ private boolean isSkipTerm(@Nullable String field, String value) { return field != null && skipTerms != null && skipTerms.contains(new Term(field, value)); }