private void addFieldData(ParseContext context, FieldMapper<String> mapper, Collection<String> data) throws IOException {
    if (data != null && !data.isEmpty()) {
        if (mappers.get(mapper.names().indexName()) == null) {
            // New mapper
            context.setWithinNewMapper();
            try {
                parseData(context, mapper, data);

                FieldMapperListener.Aggregator newFields = new FieldMapperListener.Aggregator();
                ObjectMapperListener.Aggregator newObjects = new ObjectMapperListener.Aggregator();
                mapper.traverse(newFields);
                mapper.traverse(newObjects);
                // callback on adding those fields!
                context.docMapper().addFieldMappers(newFields.mappers);
                context.docMapper().addObjectMappers(newObjects.mappers);

                context.setMappingsModified();

                synchronized (mutex) {
                    UpdateInPlaceMap<String, FieldMapper<String>>.Mutator mappingMutator = this.mappers.mutator();
                    mappingMutator.put(mapper.names().indexName(), mapper);
                    mappingMutator.close();
                }
            } finally {
                context.clearWithinNewMapper();
            }
        } else {
            // Mapper already added
            parseData(context, mapper, data);
        }
    }
}
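// A minimal sketch (illustrative, not the plugin's code) of the same
// "create once, then reuse" registration performed above, using a standard
// ConcurrentHashMap in place of the internal UpdateInPlaceMap. The real code
// uses a copy-on-write map guarded by a mutex so that readers can iterate
// the mappers without locking; the map type and method names below are
// assumptions for illustration only.
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class MapperRegistrySketch<M> {
    private final ConcurrentMap<String, M> mappers = new ConcurrentHashMap<>();

    /** Returns true if this call registered the mapper, i.e. it was new. */
    public boolean registerIfAbsent(String indexName, M mapper) {
        return mappers.putIfAbsent(indexName, mapper) == null;
    }
}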
@Override
public void close() {
    for (FieldMapper<String> mapper : mappers.values()) {
        mapper.close();
    }
    // disposeHelper();
}
private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields) throws IOException {
    // parse the document; at the moment this does update the mapping, just as percolate does
    ParsedDocument parsedDocument = parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc());

    // select the right fields and generate term vectors
    ParseContext.Document doc = parsedDocument.rootDoc();
    Collection<String> seenFields = new HashSet<>();
    Collection<GetField> getFields = new HashSet<>();
    for (IndexableField field : doc.getFields()) {
        FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(field.name());
        if (seenFields.contains(field.name())) {
            continue;
        } else {
            seenFields.add(field.name());
        }
        if (!isValidField(fieldMapper)) {
            continue;
        }
        if (request.selectedFields() == null && !doAllFields && !fieldMapper.fieldType().storeTermVectors()) {
            continue;
        }
        if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) {
            continue;
        }
        String[] values = doc.getValues(field.name());
        getFields.add(new GetField(field.name(), Arrays.asList((Object[]) values)));
    }
    return generateTermVectors(getFields, request.offsets(), request.perFieldAnalyzer());
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
    builder.startObject(simpleName());
    builder.field("type", CONTENT_TYPE);
    if (indexCreatedBefore2x) {
        builder.field("path", pathType.name().toLowerCase(Locale.ROOT));
    }

    builder.startObject("fields");
    contentMapper.toXContent(builder, params);
    authorMapper.toXContent(builder, params);
    titleMapper.toXContent(builder, params);
    nameMapper.toXContent(builder, params);
    dateMapper.toXContent(builder, params);
    keywordsMapper.toXContent(builder, params);
    contentTypeMapper.toXContent(builder, params);
    contentLengthMapper.toXContent(builder, params);
    languageMapper.toXContent(builder, params);
    builder.endObject(); // closes "fields"

    multiFields.toXContent(builder, params);
    builder.endObject(); // closes the outer object opened with simpleName()
    return builder;
}
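// A minimal, standalone sketch of how the paired startObject/endObject calls
// above nest, assuming the standard XContentBuilder API; the object name
// "my_attachment" and the "content" sub-field are illustrative, not taken
// from a live mapping.
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;

public class XContentSketch {
    public static void main(String[] args) throws Exception {
        XContentBuilder b = XContentFactory.jsonBuilder();
        b.startObject();                  // root object
        b.startObject("my_attachment");   // corresponds to simpleName()
        b.field("type", "attachment");    // corresponds to CONTENT_TYPE
        b.startObject("fields");          // sub-mapper section
        b.startObject("content").field("type", "string").endObject();
        b.endObject();                    // closes "fields"
        b.endObject();                    // closes "my_attachment"
        b.endObject();                    // closes root
        // Prints: {"my_attachment":{"type":"attachment","fields":{"content":{"type":"string"}}}}
        System.out.println(b.string());
    }
}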
private void parseData(ParseContext context, FieldMapper<String> mapper, Collection<String> values) throws IOException {
    for (String value : values) {
        Field field = new Field(
                mapper.names().indexName(),
                value,
                isUriField(mapper.name()) ? Defaults.URI_FIELD_TYPE : Defaults.LABEL_FIELD_TYPE);
        context.doc().add(field);
    }
}
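// A minimal sketch of how FieldType constants like Defaults.URI_FIELD_TYPE and
// Defaults.LABEL_FIELD_TYPE might be defined; the exact settings here are an
// assumption, not the plugin's actual values. The idea: a URI is kept as a
// single untokenized term, while a label goes through the analyzer.
// Assumes Lucene 5.x APIs.
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;

public final class FieldTypeSketch {
    public static final FieldType URI_FIELD_TYPE = new FieldType();
    public static final FieldType LABEL_FIELD_TYPE = new FieldType();

    static {
        URI_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); // indexed, no freqs/positions
        URI_FIELD_TYPE.setTokenized(false);                // keep the whole URI as one term
        URI_FIELD_TYPE.setStored(true);
        URI_FIELD_TYPE.freeze();                           // make the type immutable

        LABEL_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        LABEL_FIELD_TYPE.setTokenized(true);               // analyze label text
        LABEL_FIELD_TYPE.setStored(true);
        LABEL_FIELD_TYPE.freeze();
    }
}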
private Fields addGeneratedTermVectors(Engine.GetResult get, Fields termVectorsByField, TermVectorRequest request, Set<String> selectedFields) throws IOException {
    /* only keep valid fields */
    Set<String> validFields = new HashSet<>();
    for (String field : selectedFields) {
        FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(field);
        if (!isValidField(fieldMapper)) {
            continue;
        }
        // term vectors are already stored for this field; skip it unless the analyzer was overridden at the field level
        if (fieldMapper.fieldType().storeTermVectors()
                && (request.perFieldAnalyzer() == null || !request.perFieldAnalyzer().containsKey(field))) {
            continue;
        }
        validFields.add(field);
    }

    if (validFields.isEmpty()) {
        return termVectorsByField;
    }

    /* generate term vectors from fetched document fields */
    GetResult getResult = indexShard.getService().get(
            get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false);
    Fields generatedTermVectors = generateTermVectors(
            getResult.getFields().values(), request.offsets(), request.perFieldAnalyzer());

    /* merge with existing Fields */
    if (termVectorsByField == null) {
        return generatedTermVectors;
    } else {
        return mergeFields(termVectorsByField, generatedTermVectors);
    }
}
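// A minimal, self-contained sketch of generating term vectors on the fly from
// a field's text. The real generateTermVectors is not shown in this section,
// so the MemoryIndex usage below is an assumption about the approach, not the
// plugin's code. Assumes Lucene 5.2+ APIs.
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.util.BytesRef;

public class GeneratedTermVectorsSketch {
    public static void main(String[] args) throws Exception {
        MemoryIndex index = new MemoryIndex(true); // true: also store offsets
        index.addField("body", "the quick brown fox jumps over the lazy dog", new StandardAnalyzer());

        LeafReader reader = (LeafReader) index.createSearcher().getIndexReader();
        Terms terms = reader.terms("body");
        TermsEnum it = terms.iterator();
        BytesRef term;
        while ((term = it.next()) != null) {
            System.out.println(term.utf8ToString() + " freq=" + it.totalTermFreq());
        }
    }
}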
private boolean isValidField(FieldMapper field) {
    // must be a string
    if (!(field instanceof StringFieldMapper)) {
        return false;
    }
    // and must be indexed
    if (field.fieldType().indexOptions() == IndexOptions.NONE) {
        return false;
    }
    return true;
}
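// A minimal, standalone sketch of the "must be indexed" check at the Lucene
// level (illustrative, not the plugin's code): a StoredField carries
// IndexOptions.NONE, while a StringField is indexed. Assumes Lucene 5.x.
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexOptions;

public class IndexOptionsSketch {
    static boolean isIndexed(Field field) {
        return field.fieldType().indexOptions() != IndexOptions.NONE;
    }

    public static void main(String[] args) {
        System.out.println(isIndexed(new StoredField("raw", "bytes-only")));         // false
        System.out.println(isIndexed(new StringField("id", "42", Field.Store.YES))); // true
    }
}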
@Override
public void traverse(FieldMapperListener fieldMapperListener) {
    for (FieldMapper<String> mapper : mappers.values()) {
        mapper.traverse(fieldMapperListener);
    }
}
@Override
public Mapper parse(ParseContext context) throws IOException {
    byte[] content = null;
    String contentType = null;
    int indexedChars = defaultIndexedChars;
    boolean langDetect = defaultLangDetect;
    String name = null;
    String language = null;

    XContentParser parser = context.parser();
    XContentParser.Token token = parser.currentToken();
    if (token == XContentParser.Token.VALUE_STRING) {
        content = parser.binaryValue();
    } else {
        String currentFieldName = null;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentFieldName = parser.currentName();
            } else if (token == XContentParser.Token.VALUE_STRING) {
                if ("_content".equals(currentFieldName)) {
                    content = parser.binaryValue();
                } else if ("_content_type".equals(currentFieldName)) {
                    contentType = parser.text();
                } else if ("_name".equals(currentFieldName)) {
                    name = parser.text();
                } else if ("_language".equals(currentFieldName)) {
                    language = parser.text();
                }
            } else if (token == XContentParser.Token.VALUE_NUMBER) {
                if ("_indexed_chars".equals(currentFieldName) || "_indexedChars".equals(currentFieldName)) {
                    indexedChars = parser.intValue();
                }
            } else if (token == XContentParser.Token.VALUE_BOOLEAN) {
                if ("_detect_language".equals(currentFieldName) || "_detectLanguage".equals(currentFieldName)) {
                    langDetect = parser.booleanValue();
                }
            }
        }
    }

    // Throw a clean exception when no content is provided. Fix #23
    if (content == null) {
        throw new MapperParsingException("No content is provided.");
    }

    Metadata metadata = new Metadata();
    if (contentType != null) {
        metadata.add(Metadata.CONTENT_TYPE, contentType);
    }
    if (name != null) {
        metadata.add(Metadata.RESOURCE_NAME_KEY, name);
    }

    String parsedContent;
    try {
        parsedContent = TikaImpl.parse(content, metadata, indexedChars);
    } catch (Throwable e) {
        // #18: we can ignore errors when Tika fails to parse data
        if (!ignoreErrors) {
            logger.trace("exception caught", e);
            throw new MapperParsingException("Failed to extract [" + indexedChars
                    + "] characters of text for [" + name + "] : " + e.getMessage(), e);
        } else {
            logger.debug("Failed to extract [{}] characters of text for [{}]: [{}]",
                    indexedChars, name, e.getMessage());
            logger.trace("exception caught", e);
        }
        return null;
    }

    context = context.createExternalValueContext(parsedContent);
    contentMapper.parse(context);

    if (langDetect) {
        try {
            if (language != null) {
                metadata.add(Metadata.CONTENT_LANGUAGE, language);
            } else {
                LanguageIdentifier identifier = new LanguageIdentifier(parsedContent);
                language = identifier.getLanguage();
            }
            context = context.createExternalValueContext(language);
            languageMapper.parse(context);
        } catch (Throwable t) {
            logger.debug("Cannot detect language: [{}]", t.getMessage());
        }
    }

    if (name != null) {
        try {
            context = context.createExternalValueContext(name);
            nameMapper.parse(context);
        } catch (MapperParsingException e) {
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) {
                logger.debug("Ignoring MapperParsingException caught while parsing name: [{}]",
                        e.getMessage());
            }
        }
    }

    if (metadata.get(Metadata.DATE) != null) {
        try {
            context = context.createExternalValueContext(metadata.get(Metadata.DATE));
            dateMapper.parse(context);
        } catch (MapperParsingException e) {
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) {
                logger.debug("Ignoring MapperParsingException caught while parsing date: [{}]: [{}]",
                        e.getMessage(), context.externalValue());
            }
        }
    }

    if (metadata.get(Metadata.TITLE) != null) {
        try {
            context = context.createExternalValueContext(metadata.get(Metadata.TITLE));
            titleMapper.parse(context);
        } catch (MapperParsingException e) {
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) {
                logger.debug("Ignoring MapperParsingException caught while parsing title: [{}]: [{}]",
                        e.getMessage(), context.externalValue());
            }
        }
    }

    if (metadata.get(Metadata.AUTHOR) != null) {
        try {
            context = context.createExternalValueContext(metadata.get(Metadata.AUTHOR));
            authorMapper.parse(context);
        } catch (MapperParsingException e) {
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) {
                logger.debug("Ignoring MapperParsingException caught while parsing author: [{}]: [{}]",
                        e.getMessage(), context.externalValue());
            }
        }
    }

    if (metadata.get(Metadata.KEYWORDS) != null) {
        try {
            context = context.createExternalValueContext(metadata.get(Metadata.KEYWORDS));
            keywordsMapper.parse(context);
        } catch (MapperParsingException e) {
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) {
                logger.debug("Ignoring MapperParsingException caught while parsing keywords: [{}]: [{}]",
                        e.getMessage(), context.externalValue());
            }
        }
    }

    if (contentType == null) {
        contentType = metadata.get(Metadata.CONTENT_TYPE);
    }
    if (contentType != null) {
        try {
            context = context.createExternalValueContext(contentType);
            contentTypeMapper.parse(context);
        } catch (MapperParsingException e) {
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) {
                logger.debug("Ignoring MapperParsingException caught while parsing content_type: [{}]: [{}]",
                        e.getMessage(), context.externalValue());
            }
        }
    }

    int length = content.length;
    // If we have CONTENT_LENGTH from Tika, use it
    if (metadata.get(Metadata.CONTENT_LENGTH) != null) {
        length = Integer.parseInt(metadata.get(Metadata.CONTENT_LENGTH));
    }
    try {
        context = context.createExternalValueContext(length);
        contentLengthMapper.parse(context);
    } catch (MapperParsingException e) {
        if (!ignoreErrors) throw e;
        if (logger.isDebugEnabled()) {
            logger.debug("Ignoring MapperParsingException caught while parsing content_length: [{}]: [{}]",
                    e.getMessage(), context.externalValue());
        }
    }

    // multiFields.parse(this, context);

    return null;
}
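// A minimal, standalone sketch of the extraction flow above using the public
// Tika facade. TikaImpl is this plugin's own wrapper and is not shown in this
// section, so the facade call below is an assumption about what it roughly
// does, not the plugin's implementation. Assumes Tika 1.x on the classpath.
import java.io.ByteArrayInputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.tika.Tika;
import org.apache.tika.language.LanguageIdentifier;
import org.apache.tika.metadata.Metadata;

public class TikaFlowSketch {
    public static void main(String[] args) throws Exception {
        byte[] content = Files.readAllBytes(Paths.get(args[0])); // e.g. a PDF

        Metadata metadata = new Metadata();
        metadata.add(Metadata.RESOURCE_NAME_KEY, args[0]); // filename hint for type detection

        int indexedChars = 100_000; // cap extracted text, like _indexed_chars
        String parsedContent = new Tika().parseToString(
                new ByteArrayInputStream(content), metadata, indexedChars);

        // language detection, as done for the language sub-field above
        String language = new LanguageIdentifier(parsedContent).getLanguage();

        System.out.println("content-type: " + metadata.get(Metadata.CONTENT_TYPE));
        System.out.println("title:        " + metadata.get(Metadata.TITLE));
        System.out.println("language:     " + language);
    }
}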