/**
 * Analyzes the percolator {@code query} and stores its extracted terms plus an extraction-result
 * marker on the document, so candidate queries can be pre-filtered at percolate time.
 *
 * <p>On analysis failure only an {@code EXTRACTION_FAILED} marker is written (the query must then
 * always be evaluated); otherwise each term is written as {@code field<SEP>value} and the marker
 * records whether extraction fully covers the query or is only an approximation.
 */
void processQuery(Query query, ParseContext context) {
  ParseContext.Document doc = context.doc();
  FieldType pft = (FieldType) this.fieldType();
  QueryAnalyzer.Result result;
  try {
    result = QueryAnalyzer.analyze(query);
  } catch (QueryAnalyzer.UnsupportedQueryException e) {
    // Query type not supported by the analyzer: mark as failed and bail out.
    // NOTE(review): name comes from pft.extractionResultField but fieldType() from the
    // instance field — presumably the same field; confirm before unifying.
    doc.add(
        new Field(
            pft.extractionResultField.name(), EXTRACTION_FAILED, extractionResultField.fieldType()));
    return;
  }
  for (Term term : result.terms) {
    // Encode each term as "<field><separator><bytes>" in a single stored value.
    BytesRefBuilder encoded = new BytesRefBuilder();
    encoded.append(new BytesRef(term.field()));
    encoded.append(FIELD_VALUE_SEPARATOR);
    encoded.append(term.bytes());
    doc.add(new Field(queryTermsField.name(), encoded.toBytesRef(), queryTermsField.fieldType()));
  }
  // Verified extraction fully represents the query; otherwise it is only a superset filter.
  doc.add(
      new Field(
          extractionResultField.name(),
          result.verified ? EXTRACTION_COMPLETE : EXTRACTION_PARTIAL,
          extractionResultField.fieldType()));
}
private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields) throws IOException { // parse the document, at the moment we do update the mapping, just like percolate ParsedDocument parsedDocument = parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc()); // select the right fields and generate term vectors ParseContext.Document doc = parsedDocument.rootDoc(); Collection<String> seenFields = new HashSet<>(); Collection<GetField> getFields = new HashSet<>(); for (IndexableField field : doc.getFields()) { FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(field.name()); if (seenFields.contains(field.name())) { continue; } else { seenFields.add(field.name()); } if (!isValidField(fieldMapper)) { continue; } if (request.selectedFields() == null && !doAllFields && !fieldMapper.fieldType().storeTermVectors()) { continue; } if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) { continue; } String[] values = doc.getValues(field.name()); getFields.add(new GetField(field.name(), Arrays.asList((Object[]) values))); } return generateTermVectors(getFields, request.offsets(), request.perFieldAnalyzer()); }
/**
 * Runs the given classpath sample file through the attachment mapper; unless a failure is
 * expected, asserts that content was extracted and dumps the extracted metadata at debug level.
 */
protected void testMapper(String filename, boolean errorExpected) throws IOException {
  byte[] html =
      copyToBytesFromClasspath(
          "/org/elasticsearch/index/mapper/attachment/test/sample-files/" + filename);
  BytesReference json =
      jsonBuilder()
          .startObject()
          .startObject("file")
          .field("_name", filename)
          .field("_content", html)
          .endObject()
          .endObject()
          .bytes();
  ParseContext.Document doc = docMapper.parse("person", "person", "1", json).rootDoc();
  if (errorExpected) {
    // Extraction is expected to fail for this sample; nothing further to verify.
    return;
  }
  assertThat(
      doc.get(docMapper.mappers().getMapper("file.content").fieldType().name()),
      not(isEmptyOrNullString()));
  logger.debug(
      "-> extracted content: {}",
      doc.get(docMapper.mappers().getMapper("file").fieldType().name()));
  logger.debug("-> extracted metadata:");
  // Dump every known metadata sub-field of the attachment.
  for (String metadataField :
      new String[] {AUTHOR, CONTENT_LENGTH, CONTENT_TYPE, DATE, KEYWORDS, LANGUAGE, NAME, TITLE}) {
    printMetadataContent(doc, metadataField);
  }
}
@Override
protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
  // Records the names of all fields present in each document, unless disabled.
  if (fieldType().isEnabled() == false) {
    return;
  }
  // Whether this field is indexed or stored does not depend on the document, so decide once
  // here instead of re-evaluating it for every extracted field name in the innermost loop.
  // If it is neither, no Field would ever be added and the work below is pointless.
  if (fieldType().indexOptions() == IndexOptions.NONE && fieldType().stored() == false) {
    return;
  }
  for (ParseContext.Document document : context.docs()) {
    // Snapshot the field names first: document.add() below grows the document's field list,
    // which must not be iterated while being modified.
    final List<String> paths = new ArrayList<>();
    for (IndexableField field : document.getFields()) {
      paths.add(field.name());
    }
    for (String path : paths) {
      for (String fieldName : extractFieldNames(path)) {
        document.add(new Field(fieldType().name(), fieldName, fieldType()));
      }
    }
  }
}
/**
 * Indexes one JSON document containing two attachments — a valid HTML file followed by an
 * encrypted PDF — and verifies that every field of the HTML attachment is extracted while every
 * field of the encrypted (unparseable) attachment stays null.
 */
@Test
public void testMultipleDocsEncryptedLast() throws IOException {
  DocumentMapperParser mapperParser =
      new DocumentMapperParser(
          new Index("test"),
          ImmutableSettings.EMPTY,
          new AnalysisService(new Index("test")),
          null,
          null,
          null);
  mapperParser.putTypeParser(AttachmentMapper.CONTENT_TYPE, new AttachmentMapper.TypeParser());
  String mapping =
      copyToStringFromClasspath("/org/elasticsearch/index/mapper/multipledocs/test-mapping.json");
  DocumentMapper docMapper = mapperParser.parse(mapping);

  // file1: plain HTML that extracts cleanly; file2: encrypted PDF that cannot be parsed.
  byte[] htmlBytes =
      copyToBytesFromClasspath(
          "/org/elasticsearch/index/mapper/xcontent/htmlWithValidDateMeta.html");
  byte[] encryptedPdfBytes =
      copyToBytesFromClasspath("/org/elasticsearch/index/mapper/xcontent/encrypted.pdf");
  BytesReference source =
      jsonBuilder()
          .startObject()
          .field("_id", 1)
          .field("file1", htmlBytes)
          .field("file2", encryptedPdfBytes)
          .endObject()
          .bytes();
  ParseContext.Document doc = docMapper.parse(source).rootDoc();

  // The HTML attachment must be fully extracted: content plus all metadata fields.
  assertThat(
      doc.get(docMapper.mappers().smartName("file1").mapper().names().indexName()),
      containsString("World"));
  assertThat(
      doc.get(docMapper.mappers().smartName("file1.title").mapper().names().indexName()),
      equalTo("Hello"));
  assertThat(
      doc.get(docMapper.mappers().smartName("file1.author").mapper().names().indexName()),
      equalTo("kimchy"));
  assertThat(
      doc.get(docMapper.mappers().smartName("file1.keywords").mapper().names().indexName()),
      equalTo("elasticsearch,cool,bonsai"));
  assertThat(
      doc.get(docMapper.mappers().smartName("file1.content_type").mapper().names().indexName()),
      equalTo("text/html; charset=ISO-8859-1"));
  assertThat(
      doc.getField(
              docMapper.mappers().smartName("file1.content_length").mapper().names().indexName())
          .numericValue()
          .longValue(),
      is(344L));

  // The encrypted attachment cannot be parsed, so none of its fields may be present.
  assertThat(
      doc.get(docMapper.mappers().smartName("file2").mapper().names().indexName()), nullValue());
  assertThat(
      doc.get(docMapper.mappers().smartName("file2.title").mapper().names().indexName()),
      nullValue());
  assertThat(
      doc.get(docMapper.mappers().smartName("file2.author").mapper().names().indexName()),
      nullValue());
  assertThat(
      doc.get(docMapper.mappers().smartName("file2.keywords").mapper().names().indexName()),
      nullValue());
  assertThat(
      doc.get(docMapper.mappers().smartName("file2.content_type").mapper().names().indexName()),
      nullValue());
  assertThat(
      doc.getField(
          docMapper.mappers().smartName("file2.content_length").mapper().names().indexName()),
      nullValue());
}
/** Logs, at debug level, the value stored in {@code doc} for the attachment metadata field. */
private void printMetadataContent(ParseContext.Document doc, String field) {
  String indexedName = docMapper.mappers().getMapper("file." + field).fieldType().name();
  logger.debug("- [{}]: [{}]", field, doc.get(indexedName));
}