コード例 #1
0
 /**
  * Analyzes {@code query} and records the outcome on the document currently being parsed.
  *
  * <p>If {@code QueryAnalyzer.analyze} throws {@code UnsupportedQueryException}, a single
  * extraction-result field carrying {@code EXTRACTION_FAILED} is added and nothing else is
  * written. Otherwise one query-terms field is added per extracted term (term field name, then
  * {@code FIELD_VALUE_SEPARATOR}, then the term bytes), followed by one extraction-result field
  * carrying {@code EXTRACTION_COMPLETE} when the analysis result is verified and {@code
  * EXTRACTION_PARTIAL} when it is not.
  *
  * @param query the query to analyze
  * @param context parse context whose current document receives the fields
  */
 void processQuery(Query query, ParseContext context) {
   ParseContext.Document target = context.doc();
   FieldType percolatorType = (FieldType) this.fieldType();

   QueryAnalyzer.Result analysis;
   try {
     analysis = QueryAnalyzer.analyze(query);
   } catch (QueryAnalyzer.UnsupportedQueryException e) {
     // The query cannot be analyzed: mark the extraction as failed and stop here.
     target.add(
         new Field(
             percolatorType.extractionResultField.name(),
             EXTRACTION_FAILED,
             extractionResultField.fieldType()));
     return;
   }

   // One field per extracted term, encoded as <term field><separator><term bytes>.
   for (Term extracted : analysis.terms) {
     BytesRefBuilder encoded = new BytesRefBuilder();
     encoded.append(new BytesRef(extracted.field()));
     encoded.append(FIELD_VALUE_SEPARATOR);
     encoded.append(extracted.bytes());
     target.add(
         new Field(queryTermsField.name(), encoded.toBytesRef(), queryTermsField.fieldType()));
   }

   if (!analysis.verified) {
     target.add(
         new Field(
             extractionResultField.name(), EXTRACTION_PARTIAL, extractionResultField.fieldType()));
     return;
   }
   target.add(
       new Field(
           extractionResultField.name(), EXTRACTION_COMPLETE, extractionResultField.fieldType()));
 }
コード例 #2
0
  /**
   * Parses the document carried by {@code request} and generates term vectors for a selection of
   * its fields.
   *
   * <p>Each distinct field name of the parsed root document is considered once. A field is used
   * only when its mapper passes {@code isValidField} and one of the following holds:
   *
   * <ul>
   *   <li>the request names selected fields and the field name is among them;
   *   <li>the request names no selected fields and either {@code doAllFields} is true or the
   *       field's mapper stores term vectors.
   * </ul>
   *
   * @param request the term vector request supplying the document, type, selected fields, offsets
   *     flag and per-field analyzers
   * @param doAllFields whether every valid field should be used when the request selects none
   * @return the result of {@code generateTermVectors} for the chosen fields
   * @throws IOException propagated from parsing or term vector generation
   */
  private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields)
      throws IOException {
    // parse the document, at the moment we do update the mapping, just like percolate
    ParsedDocument parsedDocument =
        parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc());

    // select the right fields and generate term vectors
    ParseContext.Document doc = parsedDocument.rootDoc();
    Collection<String> seenFields = new HashSet<>();
    Collection<GetField> getFields = new HashSet<>();
    for (IndexableField field : doc.getFields()) {
      String name = field.name();
      // add() reports false for a name that is already recorded, so a single call both checks
      // and registers the name; duplicates are dropped before any mapper lookup is paid for.
      if (!seenFields.add(name)) {
        continue;
      }
      FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(name);
      if (!isValidField(fieldMapper)) {
        continue;
      }
      if (request.selectedFields() == null) {
        // No explicit selection: fall back to fields that store term vectors unless all fields
        // were requested.
        if (!doAllFields && !fieldMapper.fieldType().storeTermVectors()) {
          continue;
        }
      } else if (!request.selectedFields().contains(name)) {
        continue;
      }
      String[] values = doc.getValues(name);
      getFields.add(new GetField(name, Arrays.asList((Object[]) values)));
    }
    return generateTermVectors(getFields, request.offsets(), request.perFieldAnalyzer());
  }
コード例 #3
0
  /**
   * Parses one sample file as an attachment and, unless an error is expected, checks that content
   * was extracted and logs the extracted metadata at debug level.
   *
   * <p>When {@code errorExpected} is true nothing is asserted after parsing.
   *
   * @param filename name of the sample file below the attachment test {@code sample-files}
   *     classpath directory; also sent as the attachment's {@code _name}
   * @param errorExpected whether extraction is expected to fail for this file
   * @throws IOException propagated from reading the resource, building the JSON or parsing
   */
  protected void testMapper(String filename, boolean errorExpected) throws IOException {
    byte[] fileBytes =
        copyToBytesFromClasspath(
            "/org/elasticsearch/index/mapper/attachment/test/sample-files/" + filename);

    // Wrap the raw bytes in {"file": {"_name": ..., "_content": ...}}.
    BytesReference source =
        jsonBuilder()
            .startObject()
            .startObject("file")
            .field("_name", filename)
            .field("_content", fileBytes)
            .endObject()
            .endObject()
            .bytes();

    ParseContext.Document doc = docMapper.parse("person", "person", "1", source).rootDoc();
    if (errorExpected) {
      return;
    }

    assertThat(
        doc.get(docMapper.mappers().getMapper("file.content").fieldType().name()),
        not(isEmptyOrNullString()));
    // NOTE(review): this log line reads the "file" mapper while the assertion above reads
    // "file.content" - confirm which one actually holds the extracted text.
    logger.debug(
        "-> extracted content: {}",
        doc.get(docMapper.mappers().getMapper("file").fieldType().name()));
    logger.debug("-> extracted metadata:");
    String[] metadataKeys = {
      AUTHOR, CONTENT_LENGTH, CONTENT_TYPE, DATE, KEYWORDS, LANGUAGE, NAME, TITLE
    };
    for (String key : metadataKeys) {
      printMetadataContent(doc, key);
    }
  }
コード例 #4
0
 /**
  * Adds, to every document of the parse context, one field of this mapper's type for each name
  * that {@code extractFieldNames} yields from the names of the fields already on that document.
  *
  * <p>Nothing is added when the field type is disabled, or when it is neither indexed ({@code
  * IndexOptions.NONE}) nor stored. The new fields are added directly to the documents; the
  * {@code fields} list is not used by this method.
  *
  * @param context parse context supplying the documents to enrich
  * @param fields unused here
  * @throws IOException declared by the overridden method
  */
 @Override
 protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
   if (fieldType().isEnabled() == false) {
     return;
   }
   // This check does not depend on the document, path or extracted name, so evaluate it once
   // and skip the whole traversal when no field could be added anyway.
   final boolean indexedOrStored =
       fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored();
   if (indexedOrStored == false) {
     return;
   }
   for (ParseContext.Document document : context.docs()) {
     // Collect the existing field names before adding anything; presumably this keeps the
     // iteration over the document's fields separate from the additions made below.
     final List<String> paths = new ArrayList<>();
     for (IndexableField field : document.getFields()) {
       paths.add(field.name());
     }
     for (String path : paths) {
       for (String fieldName : extractFieldNames(path)) {
         document.add(new Field(fieldType().name(), fieldName, fieldType()));
       }
     }
   }
 }
  /**
   * Parses a document holding an HTML attachment in {@code file1} and an encrypted PDF in {@code
   * file2}, then checks that the {@code file1} content and metadata are present on the root
   * document while every {@code file2} field is absent.
   */
  @Test
  public void testMultipleDocsEncryptedLast() throws IOException {
    DocumentMapperParser parser =
        new DocumentMapperParser(
            new Index("test"),
            ImmutableSettings.EMPTY,
            new AnalysisService(new Index("test")),
            null,
            null,
            null);
    parser.putTypeParser(AttachmentMapper.CONTENT_TYPE, new AttachmentMapper.TypeParser());

    String mapping =
        copyToStringFromClasspath("/org/elasticsearch/index/mapper/multipledocs/test-mapping.json");
    DocumentMapper docMapper = parser.parse(mapping);
    byte[] htmlBytes =
        copyToBytesFromClasspath(
            "/org/elasticsearch/index/mapper/xcontent/htmlWithValidDateMeta.html");
    byte[] encryptedPdfBytes =
        copyToBytesFromClasspath("/org/elasticsearch/index/mapper/xcontent/encrypted.pdf");

    BytesReference source =
        jsonBuilder()
            .startObject()
            .field("_id", 1)
            .field("file1", htmlBytes)
            .field("file2", encryptedPdfBytes)
            .endObject()
            .bytes();

    ParseContext.Document doc = docMapper.parse(source).rootDoc();

    // file1: the HTML attachment must be fully extracted.
    assertThat(doc.get(indexNameOf(docMapper, "file1")), containsString("World"));
    assertThat(doc.get(indexNameOf(docMapper, "file1.title")), equalTo("Hello"));
    assertThat(doc.get(indexNameOf(docMapper, "file1.author")), equalTo("kimchy"));
    assertThat(
        doc.get(indexNameOf(docMapper, "file1.keywords")), equalTo("elasticsearch,cool,bonsai"));
    assertThat(
        doc.get(indexNameOf(docMapper, "file1.content_type")),
        equalTo("text/html; charset=ISO-8859-1"));
    assertThat(
        doc.getField(indexNameOf(docMapper, "file1.content_length")).numericValue().longValue(),
        is(344L));

    // file2: the encrypted PDF must not contribute any field.
    String[] absentStringFields = {
      "file2", "file2.title", "file2.author", "file2.keywords", "file2.content_type"
    };
    for (String absent : absentStringFields) {
      assertThat(doc.get(indexNameOf(docMapper, absent)), nullValue());
    }
    assertThat(doc.getField(indexNameOf(docMapper, "file2.content_length")), nullValue());
  }

  /** Resolves the index name of the mapper registered under {@code smartName}. */
  private static String indexNameOf(DocumentMapper docMapper, String smartName) {
    return docMapper.mappers().smartName(smartName).mapper().names().indexName();
  }
コード例 #6
0
 /**
  * Logs, at debug level, the value stored on {@code doc} for the {@code file.<field>} mapper.
  *
  * @param doc the parsed document to read from
  * @param field metadata sub-field name, appended to the {@code "file."} prefix
  */
 private void printMetadataContent(ParseContext.Document doc, String field) {
   String mapperPath = "file." + field;
   String storedName = docMapper.mappers().getMapper(mapperPath).fieldType().name();
   logger.debug("- [{}]: [{}]", field, doc.get(storedName));
 }