Code Example #1
File: OntologyMapper.java Project: hardikgw/BioSolr
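A helper that indexes a collection of string values through the given field mapper. When the mapper is not yet registered, it parses the data within a new-mapper scope, collects the resulting field and object mappers via listener aggregators, registers them on the document mapper, marks the mappings as modified, and publishes the mapper into the shared map under a mutex; otherwise it simply parses the data.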
  private void addFieldData(
      ParseContext context, FieldMapper<String> mapper, Collection<String> data)
      throws IOException {
    if (data != null && !data.isEmpty()) {
      if (mappers.get(mapper.names().indexName()) == null) {
        // New mapper
        context.setWithinNewMapper();
        try {
          parseData(context, mapper, data);

          FieldMapperListener.Aggregator newFields = new FieldMapperListener.Aggregator();
          ObjectMapperListener.Aggregator newObjects = new ObjectMapperListener.Aggregator();
          mapper.traverse(newFields);
          mapper.traverse(newObjects);
          // callback on adding those fields!
          context.docMapper().addFieldMappers(newFields.mappers);
          context.docMapper().addObjectMappers(newObjects.mappers);

          context.setMappingsModified();

          synchronized (mutex) {
            UpdateInPlaceMap<String, FieldMapper<String>>.Mutator mappingMutator =
                this.mappers.mutator();
            mappingMutator.put(mapper.names().indexName(), mapper);
            mappingMutator.close();
          }
        } finally {
          context.clearWithinNewMapper();
        }
      } else {
        // Mapper already added
        parseData(context, mapper, data);
      }
    }
  }
Code Example #2
File: OntologyMapper.java Project: hardikgw/BioSolr
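Releases resources by closing every field mapper registered with the ontology mapper.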
 @Override
 public void close() {
   for (FieldMapper<String> mapper : mappers.values()) {
     mapper.close();
   }
   //		disposeHelper();
 }
Code Example #3
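Builds term vectors for an artificial document supplied in a term vector request. The document is parsed first (which, as the comment notes, updates the mapping), then each root-level field is filtered: duplicates, invalid fields, fields without stored term vectors (unless all fields were requested), and unselected fields are skipped before the remaining values are handed to generateTermVectors.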
  private Fields generateTermVectorsFromDoc(TermVectorRequest request, boolean doAllFields)
      throws IOException {
    // parse the document; at the moment this does update the mapping, just like percolate
    ParsedDocument parsedDocument =
        parseDocument(indexShard.shardId().getIndex(), request.type(), request.doc());

    // select the right fields and generate term vectors
    ParseContext.Document doc = parsedDocument.rootDoc();
    Collection<String> seenFields = new HashSet<>();
    Collection<GetField> getFields = new HashSet<>();
    for (IndexableField field : doc.getFields()) {
      FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(field.name());
      if (seenFields.contains(field.name())) {
        continue;
      } else {
        seenFields.add(field.name());
      }
      if (!isValidField(fieldMapper)) {
        continue;
      }
      if (request.selectedFields() == null
          && !doAllFields
          && !fieldMapper.fieldType().storeTermVectors()) {
        continue;
      }
      if (request.selectedFields() != null && !request.selectedFields().contains(field.name())) {
        continue;
      }
      String[] values = doc.getValues(field.name());
      getFields.add(new GetField(field.name(), Arrays.asList((Object[]) values)));
    }
    return generateTermVectors(getFields, request.offsets(), request.perFieldAnalyzer());
  }
Code Example #4
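Serializes an attachment mapper to XContent: the mapper type, the legacy path setting for pre-2.x indices, each sub-field mapper inside a fields object, and finally the multi-fields.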
  @Override
  public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
    builder.startObject(simpleName());
    builder.field("type", CONTENT_TYPE);
    if (indexCreatedBefore2x) {
      builder.field("path", pathType.name().toLowerCase(Locale.ROOT));
    }

    builder.startObject("fields");
    contentMapper.toXContent(builder, params);
    authorMapper.toXContent(builder, params);
    titleMapper.toXContent(builder, params);
    nameMapper.toXContent(builder, params);
    dateMapper.toXContent(builder, params);
    keywordsMapper.toXContent(builder, params);
    contentTypeMapper.toXContent(builder, params);
    contentLengthMapper.toXContent(builder, params);
    languageMapper.toXContent(builder, params);
    builder.endObject();

    multiFields.toXContent(builder, params);
    builder.endObject();
    return builder;
  }
Code Example #5
File: OntologyMapper.java Project: hardikgw/BioSolr
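Adds one Lucene Field per value to the current document, selecting the URI or label field type depending on the mapper's name.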
 private void parseData(
     ParseContext context, FieldMapper<String> mapper, Collection<String> values)
     throws IOException {
   for (String value : values) {
     Field field =
         new Field(
             mapper.names().indexName(),
             value,
             isUriField(mapper.name()) ? Defaults.URI_FIELD_TYPE : Defaults.LABEL_FIELD_TYPE);
     context.doc().add(field);
   }
 }
Code Example #6
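Generates term vectors on the fly for selected fields that are valid but whose term vectors are not already stored (or whose analyzer has been overridden in the request), then merges them with any term vectors already fetched from the index.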
  private Fields addGeneratedTermVectors(
      Engine.GetResult get,
      Fields termVectorsByField,
      TermVectorRequest request,
      Set<String> selectedFields)
      throws IOException {
    /* only keep valid fields */
    Set<String> validFields = new HashSet<>();
    for (String field : selectedFields) {
      FieldMapper fieldMapper = indexShard.mapperService().smartNameFieldMapper(field);
      if (!isValidField(fieldMapper)) {
        continue;
      }
      // skip fields whose term vectors are already stored, unless the analyzer has been overridden for this field
      if (fieldMapper.fieldType().storeTermVectors()
          && (request.perFieldAnalyzer() == null
              || !request.perFieldAnalyzer().containsKey(field))) {
        continue;
      }
      validFields.add(field);
    }

    if (validFields.isEmpty()) {
      return termVectorsByField;
    }

    /* generate term vectors from fetched document fields */
    GetResult getResult =
        indexShard
            .getService()
            .get(
                get,
                request.id(),
                request.type(),
                validFields.toArray(Strings.EMPTY_ARRAY),
                null,
                false);
    Fields generatedTermVectors =
        generateTermVectors(
            getResult.getFields().values(), request.offsets(), request.perFieldAnalyzer());

    /* merge with existing Fields */
    if (termVectorsByField == null) {
      return generatedTermVectors;
    } else {
      return mergeFields(termVectorsByField, generatedTermVectors);
    }
  }
Code Example #7
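A field qualifies for term vector generation only if it is a string field and it is indexed.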
 private boolean isValidField(FieldMapper field) {
   // must be a string
   if (!(field instanceof StringFieldMapper)) {
     return false;
   }
   // and must be indexed
   if (field.fieldType().indexOptions() == IndexOptions.NONE) {
     return false;
   }
   return true;
 }
Code Example #8
File: OntologyMapper.java Project: hardikgw/BioSolr
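Forwards a traversal request to every registered field mapper.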
 @Override
 public void traverse(FieldMapperListener fieldMapperListener) {
   for (FieldMapper<String> mapper : mappers.values()) {
     mapper.traverse(fieldMapperListener);
   }
 }
Code Example #9
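The parse entry point of the attachment mapper. It reads the raw content plus the optional _content_type, _name, _language, _indexed_chars and _detect_language parameters, extracts text with Tika, optionally detects the language, and then routes the extracted content and each available metadata value (name, date, title, author, keywords, content type, content length) through its sub-field mapper, honoring the ignoreErrors flag throughout.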
  @Override
  public Mapper parse(ParseContext context) throws IOException {
    byte[] content = null;
    String contentType = null;
    int indexedChars = defaultIndexedChars;
    boolean langDetect = defaultLangDetect;
    String name = null;
    String language = null;

    XContentParser parser = context.parser();
    XContentParser.Token token = parser.currentToken();
    if (token == XContentParser.Token.VALUE_STRING) {
      content = parser.binaryValue();
    } else {
      String currentFieldName = null;
      while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
        if (token == XContentParser.Token.FIELD_NAME) {
          currentFieldName = parser.currentName();
        } else if (token == XContentParser.Token.VALUE_STRING) {
          if ("_content".equals(currentFieldName)) {
            content = parser.binaryValue();
          } else if ("_content_type".equals(currentFieldName)) {
            contentType = parser.text();
          } else if ("_name".equals(currentFieldName)) {
            name = parser.text();
          } else if ("_language".equals(currentFieldName)) {
            language = parser.text();
          }
        } else if (token == XContentParser.Token.VALUE_NUMBER) {
          if ("_indexed_chars".equals(currentFieldName)
              || "_indexedChars".equals(currentFieldName)) {
            indexedChars = parser.intValue();
          }
        } else if (token == XContentParser.Token.VALUE_BOOLEAN) {
          if ("_detect_language".equals(currentFieldName)
              || "_detectLanguage".equals(currentFieldName)) {
            langDetect = parser.booleanValue();
          }
        }
      }
    }

    // Throw a clean exception when no content is provided (fix #23)
    if (content == null) {
      throw new MapperParsingException("No content is provided.");
    }

    Metadata metadata = new Metadata();
    if (contentType != null) {
      metadata.add(Metadata.CONTENT_TYPE, contentType);
    }
    if (name != null) {
      metadata.add(Metadata.RESOURCE_NAME_KEY, name);
    }

    String parsedContent;
    try {
      parsedContent = TikaImpl.parse(content, metadata, indexedChars);
    } catch (Throwable e) {
      // #18: errors can be ignored when Tika fails to parse the data
      if (!ignoreErrors) {
        logger.trace("exception caught", e);
        throw new MapperParsingException(
            "Failed to extract ["
                + indexedChars
                + "] characters of text for ["
                + name
                + "] : "
                + e.getMessage(),
            e);
      } else {
        logger.debug(
            "Failed to extract [{}] characters of text for [{}]: [{}]",
            indexedChars,
            name,
            e.getMessage());
        logger.trace("exception caught", e);
      }
      return null;
    }

    context = context.createExternalValueContext(parsedContent);
    contentMapper.parse(context);

    if (langDetect) {
      try {
        if (language != null) {
          metadata.add(Metadata.CONTENT_LANGUAGE, language);
        } else {
          LanguageIdentifier identifier = new LanguageIdentifier(parsedContent);
          language = identifier.getLanguage();
        }
        context = context.createExternalValueContext(language);
        languageMapper.parse(context);
      } catch (Throwable t) {
        logger.debug("Cannot detect language: [{}]", t.getMessage());
      }
    }

    if (name != null) {
      try {
        context = context.createExternalValueContext(name);
        nameMapper.parse(context);
      } catch (MapperParsingException e) {
        if (!ignoreErrors) throw e;
        if (logger.isDebugEnabled())
          logger.debug(
              "Ignoring MapperParsingException catch while parsing name: [{}]", e.getMessage());
      }
    }

    if (metadata.get(Metadata.DATE) != null) {
      try {
        context = context.createExternalValueContext(metadata.get(Metadata.DATE));
        dateMapper.parse(context);
      } catch (MapperParsingException e) {
        if (!ignoreErrors) throw e;
        if (logger.isDebugEnabled())
          logger.debug(
              "Ignoring MapperParsingException catch while parsing date: [{}]: [{}]",
              e.getMessage(),
              context.externalValue());
      }
    }

    if (metadata.get(Metadata.TITLE) != null) {
      try {
        context = context.createExternalValueContext(metadata.get(Metadata.TITLE));
        titleMapper.parse(context);
      } catch (MapperParsingException e) {
        if (!ignoreErrors) throw e;
        if (logger.isDebugEnabled())
          logger.debug(
              "Ignoring MapperParsingException catch while parsing title: [{}]: [{}]",
              e.getMessage(),
              context.externalValue());
      }
    }

    if (metadata.get(Metadata.AUTHOR) != null) {
      try {
        context = context.createExternalValueContext(metadata.get(Metadata.AUTHOR));
        authorMapper.parse(context);
      } catch (MapperParsingException e) {
        if (!ignoreErrors) throw e;
        if (logger.isDebugEnabled())
          logger.debug(
              "Ignoring MapperParsingException catch while parsing author: [{}]: [{}]",
              e.getMessage(),
              context.externalValue());
      }
    }

    if (metadata.get(Metadata.KEYWORDS) != null) {
      try {
        context = context.createExternalValueContext(metadata.get(Metadata.KEYWORDS));
        keywordsMapper.parse(context);
      } catch (MapperParsingException e) {
        if (!ignoreErrors) throw e;
        if (logger.isDebugEnabled())
          logger.debug(
              "Ignoring MapperParsingException catch while parsing keywords: [{}]: [{}]",
              e.getMessage(),
              context.externalValue());
      }
    }

    if (contentType == null) {
      contentType = metadata.get(Metadata.CONTENT_TYPE);
    }
    if (contentType != null) {
      try {
        context = context.createExternalValueContext(contentType);
        contentTypeMapper.parse(context);
      } catch (MapperParsingException e) {
        if (!ignoreErrors) throw e;
        if (logger.isDebugEnabled())
          logger.debug(
              "Ignoring MapperParsingException catch while parsing content_type: [{}]: [{}]",
              e.getMessage(),
              context.externalValue());
      }
    }

    int length = content.length;
    // If we have CONTENT_LENGTH from Tika we use it
    if (metadata.get(Metadata.CONTENT_LENGTH) != null) {
      length = Integer.parseInt(metadata.get(Metadata.CONTENT_LENGTH));
    }

    try {
      context = context.createExternalValueContext(length);
      contentLengthMapper.parse(context);
    } catch (MapperParsingException e) {
      if (!ignoreErrors) throw e;
      if (logger.isDebugEnabled())
        logger.debug(
            "Ignoring MapperParsingException catch while parsing content_length: [{}]: [{}]",
            e.getMessage(),
            context.externalValue());
    }

    //        multiFields.parse(this, context);

    return null;
  }