Пример #1
0
  /**
   * Handles an object value found under {@code currentFieldName}.
   *
   * <p>The field name is pushed onto the content path for the duration of the call. If a mapper is
   * already registered for the field it parses the object. Otherwise the effective dynamic setting
   * (this mapper's own, falling back to the root's when unset) decides: {@code STRICT} throws,
   * {@code TRUE} creates and registers a new object mapper, and anything else skips the object's
   * children.
   *
   * @param context the current parse context
   * @param currentFieldName name of the field holding the object; must not be {@code null}
   * @throws IOException if the underlying parser fails
   * @throws MapperParsingException if {@code currentFieldName} is {@code null}
   * @throws StrictDynamicMappingException if the field is unmapped and dynamic is {@code STRICT}
   */
  private void serializeObject(final ParseContext context, String currentFieldName)
      throws IOException {
    if (currentFieldName == null) {
      final String message =
          "object mapping ["
              + name
              + "] trying to serialize an object with no field associated with it, current value ["
              + context.parser().textOrNull()
              + "]";
      throw new MapperParsingException(message);
    }
    context.path().add(currentFieldName);

    final Mapper registered = mappers.get(currentFieldName);
    if (registered == null) {
      final Dynamic effective = (this.dynamic != null) ? this.dynamic : context.root().dynamic();
      if (effective == Dynamic.STRICT) {
        throw new StrictDynamicMappingException(fullPath, currentFieldName);
      }
      if (effective != Dynamic.TRUE) {
        // dynamic mapping is off: consume the object without indexing anything
        context.parser().skipChildren();
      } else {
        // lock so that the same field is not added twice; the lookup is repeated under the lock
        synchronized (mutex) {
          final Mapper addedMeanwhile = mappers.get(currentFieldName);
          if (addedMeanwhile != null) {
            addedMeanwhile.parse(context);
          } else {
            // the template lookup and the object builder append the field name themselves,
            // so take it off the path first
            context.path().remove();
            Mapper.Builder objectBuilder =
                context.root().findTemplateBuilder(context, currentFieldName, "object");
            if (objectBuilder == null) {
              objectBuilder =
                  MapperBuilders.object(currentFieldName).enabled(true).pathType(pathType);
              // a non-root object passes on its explicitly configured dynamic behaviour
              if (!(this instanceof RootObjectMapper) && this.dynamic != Defaults.DYNAMIC) {
                ((Builder) objectBuilder).dynamic(this.dynamic);
              }
            }
            final BuilderContext builderContext =
                new BuilderContext(context.indexSettings(), context.path());
            final Mapper created = objectBuilder.build(builderContext);
            putDynamicMapper(context, currentFieldName, created);
          }
        }
      }
    } else {
      registered.parse(context);
    }

    context.path().remove();
  }
Пример #2
0
 /**
  * Handles an array value found under {@code lastFieldName}.
  *
  * <p>If the registered mapper is an {@link ArrayValueMapperParser} it consumes the whole array.
  * Otherwise the tokens are read up to the matching END_ARRAY and each element is dispatched by
  * token type to the object, array, null or value handler.
  *
  * @param context the current parse context
  * @param lastFieldName name of the field holding the array
  * @throws IOException if the underlying parser fails
  * @throws MapperParsingException if the input ends before the array is closed
  */
 private void serializeArray(ParseContext context, String lastFieldName) throws IOException {
   // remembered for the error message, since lastFieldName may be reassigned below
   final String arrayFieldName = lastFieldName;
   final Mapper arrayMapper = mappers.get(lastFieldName);
   if (arrayMapper instanceof ArrayValueMapperParser) {
     arrayMapper.parse(context);
     return;
   }

   final XContentParser parser = context.parser();
   for (XContentParser.Token current = parser.nextToken();
       current != XContentParser.Token.END_ARRAY;
       current = parser.nextToken()) {
     if (current == null) {
       throw new MapperParsingException(
           "object mapping for ["
               + name
               + "] with array for ["
               + arrayFieldName
               + "] tried to parse as array, but got EOF, is there a mismatch in types for the same field?");
     }
     if (current == XContentParser.Token.START_OBJECT) {
       serializeObject(context, lastFieldName);
     } else if (current == XContentParser.Token.START_ARRAY) {
       serializeArray(context, lastFieldName);
     } else if (current == XContentParser.Token.FIELD_NAME) {
       lastFieldName = parser.currentName();
     } else if (current == XContentParser.Token.VALUE_NULL) {
       serializeNullValue(context, lastFieldName);
     } else {
       serializeValue(context, lastFieldName, current);
     }
   }
 }
Пример #3
0
 /**
  * Handles a single value token found under {@code currentFieldName}.
  *
  * <p>The value is parsed by the mapper registered for the field, or handed to
  * {@code parseDynamicValue} when no mapper is registered.
  *
  * @param context the current parse context
  * @param currentFieldName name of the field holding the value; must not be {@code null}
  * @param token the value token the parser is positioned on
  * @throws IOException if the underlying parser fails
  * @throws MapperParsingException if {@code currentFieldName} is {@code null}
  */
 private void serializeValue(
     final ParseContext context, String currentFieldName, XContentParser.Token token)
     throws IOException {
   if (currentFieldName == null) {
     final String message =
         "object mapping ["
             + name
             + "] trying to serialize a value with no field associated with it, current value ["
             + context.parser().textOrNull()
             + "]";
     throw new MapperParsingException(message);
   }
   final Mapper registered = mappers.get(currentFieldName);
   if (registered == null) {
     parseDynamicValue(context, currentFieldName, token);
     return;
   }
   registered.parse(context);
 }
Пример #4
0
  /**
   * Handles a single value token found under {@code currentFieldName}.
   *
   * <p>If a mapper is already registered for the field, it parses the value and the method
   * returns. Otherwise the effective dynamic setting is resolved (this mapper's own, falling back
   * to the root's when unset): {@code STRICT} throws, {@code FALSE} silently ignores the value.
   * In the remaining case a new mapper is chosen from the token type while holding {@code mutex},
   * registered with {@code putMapper}, and the mappings are flagged as modified. A newly created
   * mapper is traversed with {@code context.newFieldMappers()} before it parses the value.
   *
   * <p>An empty string with no matching "string" template is treated like a null value: nothing is
   * mapped or indexed.
   *
   * @param context the current parse context
   * @param currentFieldName name of the field holding the value; must not be {@code null}
   * @param token the value token the parser is positioned on
   * @throws IOException if the underlying parser fails
   * @throws MapperParsingException if {@code currentFieldName} is {@code null}
   * @throws StrictDynamicMappingException if the field is unmapped and dynamic is {@code STRICT}
   * @throws ElasticSearchIllegalStateException if the token type is unsupported and no dynamic
   *     template matches
   */
  private void serializeValue(
      final ParseContext context, String currentFieldName, XContentParser.Token token)
      throws IOException {
    if (currentFieldName == null) {
      throw new MapperParsingException(
          "object mapping ["
              + name
              + "] trying to serialize a value with no field associated with it, current value ["
              + context.parser().textOrNull()
              + "]");
    }
    // fast path: the field is already mapped
    Mapper mapper = mappers.get(currentFieldName);
    if (mapper != null) {
      mapper.parse(context);
      return;
    }
    // resolve the effective dynamic setting: our own, or the root's when we have none
    Dynamic dynamic = this.dynamic;
    if (dynamic == null) {
      dynamic = context.root().dynamic();
    }
    if (dynamic == Dynamic.STRICT) {
      throw new StrictDynamicMappingException(fullPath, currentFieldName);
    }
    if (dynamic == Dynamic.FALSE) {
      // unmapped fields are silently ignored
      return;
    }
    // we sync here since we don't want to add this field twice to the document mapper.
    // It's not the end of the world, since we add it to the mappers once we create it,
    // so next time we won't even get here for this field
    boolean newMapper = false;
    synchronized (mutex) {
      // look again under the lock: the field may have been added since the check above
      mapper = mappers.get(currentFieldName);
      if (mapper == null) {
        newMapper = true;
        BuilderContext builderContext = new BuilderContext(context.indexSettings(), context.path());
        if (token == XContentParser.Token.VALUE_STRING) {
          boolean resolved = false;

          // do a quick test to see if it fits a dynamic template, if so, use it.
          // we need to do it here so we can handle things like attachment templates, where calling
          // text (to see if it's a date) causes the binary value to be cleared
          // (resolved is always false at this point, so this check always passes)
          if (!resolved) {
            Mapper.Builder builder =
                context.root().findTemplateBuilder(context, currentFieldName, "string", null);
            if (builder != null) {
              mapper = builder.build(builderContext);
              resolved = true;
            }
          }

          if (!resolved && context.parser().textLength() == 0) {
            // empty string with no mapping, treat it like null value
            return;
          }

          if (!resolved && context.root().dateDetection()) {
            String text = context.parser().text();
            // a safe check since "1" gets parsed as well: only try the date formats when the text
            // has more than one ':', '-' or '/'
            if (Strings.countOccurrencesOf(text, ":") > 1
                || Strings.countOccurrencesOf(text, "-") > 1
                || Strings.countOccurrencesOf(text, "/") > 1) {
              // the first formatter that parses the text wins
              for (FormatDateTimeFormatter dateTimeFormatter :
                  context.root().dynamicDateTimeFormatters()) {
                try {
                  dateTimeFormatter.parser().parseMillis(text);
                  Mapper.Builder builder =
                      context.root().findTemplateBuilder(context, currentFieldName, "date");
                  if (builder == null) {
                    builder = dateField(currentFieldName).dateTimeFormatter(dateTimeFormatter);
                  }
                  mapper = builder.build(builderContext);
                  resolved = true;
                  break;
                } catch (Exception e) {
                  // failure to parse this, continue
                  // NOTE(review): this also swallows failures from the template lookup and build
                }
              }
            }
          }
          if (!resolved && context.root().numericDetection()) {
            String text = context.parser().text();
            // try long first, then double
            try {
              Long.parseLong(text);
              Mapper.Builder builder =
                  context.root().findTemplateBuilder(context, currentFieldName, "long");
              if (builder == null) {
                builder = longField(currentFieldName);
              }
              mapper = builder.build(builderContext);
              resolved = true;
            } catch (Exception e) {
              // not a long number
            }
            if (!resolved) {
              try {
                Double.parseDouble(text);
                Mapper.Builder builder =
                    context.root().findTemplateBuilder(context, currentFieldName, "double");
                if (builder == null) {
                  builder = doubleField(currentFieldName);
                }
                mapper = builder.build(builderContext);
                resolved = true;
              } catch (Exception e) {
                // not a double number
              }
            }
          }
          // DON'T do automatic ip detection logic, since it messes up with docs that have hosts and
          // ips. The disabled detection code is kept below for reference.
          // check if its an ip
          //                if (!resolved && text.indexOf('.') != -1) {
          //                    try {
          //                        IpFieldMapper.ipToLong(text);
          //                        XContentMapper.Builder builder =
          // context.root().findTemplateBuilder(context, currentFieldName, "ip");
          //                        if (builder == null) {
          //                            builder = ipField(currentFieldName);
          //                        }
          //                        mapper = builder.build(builderContext);
          //                        resolved = true;
          //                    } catch (Exception e) {
          //                        // failure to parse, not ip...
          //                    }
          //                }
          // nothing more specific matched: map as a plain string
          if (!resolved) {
            Mapper.Builder builder =
                context.root().findTemplateBuilder(context, currentFieldName, "string");
            if (builder == null) {
              builder = stringField(currentFieldName);
            }
            mapper = builder.build(builderContext);
          }
        } else if (token == XContentParser.Token.VALUE_NUMBER) {
          XContentParser.NumberType numberType = context.parser().numberType();
          if (numberType == XContentParser.NumberType.INT) {
            // when the parser reports the number type as estimated, use the wider long type
            if (context.parser().estimatedNumberType()) {
              Mapper.Builder builder =
                  context.root().findTemplateBuilder(context, currentFieldName, "long");
              if (builder == null) {
                builder = longField(currentFieldName);
              }
              mapper = builder.build(builderContext);
            } else {
              Mapper.Builder builder =
                  context.root().findTemplateBuilder(context, currentFieldName, "integer");
              if (builder == null) {
                builder = integerField(currentFieldName);
              }
              mapper = builder.build(builderContext);
            }
          } else if (numberType == XContentParser.NumberType.LONG) {
            Mapper.Builder builder =
                context.root().findTemplateBuilder(context, currentFieldName, "long");
            if (builder == null) {
              builder = longField(currentFieldName);
            }
            mapper = builder.build(builderContext);
          } else if (numberType == XContentParser.NumberType.FLOAT) {
            // when the parser reports the number type as estimated, use the wider double type
            if (context.parser().estimatedNumberType()) {
              Mapper.Builder builder =
                  context.root().findTemplateBuilder(context, currentFieldName, "double");
              if (builder == null) {
                builder = doubleField(currentFieldName);
              }
              mapper = builder.build(builderContext);
            } else {
              Mapper.Builder builder =
                  context.root().findTemplateBuilder(context, currentFieldName, "float");
              if (builder == null) {
                builder = floatField(currentFieldName);
              }
              mapper = builder.build(builderContext);
            }
          } else if (numberType == XContentParser.NumberType.DOUBLE) {
            Mapper.Builder builder =
                context.root().findTemplateBuilder(context, currentFieldName, "double");
            if (builder == null) {
              builder = doubleField(currentFieldName);
            }
            mapper = builder.build(builderContext);
          }
          // NOTE(review): any other number type would leave mapper null here — confirm that
          // NumberType has no further constants
        } else if (token == XContentParser.Token.VALUE_BOOLEAN) {
          Mapper.Builder builder =
              context.root().findTemplateBuilder(context, currentFieldName, "boolean");
          if (builder == null) {
            builder = booleanField(currentFieldName);
          }
          mapper = builder.build(builderContext);
        } else if (token == XContentParser.Token.VALUE_EMBEDDED_OBJECT) {
          Mapper.Builder builder =
              context.root().findTemplateBuilder(context, currentFieldName, "binary");
          if (builder == null) {
            builder = binaryField(currentFieldName);
          }
          mapper = builder.build(builderContext);
        } else {
          // unknown token type: only a dynamic template (looked up with a null type) can map it
          Mapper.Builder builder =
              context.root().findTemplateBuilder(context, currentFieldName, null);
          if (builder != null) {
            mapper = builder.build(builderContext);
          } else {
            // TODO how do we identify dynamically that its a binary value?
            throw new ElasticSearchIllegalStateException(
                "Can't handle serializing a dynamic type with content token ["
                    + token
                    + "] and field name ["
                    + currentFieldName
                    + "]");
          }
        }
        // register the new mapper and flag the mappings as changed
        putMapper(mapper);
        context.setMappingsModified();
      }
    }
    // outside the lock: traverse a newly created mapper, then parse the value
    if (newMapper) {
      mapper.traverse(context.newFieldMappers());
    }
    mapper.parse(context);
  }
Пример #5
0
  /**
   * Parses the object the parser is currently positioned on, dispatching each child token to the
   * matching handler.
   *
   * <p>A disabled mapper skips the object's children; a null value is ignored. For a nested
   * mapping the fields are collected into a separate {@link Document}, which is added to the
   * context and, depending on the nested settings, copied into the parent and/or root document
   * (except for its uid and type fields).
   *
   * @param context the current parse context
   * @throws IOException if the underlying parser fails
   * @throws MapperParsingException if a concrete value is found where it is not allowed, or the
   *     input ends before the object is closed
   */
  public void parse(ParseContext context) throws IOException {
    if (!enabled) {
      context.parser().skipChildren();
      return;
    }
    final XContentParser parser = context.parser();

    String currentFieldName = parser.currentName();
    XContentParser.Token token = parser.currentToken();
    if (token == XContentParser.Token.VALUE_NULL) {
      // the object is null ("obj1" : null), nothing to do
      return;
    }

    if (token.isValue() && !allowValue()) {
      // a plain value in place of an object is rejected unless allowValue() permits it
      throw new MapperParsingException(
          "object mapping for [" + name + "] tried to parse as object, but found a concrete value");
    }

    Document parentDoc = null;
    if (nested.isNested()) {
      final Document child = new Document();
      // pre-add the uid field when the enclosing document already carries one
      final IndexableField parentUid = context.doc().getField(UidFieldMapper.NAME);
      if (parentUid != null) {
        // a string value is copied as-is; otherwise the uid is read from the UidField
        final Field childUid =
            parentUid.stringValue() != null
                ? new Field(
                    UidFieldMapper.NAME,
                    parentUid.stringValue(),
                    UidFieldMapper.Defaults.NESTED_FIELD_TYPE)
                : new Field(
                    UidFieldMapper.NAME,
                    ((UidField) parentUid).uid(),
                    UidFieldMapper.Defaults.NESTED_FIELD_TYPE);
        child.add(childUid);
      }
      // the nested doc's type field holds the nested type path, so filters can identify it as
      // a nested document
      final Field childType =
          new Field(
              TypeFieldMapper.NAME, nestedTypePathAsString, TypeFieldMapper.Defaults.FIELD_TYPE);
      child.add(childType);
      parentDoc = context.switchDoc(child);
      context.addDoc(child);
    }

    final ContentPath.Type savedPathType = context.path().pathType();
    context.path().pathType(pathType);

    // step past the end of a previous object, if that is where the parser is
    if (token == XContentParser.Token.END_OBJECT) {
      token = parser.nextToken();
    }
    // step into the object being parsed so that the first field name comes next
    if (token == XContentParser.Token.START_OBJECT) {
      token = parser.nextToken();
    }

    for (; token != XContentParser.Token.END_OBJECT; token = parser.nextToken()) {
      if (token == null) {
        throw new MapperParsingException(
            "object mapping for ["
                + name
                + "] tried to parse as object, but got EOF, has a concrete value been provided to it?");
      }
      if (token == XContentParser.Token.FIELD_NAME) {
        currentFieldName = parser.currentName();
      } else if (token == XContentParser.Token.START_OBJECT) {
        serializeObject(context, currentFieldName);
      } else if (token == XContentParser.Token.START_ARRAY) {
        serializeArray(context, currentFieldName);
      } else if (token == XContentParser.Token.VALUE_NULL) {
        serializeNullValue(context, currentFieldName);
      } else if (token.isValue()) {
        serializeValue(context, currentFieldName, token);
      }
    }

    // put back the path type that was active before this object
    context.path().pathType(savedPathType);

    if (nested.isNested()) {
      final Document finished = context.switchDoc(parentDoc);
      if (nested.isIncludeInParent()) {
        for (IndexableField field : finished.getFields()) {
          if (!field.name().equals(UidFieldMapper.NAME)
              && !field.name().equals(TypeFieldMapper.NAME)) {
            context.doc().add(field);
          }
        }
      }
      // skip the root copy when include-in-parent already copied into the root document
      if (nested.isIncludeInRoot()
          && (!nested.isIncludeInParent() || context.doc() != context.rootDoc())) {
        for (IndexableField field : finished.getFields()) {
          if (!field.name().equals(UidFieldMapper.NAME)
              && !field.name().equals(TypeFieldMapper.NAME)) {
            context.rootDoc().add(field);
          }
        }
      }
    }
  }
Пример #6
0
  /**
   * Parses an ontology IRI string and adds the fields derived from the ontology data for it.
   *
   * <p>The current token must be a string holding the IRI. While the fields are added the content
   * path is switched to {@code FULL} with this mapper's name appended; both are restored in a
   * {@code finally} block. When no ontology data is found for the IRI a debug message is logged
   * and no fields are added.
   *
   * <p>For dynamic relation fields, the URI mapper and the label mapper are each built only if
   * that particular mapper is missing, so an existing mapper is reused and a {@code null} mapper
   * is never passed on.
   *
   * @param context the current parse context
   * @throws IOException if the underlying parser fails
   * @throws MapperParsingException if the current token is not a string
   * @throws ElasticsearchException if the ontology helper raises an {@code
   *     OntologyHelperException}
   */
  @Override
  public void parse(ParseContext context) throws IOException {
    XContentParser parser = context.parser();
    XContentParser.Token token = parser.currentToken();
    if (token != XContentParser.Token.VALUE_STRING) {
      throw new MapperParsingException(name() + " does not contain String value");
    }
    String iri = parser.text();

    ContentPath.Type origPathType = context.path().pathType();
    context.path().pathType(ContentPath.Type.FULL);
    context.path().add(names.name());

    try {
      OntologyHelper helper = getHelper(ontologySettings, threadPool);

      OntologyData data = findOntologyData(helper, iri);
      if (data == null) {
        logger.debug("Cannot find OWL class for IRI {}", iri);
      } else {
        addFieldData(
            context,
            getPredefinedMapper(FieldMappings.URI, context),
            Collections.singletonList(iri));

        // Look up the label(s)
        addFieldData(context, getPredefinedMapper(FieldMappings.LABEL, context), data.getLabels());

        // Look up the synonyms
        // NOTE(review): the synonyms field is filled from data.getLabels(), the same source as the
        // label field above. This looks like a copy/paste slip — confirm whether OntologyData
        // offers a synonyms accessor and use it here.
        addFieldData(
            context, getPredefinedMapper(FieldMappings.SYNONYMS, context), data.getLabels());

        // Add the child details
        addRelatedNodesWithLabels(
            context,
            data.getChildIris(),
            getPredefinedMapper(FieldMappings.CHILD_URI, context),
            data.getChildLabels(),
            getPredefinedMapper(FieldMappings.CHILD_LABEL, context));

        // Add the parent details
        addRelatedNodesWithLabels(
            context,
            data.getParentIris(),
            getPredefinedMapper(FieldMappings.PARENT_URI, context),
            data.getParentLabels(),
            getPredefinedMapper(FieldMappings.PARENT_LABEL, context));

        if (ontologySettings.isIncludeIndirect()) {
          // Add the descendant details
          addRelatedNodesWithLabels(
              context,
              data.getDescendantIris(),
              getPredefinedMapper(FieldMappings.DESCENDANT_URI, context),
              data.getDescendantLabels(),
              getPredefinedMapper(FieldMappings.DESCENDANT_LABEL, context));

          // Add the ancestor details
          addRelatedNodesWithLabels(
              context,
              data.getAncestorIris(),
              getPredefinedMapper(FieldMappings.ANCESTOR_URI, context),
              data.getAncestorLabels(),
              getPredefinedMapper(FieldMappings.ANCESTOR_LABEL, context));
        }

        if (ontologySettings.isIncludeRelations()) {
          // Add the related nodes
          Map<String, Collection<String>> relations = data.getRelationIris();

          for (Map.Entry<String, Collection<String>> relation : relations.entrySet()) {
            Collection<String> relatedIris = relation.getValue();

            // Sanitise the relation name
            String sanRelation = relation.getKey().replaceAll("\\W+", "_");
            String uriMapperName = sanRelation + DYNAMIC_URI_FIELD_SUFFIX;
            String labelMapperName = sanRelation + DYNAMIC_LABEL_FIELD_SUFFIX;

            // Get the mappers for the relation
            FieldMapper<String> uriMapper =
                mappers.get(context.path().fullPathAsText(uriMapperName));
            FieldMapper<String> labelMapper =
                mappers.get(context.path().fullPathAsText(labelMapperName));

            if (uriMapper == null || labelMapper == null) {
              // Build whichever mapper is missing; checking each one separately avoids handing
              // a null label mapper on when only the URI mapper already exists
              BuilderContext builderContext =
                  new BuilderContext(context.indexSettings(), context.path());
              if (uriMapper == null) {
                uriMapper =
                    MapperBuilders.stringField(uriMapperName)
                        .store(true)
                        .index(true)
                        .tokenized(false)
                        .build(builderContext);
              }
              if (labelMapper == null) {
                labelMapper =
                    MapperBuilders.stringField(labelMapperName)
                        .store(true)
                        .index(true)
                        .tokenized(true)
                        .build(builderContext);
              }
            }

            addRelatedNodesWithLabels(
                context,
                relatedIris,
                uriMapper,
                helper.findLabelsForIRIs(relatedIris),
                labelMapper);
          }
        }
      }

      helper.updateLastCallTime();
    } catch (OntologyHelperException e) {
      throw new ElasticsearchException("Could not initialise ontology helper", e);
    } finally {
      // always undo the path changes made above, even when parsing fails
      context.path().remove();
      context.path().pathType(origPathType);
    }
  }
Пример #7
0
  /**
   * Handles a value for a field that has no registered mapper, creating one dynamically when the
   * dynamic setting allows it.
   *
   * <p>The effective dynamic setting is this mapper's own, falling back to the root's when unset:
   * {@code STRICT} throws, {@code FALSE} silently ignores the value. Otherwise, while holding
   * {@code mutex}, the field is looked up again; if it is still unmapped a mapper is chosen from
   * the token type, used to parse the value, and only then registered with {@code putMapper},
   * after which the mappings are flagged as modified. If a mapper is found on the second lookup
   * it simply parses the value.
   *
   * <p>An empty string with no matching "string" template is treated like a null value: nothing is
   * mapped or indexed.
   *
   * @param context the current parse context
   * @param currentFieldName name of the unmapped field
   * @param token the value token the parser is positioned on
   * @throws IOException if the underlying parser fails
   * @throws StrictDynamicMappingException if dynamic is {@code STRICT}
   * @throws ElasticsearchIllegalStateException if the token type is unsupported and no dynamic
   *     template matches
   */
  public void parseDynamicValue(
      final ParseContext context, String currentFieldName, XContentParser.Token token)
      throws IOException {
    // resolve the effective dynamic setting: our own, or the root's when we have none
    Dynamic dynamic = this.dynamic;
    if (dynamic == null) {
      dynamic = context.root().dynamic();
    }
    if (dynamic == Dynamic.STRICT) {
      throw new StrictDynamicMappingException(fullPath, currentFieldName);
    }
    if (dynamic == Dynamic.FALSE) {
      // unmapped fields are silently ignored
      return;
    }
    // we sync here since we don't want to add this field twice to the document mapper.
    // It's not the end of the world, since we add it to the mappers once we create it,
    // so next time we won't even get here for this field
    synchronized (mutex) {
      Mapper mapper = mappers.get(currentFieldName);
      if (mapper == null) {
        BuilderContext builderContext = new BuilderContext(context.indexSettings(), context.path());
        if (token == XContentParser.Token.VALUE_STRING) {
          boolean resolved = false;

          // do a quick test to see if it fits a dynamic template, if so, use it.
          // we need to do it here so we can handle things like attachment templates, where calling
          // text (to see if it's a date) causes the binary value to be cleared
          // (resolved is always false at this point, so this check always passes)
          if (!resolved) {
            Mapper.Builder builder =
                context.root().findTemplateBuilder(context, currentFieldName, "string", null);
            if (builder != null) {
              mapper = builder.build(builderContext);
              resolved = true;
            }
          }

          if (!resolved && context.parser().textLength() == 0) {
            // empty string with no mapping, treat it like null value
            return;
          }

          if (!resolved && context.root().dateDetection()) {
            String text = context.parser().text();
            // a safe check since "1" gets parsed as well: only try the date formats when the text
            // has more than one ':', '-' or '/'
            if (Strings.countOccurrencesOf(text, ":") > 1
                || Strings.countOccurrencesOf(text, "-") > 1
                || Strings.countOccurrencesOf(text, "/") > 1) {
              // the first formatter that parses the text wins
              for (FormatDateTimeFormatter dateTimeFormatter :
                  context.root().dynamicDateTimeFormatters()) {
                try {
                  dateTimeFormatter.parser().parseMillis(text);
                  Mapper.Builder builder =
                      context.root().findTemplateBuilder(context, currentFieldName, "date");
                  if (builder == null) {
                    builder = dateField(currentFieldName).dateTimeFormatter(dateTimeFormatter);
                  }
                  mapper = builder.build(builderContext);
                  resolved = true;
                  break;
                } catch (Exception e) {
                  // failure to parse this, continue
                  // NOTE(review): this also swallows failures from the template lookup and build
                }
              }
            }
          }
          if (!resolved && context.root().numericDetection()) {
            String text = context.parser().text();
            // try long first, then double
            try {
              Long.parseLong(text);
              Mapper.Builder builder =
                  context.root().findTemplateBuilder(context, currentFieldName, "long");
              if (builder == null) {
                builder = longField(currentFieldName);
              }
              mapper = builder.build(builderContext);
              resolved = true;
            } catch (Exception e) {
              // not a long number
            }
            if (!resolved) {
              try {
                Double.parseDouble(text);
                Mapper.Builder builder =
                    context.root().findTemplateBuilder(context, currentFieldName, "double");
                if (builder == null) {
                  builder = doubleField(currentFieldName);
                }
                mapper = builder.build(builderContext);
                resolved = true;
              } catch (Exception e) {
                // not a double number
              }
            }
          }
          // nothing more specific matched: map as a plain string
          if (!resolved) {
            Mapper.Builder builder =
                context.root().findTemplateBuilder(context, currentFieldName, "string");
            if (builder == null) {
              builder = stringField(currentFieldName);
            }
            mapper = builder.build(builderContext);
          }
        } else if (token == XContentParser.Token.VALUE_NUMBER) {
          XContentParser.NumberType numberType = context.parser().numberType();
          if (numberType == XContentParser.NumberType.INT) {
            // when the parser reports the number type as estimated, use the wider long type
            if (context.parser().estimatedNumberType()) {
              Mapper.Builder builder =
                  context.root().findTemplateBuilder(context, currentFieldName, "long");
              if (builder == null) {
                builder = longField(currentFieldName);
              }
              mapper = builder.build(builderContext);
            } else {
              Mapper.Builder builder =
                  context.root().findTemplateBuilder(context, currentFieldName, "integer");
              if (builder == null) {
                builder = integerField(currentFieldName);
              }
              mapper = builder.build(builderContext);
            }
          } else if (numberType == XContentParser.NumberType.LONG) {
            Mapper.Builder builder =
                context.root().findTemplateBuilder(context, currentFieldName, "long");
            if (builder == null) {
              builder = longField(currentFieldName);
            }
            mapper = builder.build(builderContext);
          } else if (numberType == XContentParser.NumberType.FLOAT) {
            // when the parser reports the number type as estimated, use the wider double type
            if (context.parser().estimatedNumberType()) {
              Mapper.Builder builder =
                  context.root().findTemplateBuilder(context, currentFieldName, "double");
              if (builder == null) {
                builder = doubleField(currentFieldName);
              }
              mapper = builder.build(builderContext);
            } else {
              Mapper.Builder builder =
                  context.root().findTemplateBuilder(context, currentFieldName, "float");
              if (builder == null) {
                builder = floatField(currentFieldName);
              }
              mapper = builder.build(builderContext);
            }
          } else if (numberType == XContentParser.NumberType.DOUBLE) {
            Mapper.Builder builder =
                context.root().findTemplateBuilder(context, currentFieldName, "double");
            if (builder == null) {
              builder = doubleField(currentFieldName);
            }
            mapper = builder.build(builderContext);
          }
          // NOTE(review): any other number type would leave mapper null here — confirm that
          // NumberType has no further constants
        } else if (token == XContentParser.Token.VALUE_BOOLEAN) {
          Mapper.Builder builder =
              context.root().findTemplateBuilder(context, currentFieldName, "boolean");
          if (builder == null) {
            builder = booleanField(currentFieldName);
          }
          mapper = builder.build(builderContext);
        } else if (token == XContentParser.Token.VALUE_EMBEDDED_OBJECT) {
          Mapper.Builder builder =
              context.root().findTemplateBuilder(context, currentFieldName, "binary");
          if (builder == null) {
            builder = binaryField(currentFieldName);
          }
          mapper = builder.build(builderContext);
        } else {
          // unknown token type: only a dynamic template (looked up with a null type) can map it
          Mapper.Builder builder =
              context.root().findTemplateBuilder(context, currentFieldName, null);
          if (builder != null) {
            mapper = builder.build(builderContext);
          } else {
            // TODO how do we identify dynamically that its a binary value?
            throw new ElasticsearchIllegalStateException(
                "Can't handle serializing a dynamic type with content token ["
                    + token
                    + "] and field name ["
                    + currentFieldName
                    + "]");
          }
        }

        if (context.isWithinNewMapper()) {
          // the context is already flagged as within a new mapper: just parse the value
          mapper.parse(context);
        } else {
          // flag the context for the duration of the parse, then collect the field mappers of
          // the new mapper and add them to the document mapper
          context.setWithinNewMapper();
          try {
            mapper.parse(context);
            FieldMapperListener.Aggregator newFields = new FieldMapperListener.Aggregator();
            mapper.traverse(newFields);
            context.docMapper().addFieldMappers(newFields.mappers);
          } finally {
            context.clearWithinNewMapper();
          }
        }

        // register the mapper only after it has been traversed and the callbacks have run, so
        // that other parsing does not see it before its field mappers have been added
        putMapper(mapper);
        context.setMappingsModified();
      } else {
        // a mapper is already registered for this field: just parse the value
        mapper.parse(context);
      }
    }
  }
Пример #8
0
  /**
   * Parses an attachment value, extracts its text with Tika and feeds the extracted text and
   * metadata to the sub-mappers through external value contexts.
   *
   * <p>The current token is either a plain string holding the binary content, or an object that
   * may carry {@code _content}, {@code _content_type}, {@code _name}, {@code _language},
   * {@code _indexed_chars} / {@code _indexedChars} and {@code _detect_language} /
   * {@code _detectLanguage}.
   *
   * @param context the parse context positioned on the attachment value
   * @return always {@code null}
   * @throws IOException declared for the parser and sub-mapper calls made here
   * @throws MapperParsingException if no content is provided, if text extraction fails while
   *     {@code ignoreErrors} is off, or if a sub-mapper rejects a value while
   *     {@code ignoreErrors} is off
   */
  @Override
  public Mapper parse(ParseContext context) throws IOException {
    byte[] content = null;
    String contentType = null;
    int indexedChars = defaultIndexedChars;
    boolean langDetect = defaultLangDetect;
    String name = null;
    String language = null;

    XContentParser parser = context.parser();
    XContentParser.Token token = parser.currentToken();
    if (token == XContentParser.Token.VALUE_STRING) {
      // Short form: the field value itself is the binary content.
      content = parser.binaryValue();
    } else {
      // Long form: an object whose underscore-prefixed keys override the defaults.
      String currentFieldName = null;
      while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
        if (token == XContentParser.Token.FIELD_NAME) {
          currentFieldName = parser.currentName();
        } else if (token == XContentParser.Token.VALUE_STRING) {
          if ("_content".equals(currentFieldName)) {
            content = parser.binaryValue();
          } else if ("_content_type".equals(currentFieldName)) {
            contentType = parser.text();
          } else if ("_name".equals(currentFieldName)) {
            name = parser.text();
          } else if ("_language".equals(currentFieldName)) {
            language = parser.text();
          }
        } else if (token == XContentParser.Token.VALUE_NUMBER) {
          if ("_indexed_chars".equals(currentFieldName)
              || "_indexedChars".equals(currentFieldName)) {
            indexedChars = parser.intValue();
          }
        } else if (token == XContentParser.Token.VALUE_BOOLEAN) {
          if ("_detect_language".equals(currentFieldName)
              || "_detectLanguage".equals(currentFieldName)) {
            langDetect = parser.booleanValue();
          }
        }
      }
    }

    // Throw clean exception when no content is provided Fix #23
    if (content == null) {
      throw new MapperParsingException("No content is provided.");
    }

    // Caller-supplied hints are handed to Tika through the metadata object.
    Metadata metadata = new Metadata();
    if (contentType != null) {
      metadata.add(Metadata.CONTENT_TYPE, contentType);
    }
    if (name != null) {
      metadata.add(Metadata.RESOURCE_NAME_KEY, name);
    }

    String parsedContent;
    try {
      parsedContent = TikaImpl.parse(content, metadata, indexedChars);
    } catch (Throwable e) {
      // #18: we could ignore errors when Tika does not parse data
      if (!ignoreErrors) {
        logger.trace("exception caught", e);
        throw new MapperParsingException(
            "Failed to extract ["
                + indexedChars
                + "] characters of text for ["
                + name
                + "] : "
                + e.getMessage(),
            e);
      }
      logger.debug(
          "Failed to extract [{}] characters of text for [{}]: [{}]",
          indexedChars,
          name,
          e.getMessage());
      logger.trace("exception caught", e);
      // Nothing was extracted, so none of the sub-mappers are fed.
      return null;
    }

    context = context.createExternalValueContext(parsedContent);
    contentMapper.parse(context);

    if (langDetect) {
      // Any failure in this step is only logged, whatever ignoreErrors is set to.
      try {
        if (language != null) {
          metadata.add(Metadata.CONTENT_LANGUAGE, language);
        } else {
          LanguageIdentifier identifier = new LanguageIdentifier(parsedContent);
          language = identifier.getLanguage();
        }
        context = context.createExternalValueContext(language);
        languageMapper.parse(context);
      } catch (Throwable t) {
        logger.debug("Cannot detect language: [{}]", t.getMessage());
      }
    }

    if (name != null) {
      try {
        context = context.createExternalValueContext(name);
        nameMapper.parse(context);
      } catch (MapperParsingException e) {
        if (!ignoreErrors) {
          throw e;
        }
        if (logger.isDebugEnabled()) {
          logger.debug(
              "Ignoring MapperParsingException catch while parsing name: [{}]", e.getMessage());
        }
      }
    }

    String date = metadata.get(Metadata.DATE);
    if (date != null) {
      try {
        context = context.createExternalValueContext(date);
        dateMapper.parse(context);
      } catch (MapperParsingException e) {
        if (!ignoreErrors) {
          throw e;
        }
        if (logger.isDebugEnabled()) {
          logger.debug(
              "Ignoring MapperParsingException catch while parsing date: [{}]: [{}]",
              e.getMessage(),
              context.externalValue());
        }
      }
    }

    String title = metadata.get(Metadata.TITLE);
    if (title != null) {
      try {
        context = context.createExternalValueContext(title);
        titleMapper.parse(context);
      } catch (MapperParsingException e) {
        if (!ignoreErrors) {
          throw e;
        }
        if (logger.isDebugEnabled()) {
          logger.debug(
              "Ignoring MapperParsingException catch while parsing title: [{}]: [{}]",
              e.getMessage(),
              context.externalValue());
        }
      }
    }

    String author = metadata.get(Metadata.AUTHOR);
    if (author != null) {
      try {
        context = context.createExternalValueContext(author);
        authorMapper.parse(context);
      } catch (MapperParsingException e) {
        if (!ignoreErrors) {
          throw e;
        }
        if (logger.isDebugEnabled()) {
          logger.debug(
              "Ignoring MapperParsingException catch while parsing author: [{}]: [{}]",
              e.getMessage(),
              context.externalValue());
        }
      }
    }

    String keywords = metadata.get(Metadata.KEYWORDS);
    if (keywords != null) {
      try {
        context = context.createExternalValueContext(keywords);
        keywordsMapper.parse(context);
      } catch (MapperParsingException e) {
        if (!ignoreErrors) {
          throw e;
        }
        if (logger.isDebugEnabled()) {
          logger.debug(
              "Ignoring MapperParsingException catch while parsing keywords: [{}]: [{}]",
              e.getMessage(),
              context.externalValue());
        }
      }
    }

    // A caller-supplied content type wins; otherwise use whatever the metadata holds after Tika.
    if (contentType == null) {
      contentType = metadata.get(Metadata.CONTENT_TYPE);
    }
    if (contentType != null) {
      try {
        context = context.createExternalValueContext(contentType);
        contentTypeMapper.parse(context);
      } catch (MapperParsingException e) {
        if (!ignoreErrors) {
          throw e;
        }
        if (logger.isDebugEnabled()) {
          logger.debug(
              "Ignoring MapperParsingException catch while parsing content_type: [{}]: [{}]",
              e.getMessage(),
              context.externalValue());
        }
      }
    }

    int length = content.length;
    // If we have CONTENT_LENGTH from Tika we use it. The value originates from the document
    // itself, so a malformed or out-of-range number must not abort parsing: keep the real byte
    // count in that case.
    String reportedLength = metadata.get(Metadata.CONTENT_LENGTH);
    if (reportedLength != null) {
      try {
        length = Integer.parseInt(reportedLength.trim());
      } catch (NumberFormatException e) {
        logger.debug(
            "Ignoring unparsable content_length [{}], using actual content size [{}]",
            reportedLength,
            length);
      }
    }

    try {
      context = context.createExternalValueContext(length);
      contentLengthMapper.parse(context);
    } catch (MapperParsingException e) {
      if (!ignoreErrors) {
        throw e;
      }
      if (logger.isDebugEnabled()) {
        logger.debug(
            "Ignoring MapperParsingException catch while parsing content_length: [{}]: [{}]",
            e.getMessage(),
            context.externalValue());
      }
    }

    return null;
  }