示例#1
0
  /**
   * Builds a mapping between the header for the incoming data to be scored and the header used to
   * train the model. Attributes are matched by name.
   *
   * <p>For each model attribute {@code i}, {@code m_attributeMap[i]} is set to the index of the
   * same-named attribute in the incoming header, or to {@code -1} when that attribute is missing
   * or has a different type. Every missing/mismatched attribute is also recorded in {@code
   * m_missingMismatch}, keyed by attribute name with a human-readable description as the value.
   *
   * <p>The class attribute of the model header (if set) is still mapped and recorded like any
   * other attribute, but it does not contribute to the 50% failure threshold: the threshold is
   * computed over the non-class attributes only, so a problem with the class attribute must not
   * be counted against it.
   *
   * @param modelHeader the header of the data used to train the model
   * @param incomingHeader the header of the incoming data
   * @throws DistributedWekaException if more than 50% of the non-class attributes expected by the
   *     model are missing or have a type mismatch with the incoming data
   */
  protected void buildAttributeMap(Instances modelHeader, Instances incomingHeader)
      throws DistributedWekaException {
    int numModelAtts = modelHeader.numAttributes();
    int classIndex = modelHeader.classIndex();
    m_attributeMap = new int[numModelAtts];

    int problemCount = 0;
    for (int i = 0; i < numModelAtts; i++) {
      Attribute modAtt = modelHeader.attribute(i);
      Attribute incomingAtt = incomingHeader.attribute(modAtt.name());

      // Work out what (if anything) is wrong with this attribute.
      String problem = null;
      if (incomingAtt == null) {
        problem = "missing from incoming data";
      } else if (modAtt.type() != incomingAtt.type()) {
        problem =
            "type mismatch - model: "
                + Attribute.typeToString(modAtt)
                + " != incoming: "
                + Attribute.typeToString(incomingAtt);
      }

      if (problem == null) {
        m_attributeMap[i] = incomingAtt.index();
        continue;
      }

      // -1 marks a model attribute that has no usable counterpart in the incoming data.
      m_attributeMap[i] = -1;
      m_missingMismatch.put(modAtt.name(), problem);

      // The class attribute is excluded from the denominator below, so it must be
      // excluded from the numerator too; otherwise the threshold is skewed.
      if (i != classIndex) {
        problemCount++;
      }
    }

    // Number of non-class attributes the model expects to see.
    int numInputAtts = classIndex >= 0 ? numModelAtts - 1 : numModelAtts;
    if (problemCount > numInputAtts / 2) {
      throw new DistributedWekaException(
          "More than 50% of the attributes that the model "
              + "is expecting to see are either missing or have a type mismatch in the "
              + "incoming data.");
    }
  }
示例#2
0
  /**
   * Constructs a mapping between the fields of a mining schema and the attributes of an incoming
   * data set. Fields are matched to incoming attributes by name.
   *
   * <p>For each mining schema field {@code i}, {@code m_fieldsMap[i]} receives the index of the
   * matching incoming attribute. For nominal and string fields, {@code m_nominalValueMaps[i]}
   * receives an array that maps each incoming value index to the index of the same label in the
   * mining schema attribute, or to {@code UNKNOWN_NOMINAL_VALUE} when the label does not occur in
   * the mining schema. Differences in the number of values and unknown labels are reported as
   * warnings rather than errors.
   *
   * <p>Side effect: if the mining schema has a class attribute and {@code dataSet} has no class
   * set, the same-named attribute of {@code dataSet} is set as its class.
   *
   * @param dataSet the incoming instances; may have its class attribute set by this constructor
   * @param miningSchema the mining schema whose fields are to be mapped
   * @param log the logger that receives warnings; if {@code null}, warnings are written to {@code
   *     System.err}
   * @throws Exception if a mining schema field has no same-named incoming attribute, if a matched
   *     attribute has a different type, or if the class attribute of the mining schema cannot be
   *     reconciled with the incoming instances
   */
  public MappingInfo(Instances dataSet, MiningSchema miningSchema, Logger log) throws Exception {
    m_log = log;
    Instances fieldsI = miningSchema.getMiningSchemaAsInstances();

    m_fieldsMap = new int[fieldsI.numAttributes()];
    m_nominalValueMaps = new int[fieldsI.numAttributes()][];

    for (int i = 0; i < fieldsI.numAttributes(); i++) {
      Attribute miningSchemaAtt = fieldsI.attribute(i);
      String schemaAttName = miningSchemaAtt.name();

      // Look the incoming attribute up by name (returns null when there is no match).
      Attribute incomingAtt = dataSet.attribute(schemaAttName);
      if (incomingAtt == null) {
        throw new Exception(
            "[MappingInfo] Unable to find a match for mining schema "
                + "attribute "
                + schemaAttName
                + " in the "
                + "incoming instances!");
      }

      // check type match
      if (miningSchemaAtt.type() != incomingAtt.type()) {
        throw new Exception(
            "[MappingInfo] type mismatch for field "
                + schemaAttName
                + ". Mining schema type "
                + miningSchemaAtt.toString()
                + ". Incoming type "
                + incomingAtt.toString()
                + ".");
      }

      // check nominal values (number, names...); a differing count is only a warning
      if (miningSchemaAtt.numValues() != incomingAtt.numValues()) {
        String warningString =
            "[MappingInfo] WARNING: incoming nominal attribute "
                + incomingAtt.name()
                + " does not have the same "
                + "number of values as the corresponding mining "
                + "schema attribute.";
        if (m_log != null) {
          m_log.logMessage(warningString);
        } else {
          System.err.println(warningString);
        }
      }

      if (miningSchemaAtt.isNominal() || miningSchemaAtt.isString()) {
        // Translate each incoming value index into the mining schema's value index.
        int[] valuesMap = new int[incomingAtt.numValues()];
        for (int k = 0; k < incomingAtt.numValues(); k++) {
          String incomingNomVal = incomingAtt.value(k);
          int indexInSchema = miningSchemaAtt.indexOfValue(incomingNomVal);
          if (indexInSchema < 0) {
            String warningString =
                "[MappingInfo] WARNING: incoming nominal attribute "
                    + incomingAtt.name()
                    + " has value "
                    + incomingNomVal
                    + " that doesn't occur in the mining schema.";
            if (m_log != null) {
              m_log.logMessage(warningString);
            } else {
              System.err.println(warningString);
            }
            valuesMap[k] = UNKNOWN_NOMINAL_VALUE;
          } else {
            valuesMap[k] = indexInSchema;
          }
        }
        m_nominalValueMaps[i] = valuesMap;
      }

      m_fieldsMap[i] = incomingAtt.index();
    }

    // check class attribute (if set)
    if (fieldsI.classIndex() >= 0) {
      if (dataSet.classIndex() < 0) {
        // first see if we can find a matching class
        String className = fieldsI.classAttribute().name();
        Attribute classMatch = dataSet.attribute(className);
        if (classMatch == null) {
          throw new Exception(
              "[MappingInfo] Can't find match for target field "
                  + className
                  + " in incoming instances!");
        }
        // NOTE: this mutates the caller's data set.
        dataSet.setClass(classMatch);
      } else if (!fieldsI.classAttribute().name().equals(dataSet.classAttribute().name())) {
        throw new Exception(
            "[MappingInfo] class attribute in mining schema does not match "
                + "class attribute in incoming instances!");
      }
    }

    // Set up the textual description of the mapping
    fieldsMappingString(fieldsI, dataSet);
  }