Esempio n. 1
0
 /** Helper method to represent an Attribute as a single-element vector. */
 private int[] toValueIndexArray(Attribute attribute) {
   TIntArrayList out = new TIntArrayList();
   for (String value : values) {
     int valueIndex = attribute.indexOfValue(value);
     if (valueIndex < 0)
       throw new NoSuchElementException(
           "no such value: '" + value + "' in attribute '" + attribute.toString() + "'");
     out.add(valueIndex);
   }
   return out.toNativeArray();
 }
Esempio n. 2
0
  /**
   * Constructs an instance suitable for passing to the model for scoring
   *
   * @param incoming the incoming instance
   * @return an instance with values mapped to be consistent with what the model is expecting
   */
  protected Instance mapIncomingFieldsToModelFields(Instance incoming) {
    Instances modelHeader = m_model.getHeader();
    double[] vals = new double[modelHeader.numAttributes()];

    for (int i = 0; i < modelHeader.numAttributes(); i++) {

      if (m_attributeMap[i] < 0) {
        // missing or type mismatch
        vals[i] = Utils.missingValue();
        continue;
      }

      Attribute modelAtt = modelHeader.attribute(i);
      Attribute incomingAtt = incoming.dataset().attribute(m_attributeMap[i]);

      if (incoming.isMissing(incomingAtt.index())) {
        vals[i] = Utils.missingValue();
        continue;
      }

      if (modelAtt.isNumeric()) {
        vals[i] = incoming.value(m_attributeMap[i]);
      } else if (modelAtt.isNominal()) {
        String incomingVal = incoming.stringValue(m_attributeMap[i]);
        int modelIndex = modelAtt.indexOfValue(incomingVal);

        if (modelIndex < 0) {
          vals[i] = Utils.missingValue();
        } else {
          vals[i] = modelIndex;
        }
      } else if (modelAtt.isString()) {
        vals[i] = 0;
        modelAtt.setStringValue(incoming.stringValue(m_attributeMap[i]));
      }
    }

    if (modelHeader.classIndex() >= 0) {
      // set class to missing value
      vals[modelHeader.classIndex()] = Utils.missingValue();
    }

    Instance newInst = null;
    if (incoming instanceof SparseInstance) {
      newInst = new SparseInstance(incoming.weight(), vals);
    } else {
      newInst = new DenseInstance(incoming.weight(), vals);
    }

    newInst.setDataset(modelHeader);
    return newInst;
  }
  /**
   * Input an instance for filtering. The instance is processed and made available for output
   * immediately.
   *
   * @param instance the input instance.
   * @return true if the filtered instance may now be collected with output().
   * @throws IllegalStateException if no input structure has been defined.
   */
  public boolean input(Instance instance) {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    if (isOutputFormatDefined()) {
      Instance newInstance = (Instance) instance.copy();

      // make sure that we get the right indexes set for the converted
      // string attributes when operating on a second batch of instances
      for (int i = 0; i < newInstance.numAttributes(); i++) {
        if (newInstance.attribute(i).isString()
            && !newInstance.isMissing(i)
            && m_AttIndices.isInRange(i)) {
          Attribute outAtt = getOutputFormat().attribute(newInstance.attribute(i).name());
          String inVal = newInstance.stringValue(i);
          int outIndex = outAtt.indexOfValue(inVal);
          if (outIndex < 0) {
            newInstance.setMissing(i);
          } else {
            newInstance.setValue(i, outIndex);
          }
        }
      }
      push(newInstance);
      return true;
    }

    bufferInput(instance);
    return false;
  }
Esempio n. 4
0
    @Override
    public void init(Instances structure, Environment env) {
      super.init(structure, env);

      m_resolvedLhsName = m_lhsAttributeName;
      m_resolvedRhsOperand = m_rhsOperand;
      try {
        m_resolvedLhsName = m_env.substitute(m_resolvedLhsName);
        m_resolvedRhsOperand = m_env.substitute(m_resolvedRhsOperand);
      } catch (Exception ex) {
      }

      Attribute lhs = null;
      // try as an index or "special" label first
      if (m_resolvedLhsName.toLowerCase().startsWith("/first")) {
        lhs = structure.attribute(0);
      } else if (m_resolvedLhsName.toLowerCase().startsWith("/last")) {
        lhs = structure.attribute(structure.numAttributes() - 1);
      } else {
        // try as an index
        try {
          int indx = Integer.parseInt(m_resolvedLhsName);
          indx--;
          lhs = structure.attribute(indx);
        } catch (NumberFormatException ex) {
        }
      }

      if (lhs == null) {
        lhs = structure.attribute(m_resolvedLhsName);
      }
      if (lhs == null) {
        throw new IllegalArgumentException(
            "Data does not contain attribute " + "\"" + m_resolvedLhsName + "\"");
      }
      m_lhsAttIndex = lhs.index();

      if (m_rhsIsAttribute) {
        Attribute rhs = null;

        // try as an index or "special" label first
        if (m_resolvedRhsOperand.toLowerCase().equals("/first")) {
          rhs = structure.attribute(0);
        } else if (m_resolvedRhsOperand.toLowerCase().equals("/last")) {
          rhs = structure.attribute(structure.numAttributes() - 1);
        } else {
          // try as an index
          try {
            int indx = Integer.parseInt(m_resolvedRhsOperand);
            indx--;
            rhs = structure.attribute(indx);
          } catch (NumberFormatException ex) {
          }
        }

        if (rhs == null) {
          rhs = structure.attribute(m_resolvedRhsOperand);
        }
        if (rhs == null) {
          throw new IllegalArgumentException(
              "Data does not contain attribute " + "\"" + m_resolvedRhsOperand + "\"");
        }
        m_rhsAttIndex = rhs.index();
      } else if (m_operator != ExpressionType.CONTAINS
          && m_operator != ExpressionType.STARTSWITH
          && m_operator != ExpressionType.ENDSWITH
          && m_operator != ExpressionType.REGEX
          && m_operator != ExpressionType.ISMISSING) {
        // make sure the operand is parseable as a number (unless missing has
        // been specified - equals only)
        if (lhs.isNominal()) {
          m_numericOperand = lhs.indexOfValue(m_resolvedRhsOperand);

          if (m_numericOperand < 0) {
            throw new IllegalArgumentException(
                "Unknown nominal value '"
                    + m_resolvedRhsOperand
                    + "' for attribute '"
                    + lhs.name()
                    + "'");
          }
        } else {
          try {
            m_numericOperand = Double.parseDouble(m_resolvedRhsOperand);
          } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                "\"" + m_resolvedRhsOperand + "\" is not parseable as a number!");
          }
        }
      }

      if (m_operator == ExpressionType.REGEX) {
        m_regexPattern = Pattern.compile(m_resolvedRhsOperand);
      }
    }
Esempio n. 5
0
  public MappingInfo(Instances dataSet, MiningSchema miningSchema, Logger log) throws Exception {
    m_log = log;
    // miningSchema.convertStringAttsToNominal();
    Instances fieldsI = miningSchema.getMiningSchemaAsInstances();

    m_fieldsMap = new int[fieldsI.numAttributes()];
    m_nominalValueMaps = new int[fieldsI.numAttributes()][];

    for (int i = 0; i < fieldsI.numAttributes(); i++) {
      String schemaAttName = fieldsI.attribute(i).name();
      boolean found = false;
      for (int j = 0; j < dataSet.numAttributes(); j++) {
        if (dataSet.attribute(j).name().equals(schemaAttName)) {
          Attribute miningSchemaAtt = fieldsI.attribute(i);
          Attribute incomingAtt = dataSet.attribute(j);
          // check type match
          if (miningSchemaAtt.type() != incomingAtt.type()) {
            throw new Exception(
                "[MappingInfo] type mismatch for field "
                    + schemaAttName
                    + ". Mining schema type "
                    + miningSchemaAtt.toString()
                    + ". Incoming type "
                    + incomingAtt.toString()
                    + ".");
          }

          // check nominal values (number, names...)
          if (miningSchemaAtt.numValues() != incomingAtt.numValues()) {
            String warningString =
                "[MappingInfo] WARNING: incoming nominal attribute "
                    + incomingAtt.name()
                    + " does not have the same "
                    + "number of values as the corresponding mining "
                    + "schema attribute.";
            if (m_log != null) {
              m_log.logMessage(warningString);
            } else {
              System.err.println(warningString);
            }
          }
          if (miningSchemaAtt.isNominal() || miningSchemaAtt.isString()) {
            int[] valuesMap = new int[incomingAtt.numValues()];
            for (int k = 0; k < incomingAtt.numValues(); k++) {
              String incomingNomVal = incomingAtt.value(k);
              int indexInSchema = miningSchemaAtt.indexOfValue(incomingNomVal);
              if (indexInSchema < 0) {
                String warningString =
                    "[MappingInfo] WARNING: incoming nominal attribute "
                        + incomingAtt.name()
                        + " has value "
                        + incomingNomVal
                        + " that doesn't occur in the mining schema.";
                if (m_log != null) {
                  m_log.logMessage(warningString);
                } else {
                  System.err.println(warningString);
                }
                valuesMap[k] = UNKNOWN_NOMINAL_VALUE;
              } else {
                valuesMap[k] = indexInSchema;
              }
            }
            m_nominalValueMaps[i] = valuesMap;
          }

          /*if (miningSchemaAtt.isNominal()) {
            for (int k = 0; k < miningSchemaAtt.numValues(); k++) {
              if (!miningSchemaAtt.value(k).equals(incomingAtt.value(k))) {
                throw new Exception("[PMMLUtils] value " + k + " (" +
                                    miningSchemaAtt.value(k) + ") does not match " +
                                    "incoming value (" + incomingAtt.value(k) +
                                    ") for attribute " + miningSchemaAtt.name() +
                                    ".");

              }
            }
          }*/
          found = true;
          m_fieldsMap[i] = j;
        }
      }
      if (!found) {
        throw new Exception(
            "[MappingInfo] Unable to find a match for mining schema "
                + "attribute "
                + schemaAttName
                + " in the "
                + "incoming instances!");
      }
    }

    // check class attribute (if set)
    if (fieldsI.classIndex() >= 0) {
      if (dataSet.classIndex() < 0) {
        // first see if we can find a matching class
        String className = fieldsI.classAttribute().name();
        Attribute classMatch = dataSet.attribute(className);
        if (classMatch == null) {
          throw new Exception(
              "[MappingInfo] Can't find match for target field "
                  + className
                  + "in incoming instances!");
        }
        dataSet.setClass(classMatch);
      } else if (!fieldsI.classAttribute().name().equals(dataSet.classAttribute().name())) {
        throw new Exception(
            "[MappingInfo] class attribute in mining schema does not match "
                + "class attribute in incoming instances!");
      }
    }

    // Set up the textual description of the mapping
    fieldsMappingString(fieldsI, dataSet);
  }