示例#1
0
  /**
   * Adds this tree recursively to the buffer.
   *
   * @param id the unqiue id for the method
   * @param buffer the buffer to add the source code to
   * @return the last ID being used
   * @throws Exception if something goes wrong
   */
  protected int toSource(int id, StringBuffer buffer) throws Exception {
    int result;
    int i;
    int newID;
    StringBuffer[] subBuffers;

    buffer.append("\n");
    buffer.append("  protected static double node" + id + "(Object[] i) {\n");

    // leaf?
    if (m_Attribute == null) {
      result = id;
      if (Double.isNaN(m_ClassValue)) buffer.append("    return Double.NaN;");
      else buffer.append("    return " + m_ClassValue + ";");
      if (m_ClassAttribute != null)
        buffer.append(" // " + m_ClassAttribute.value((int) m_ClassValue));
      buffer.append("\n");
      buffer.append("  }\n");
    } else {
      buffer.append("    // " + m_Attribute.name() + "\n");

      // subtree calls
      subBuffers = new StringBuffer[m_Attribute.numValues()];
      newID = id;
      for (i = 0; i < m_Attribute.numValues(); i++) {
        newID++;

        buffer.append("    ");
        if (i > 0) buffer.append("else ");
        buffer.append(
            "if (((String) i["
                + m_Attribute.index()
                + "]).equals(\""
                + m_Attribute.value(i)
                + "\"))\n");
        buffer.append("      return node" + newID + "(i);\n");

        subBuffers[i] = new StringBuffer();
        newID = m_Successors[i].toSource(newID, subBuffers[i]);
      }
      buffer.append("    else\n");
      buffer.append(
          "      throw new IllegalArgumentException(\"Value '\" + i["
              + m_Attribute.index()
              + "] + \"' is not allowed!\");\n");
      buffer.append("  }\n");

      // output subtree code
      for (i = 0; i < m_Attribute.numValues(); i++) {
        buffer.append(subBuffers[i].toString());
      }
      subBuffers = null;

      result = newID;
    }

    return result;
  }
  public double classifyInstance(Instance inst) throws Exception {

    if (m_attribute == null) {
      return m_intercept;
    } else {
      if (inst.isMissing(m_attribute.index())) {
        throw new Exception("UnivariateLinearRegression: No missing values!");
      }
      return m_intercept + m_slope * inst.value(m_attribute.index());
    }
  }
示例#3
0
  /**
   * Method for building an Id3 tree.
   *
   * @param data the training data
   * @exception Exception if decision tree can't be built successfully
   */
  private void makeTree(Instances data) throws Exception {

    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
      m_Attribute = null;
      m_ClassValue = Utils.missingValue();
      m_Distribution = new double[data.numClasses()];
      return;
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
      Attribute att = (Attribute) attEnum.nextElement();
      infoGains[att.index()] = computeInfoGain(data, att);
    }
    m_Attribute = data.attribute(Utils.maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[m_Attribute.index()], 0)) {
      m_Attribute = null;
      m_Distribution = new double[data.numClasses()];
      Enumeration instEnum = data.enumerateInstances();
      while (instEnum.hasMoreElements()) {
        Instance inst = (Instance) instEnum.nextElement();
        m_Distribution[(int) inst.classValue()]++;
      }
      Utils.normalize(m_Distribution);
      m_ClassValue = Utils.maxIndex(m_Distribution);
      m_ClassAttribute = data.classAttribute();
    } else {
      Instances[] splitData = splitData(data, m_Attribute);
      m_Successors = new Id3[m_Attribute.numValues()];
      for (int j = 0; j < m_Attribute.numValues(); j++) {
        m_Successors[j] = new Id3();
        m_Successors[j].makeTree(splitData[j]);
      }
    }
  }
示例#4
0
  /**
   * Constructs an instance suitable for passing to the model for scoring
   *
   * @param incoming the incoming instance
   * @return an instance with values mapped to be consistent with what the model is expecting
   */
  protected Instance mapIncomingFieldsToModelFields(Instance incoming) {
    Instances modelHeader = m_model.getHeader();
    double[] vals = new double[modelHeader.numAttributes()];

    for (int i = 0; i < modelHeader.numAttributes(); i++) {

      if (m_attributeMap[i] < 0) {
        // missing or type mismatch
        vals[i] = Utils.missingValue();
        continue;
      }

      Attribute modelAtt = modelHeader.attribute(i);
      Attribute incomingAtt = incoming.dataset().attribute(m_attributeMap[i]);

      if (incoming.isMissing(incomingAtt.index())) {
        vals[i] = Utils.missingValue();
        continue;
      }

      if (modelAtt.isNumeric()) {
        vals[i] = incoming.value(m_attributeMap[i]);
      } else if (modelAtt.isNominal()) {
        String incomingVal = incoming.stringValue(m_attributeMap[i]);
        int modelIndex = modelAtt.indexOfValue(incomingVal);

        if (modelIndex < 0) {
          vals[i] = Utils.missingValue();
        } else {
          vals[i] = modelIndex;
        }
      } else if (modelAtt.isString()) {
        vals[i] = 0;
        modelAtt.setStringValue(incoming.stringValue(m_attributeMap[i]));
      }
    }

    if (modelHeader.classIndex() >= 0) {
      // set class to missing value
      vals[modelHeader.classIndex()] = Utils.missingValue();
    }

    Instance newInst = null;
    if (incoming instanceof SparseInstance) {
      newInst = new SparseInstance(incoming.weight(), vals);
    } else {
      newInst = new DenseInstance(incoming.weight(), vals);
    }

    newInst.setDataset(modelHeader);
    return newInst;
  }
示例#5
0
  /**
   * Builds a mapping between the header for the incoming data to be scored and the header used to
   * train the model. Uses attribute names to match between the two. Also constructs a list of
   * missing attributes and a list of type mismatches.
   *
   * @param modelHeader the header of the data used to train the model
   * @param incomingHeader the header of the incoming data
   * @throws DistributedWekaException if more than 50% of the attributes expected by the model are
   *     missing or have a type mismatch with the incoming data
   */
  protected void buildAttributeMap(Instances modelHeader, Instances incomingHeader)
      throws DistributedWekaException {
    m_attributeMap = new int[modelHeader.numAttributes()];

    int problemCount = 0;
    for (int i = 0; i < modelHeader.numAttributes(); i++) {
      Attribute modAtt = modelHeader.attribute(i);
      Attribute incomingAtt = incomingHeader.attribute(modAtt.name());

      if (incomingAtt == null) {
        // missing model attribute
        m_attributeMap[i] = -1;
        m_missingMismatch.put(modAtt.name(), "missing from incoming data");
        problemCount++;
      } else if (modAtt.type() != incomingAtt.type()) {
        // type mismatch
        m_attributeMap[i] = -1;
        m_missingMismatch.put(
            modAtt.name(),
            "type mismatch - "
                + "model: "
                + Attribute.typeToString(modAtt)
                + " != incoming: "
                + Attribute.typeToString(incomingAtt));
        problemCount++;
      } else {
        m_attributeMap[i] = incomingAtt.index();
      }
    }

    // -1 for the class (if set)
    int adjustForClass = modelHeader.classIndex() >= 0 ? 1 : 0;
    if (problemCount > (modelHeader.numAttributes() - adjustForClass) / 2) {
      throw new DistributedWekaException(
          "More than 50% of the attributes that the model "
              + "is expecting to see are either missing or have a type mismatch in the "
              + "incoming data.");
    }
  }
  /**
   * Sets the format of the input instances.
   *
   * @param instanceInfo an Instances object containing the input instance structure (any instances
   *     contained in the object are ignored - only the structure is required).
   * @return true if the outputFormat may be collected immediately
   * @throws Exception if the format couldn't be set successfully
   */
  @Override
  public boolean setInputFormat(Instances instanceInfo) throws Exception {

    super.setInputFormat(instanceInfo);

    int classIndex = instanceInfo.classIndex();

    // setup the map
    if (m_renameVals != null && m_renameVals.length() > 0) {
      String[] vals = m_renameVals.split(",");

      for (String val : vals) {
        String[] parts = val.split(":");
        if (parts.length != 2) {
          throw new WekaException("Invalid replacement string: " + val);
        }

        if (parts[0].length() == 0 || parts[1].length() == 0) {
          throw new WekaException("Invalid replacement string: " + val);
        }

        m_renameMap.put(
            m_ignoreCase ? parts[0].toLowerCase().trim() : parts[0].trim(), parts[1].trim());
      }
    }

    // try selected atts as a numeric range first
    Range tempRange = new Range();
    tempRange.setInvert(m_invert);
    if (m_selectedColsString == null) {
      m_selectedColsString = "";
    }

    try {
      tempRange.setRanges(m_selectedColsString);
      tempRange.setUpper(instanceInfo.numAttributes() - 1);
      m_selectedAttributes = tempRange.getSelection();
      m_selectedCols = tempRange;
    } catch (Exception r) {
      // OK, now try as named attributes
      StringBuffer indexes = new StringBuffer();
      String[] attNames = m_selectedColsString.split(",");
      boolean first = true;
      for (String n : attNames) {
        n = n.trim();
        Attribute found = instanceInfo.attribute(n);
        if (found == null) {
          throw new WekaException(
              "Unable to find attribute '" + n + "' in the incoming instances'");
        }
        if (first) {
          indexes.append("" + (found.index() + 1));
          first = false;
        } else {
          indexes.append("," + (found.index() + 1));
        }
      }

      tempRange = new Range();
      tempRange.setRanges(indexes.toString());
      tempRange.setUpper(instanceInfo.numAttributes() - 1);
      m_selectedAttributes = tempRange.getSelection();
      m_selectedCols = tempRange;
    }

    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (int i = 0; i < instanceInfo.numAttributes(); i++) {
      if (m_selectedCols.isInRange(i)) {
        if (instanceInfo.attribute(i).isNominal()) {
          List<String> valsForAtt = new ArrayList<String>();
          for (int j = 0; j < instanceInfo.attribute(i).numValues(); j++) {
            String origV = instanceInfo.attribute(i).value(j);

            String replace =
                m_ignoreCase ? m_renameMap.get(origV.toLowerCase()) : m_renameMap.get(origV);
            if (replace != null && !valsForAtt.contains(replace)) {
              valsForAtt.add(replace);
            } else {
              valsForAtt.add(origV);
            }
          }
          Attribute newAtt = new Attribute(instanceInfo.attribute(i).name(), valsForAtt);
          attributes.add(newAtt);
        } else {
          // ignore any selected attributes that are not nominal
          Attribute att = (Attribute) instanceInfo.attribute(i).copy();
          attributes.add(att);
        }
      } else {
        Attribute att = (Attribute) instanceInfo.attribute(i).copy();
        attributes.add(att);
      }
    }

    Instances outputFormat = new Instances(instanceInfo.relationName(), attributes, 0);
    outputFormat.setClassIndex(classIndex);
    setOutputFormat(outputFormat);

    return true;
  }
示例#7
0
    @Override
    public void init(Instances structure, Environment env) {
      super.init(structure, env);

      m_resolvedLhsName = m_lhsAttributeName;
      m_resolvedRhsOperand = m_rhsOperand;
      try {
        m_resolvedLhsName = m_env.substitute(m_resolvedLhsName);
        m_resolvedRhsOperand = m_env.substitute(m_resolvedRhsOperand);
      } catch (Exception ex) {
      }

      Attribute lhs = null;
      // try as an index or "special" label first
      if (m_resolvedLhsName.toLowerCase().startsWith("/first")) {
        lhs = structure.attribute(0);
      } else if (m_resolvedLhsName.toLowerCase().startsWith("/last")) {
        lhs = structure.attribute(structure.numAttributes() - 1);
      } else {
        // try as an index
        try {
          int indx = Integer.parseInt(m_resolvedLhsName);
          indx--;
          lhs = structure.attribute(indx);
        } catch (NumberFormatException ex) {
        }
      }

      if (lhs == null) {
        lhs = structure.attribute(m_resolvedLhsName);
      }
      if (lhs == null) {
        throw new IllegalArgumentException(
            "Data does not contain attribute " + "\"" + m_resolvedLhsName + "\"");
      }
      m_lhsAttIndex = lhs.index();

      if (m_rhsIsAttribute) {
        Attribute rhs = null;

        // try as an index or "special" label first
        if (m_resolvedRhsOperand.toLowerCase().equals("/first")) {
          rhs = structure.attribute(0);
        } else if (m_resolvedRhsOperand.toLowerCase().equals("/last")) {
          rhs = structure.attribute(structure.numAttributes() - 1);
        } else {
          // try as an index
          try {
            int indx = Integer.parseInt(m_resolvedRhsOperand);
            indx--;
            rhs = structure.attribute(indx);
          } catch (NumberFormatException ex) {
          }
        }

        if (rhs == null) {
          rhs = structure.attribute(m_resolvedRhsOperand);
        }
        if (rhs == null) {
          throw new IllegalArgumentException(
              "Data does not contain attribute " + "\"" + m_resolvedRhsOperand + "\"");
        }
        m_rhsAttIndex = rhs.index();
      } else if (m_operator != ExpressionType.CONTAINS
          && m_operator != ExpressionType.STARTSWITH
          && m_operator != ExpressionType.ENDSWITH
          && m_operator != ExpressionType.REGEX
          && m_operator != ExpressionType.ISMISSING) {
        // make sure the operand is parseable as a number (unless missing has
        // been specified - equals only)
        if (lhs.isNominal()) {
          m_numericOperand = lhs.indexOfValue(m_resolvedRhsOperand);

          if (m_numericOperand < 0) {
            throw new IllegalArgumentException(
                "Unknown nominal value '"
                    + m_resolvedRhsOperand
                    + "' for attribute '"
                    + lhs.name()
                    + "'");
          }
        } else {
          try {
            m_numericOperand = Double.parseDouble(m_resolvedRhsOperand);
          } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                "\"" + m_resolvedRhsOperand + "\" is not parseable as a number!");
          }
        }
      }

      if (m_operator == ExpressionType.REGEX) {
        m_regexPattern = Pattern.compile(m_resolvedRhsOperand);
      }
    }
示例#8
0
  private void readHeader() throws IOException {
    m_rowCount = 1;
    m_incrementalReader = null;
    m_current = new ArrayList<Object>();
    openTempFiles();

    m_rowBuffer = new ArrayList<String>();

    String firstRow = m_sourceReader.readLine();
    if (firstRow == null) {
      throw new IOException("No data in the file!");
    }
    if (m_noHeaderRow) {
      m_rowBuffer.add(firstRow);
    }

    ArrayList<Attribute> attribNames = new ArrayList<Attribute>();

    // now tokenize to determine attribute names (or create att names if
    // no header row
    StringReader sr = new StringReader(firstRow + "\n");
    // System.out.print(firstRow + "\n");
    m_st = new StreamTokenizer(sr);
    initTokenizer(m_st);

    m_st.ordinaryChar(m_FieldSeparator.charAt(0));

    int attNum = 1;
    StreamTokenizerUtils.getFirstToken(m_st);
    if (m_st.ttype == StreamTokenizer.TT_EOF) {
      StreamTokenizerUtils.errms(m_st, "premature end of file");
    }
    boolean first = true;
    boolean wasSep;

    while (m_st.ttype != StreamTokenizer.TT_EOL && m_st.ttype != StreamTokenizer.TT_EOF) {
      // Get next token

      if (!first) {
        StreamTokenizerUtils.getToken(m_st);
      }

      if (m_st.ttype == m_FieldSeparator.charAt(0) || m_st.ttype == StreamTokenizer.TT_EOL) {
        wasSep = true;
      } else {
        wasSep = false;

        String attName = null;

        if (m_noHeaderRow) {
          attName = "att" + attNum;
          attNum++;
        } else {
          attName = m_st.sval;
        }

        attribNames.add(new Attribute(attName, (java.util.List<String>) null));
      }
      if (!wasSep) {
        StreamTokenizerUtils.getToken(m_st);
      }
      first = false;
    }
    String relationName;
    if (m_sourceFile != null) {
      relationName = (m_sourceFile.getName()).replaceAll("\\.[cC][sS][vV]$", "");
    } else {
      relationName = "stream";
    }
    m_structure = new Instances(relationName, attribNames, 0);
    m_NominalAttributes.setUpper(m_structure.numAttributes() - 1);
    m_StringAttributes.setUpper(m_structure.numAttributes() - 1);
    m_dateAttributes.setUpper(m_structure.numAttributes() - 1);
    m_numericAttributes.setUpper(m_structure.numAttributes() - 1);
    m_nominalVals = new HashMap<Integer, LinkedHashSet<String>>();

    m_types = new TYPE[m_structure.numAttributes()];
    for (int i = 0; i < m_structure.numAttributes(); i++) {
      if (m_NominalAttributes.isInRange(i)) {
        m_types[i] = TYPE.NOMINAL;
        LinkedHashSet<String> ts = new LinkedHashSet<String>();
        m_nominalVals.put(i, ts);
      } else if (m_StringAttributes.isInRange(i)) {
        m_types[i] = TYPE.STRING;
      } else if (m_dateAttributes.isInRange(i)) {
        m_types[i] = TYPE.DATE;
      } else if (m_numericAttributes.isInRange(i)) {
        m_types[i] = TYPE.NUMERIC;
      } else {
        m_types[i] = TYPE.UNDETERMINED;
      }
    }

    if (m_nominalLabelSpecs.size() > 0) {
      for (String spec : m_nominalLabelSpecs) {
        String[] attsAndLabels = spec.split(":");
        if (attsAndLabels.length == 2) {
          String[] labels = attsAndLabels[1].split(",");
          try {
            // try as a range string first
            Range tempR = new Range();
            tempR.setRanges(attsAndLabels[0].trim());
            tempR.setUpper(m_structure.numAttributes() - 1);

            int[] rangeIndexes = tempR.getSelection();
            for (int i = 0; i < rangeIndexes.length; i++) {
              m_types[rangeIndexes[i]] = TYPE.NOMINAL;
              LinkedHashSet<String> ts = new LinkedHashSet<String>();
              for (String lab : labels) {
                ts.add(lab);
              }
              m_nominalVals.put(rangeIndexes[i], ts);
            }
          } catch (IllegalArgumentException e) {
            // one or more named attributes?
            String[] attNames = attsAndLabels[0].split(",");
            for (String attN : attNames) {
              Attribute a = m_structure.attribute(attN.trim());
              if (a != null) {
                int attIndex = a.index();
                m_types[attIndex] = TYPE.NOMINAL;
                LinkedHashSet<String> ts = new LinkedHashSet<String>();
                for (String lab : labels) {
                  ts.add(lab);
                }
                m_nominalVals.put(attIndex, ts);
              }
            }
          }
        }
      }
    }

    // Prevents the first row from getting lost in the
    // case where there is no header row and we're
    // running in batch mode
    if (m_noHeaderRow && getRetrieval() == BATCH) {
      StreamTokenizer tempT = new StreamTokenizer(new StringReader(firstRow));
      initTokenizer(tempT);
      tempT.ordinaryChar(m_FieldSeparator.charAt(0));
      String checked = getInstance(tempT);
      dumpRow(checked);
    }

    m_st = new StreamTokenizer(m_sourceReader);
    initTokenizer(m_st);
    m_st.ordinaryChar(m_FieldSeparator.charAt(0));

    // try and determine a more accurate structure from the first batch
    readData(false || getRetrieval() == BATCH);
    makeStructure();
  }
示例#9
0
  /**
   * The procedure implementing the SMOTE algorithm. The output instances are pushed onto the output
   * queue for collection.
   *
   * @throws Exception if provided options cannot be executed on input instances
   */
  protected void doSMOTE() throws Exception {
    int minIndex = 0;
    int min = Integer.MAX_VALUE;
    if (m_DetectMinorityClass) {
      // find minority class
      int[] classCounts =
          getInputFormat().attributeStats(getInputFormat().classIndex()).nominalCounts;
      for (int i = 0; i < classCounts.length; i++) {
        if (classCounts[i] != 0 && classCounts[i] < min) {
          min = classCounts[i];
          minIndex = i;
        }
      }
    } else {
      String classVal = getClassValue();
      if (classVal.equalsIgnoreCase("first")) {
        minIndex = 1;
      } else if (classVal.equalsIgnoreCase("last")) {
        minIndex = getInputFormat().numClasses();
      } else {
        minIndex = Integer.parseInt(classVal);
      }
      if (minIndex > getInputFormat().numClasses()) {
        throw new Exception("value index must be <= the number of classes");
      }
      minIndex--; // make it an index
    }

    int nearestNeighbors;
    if (min <= getNearestNeighbors()) {
      nearestNeighbors = min - 1;
    } else {
      nearestNeighbors = getNearestNeighbors();
    }
    if (nearestNeighbors < 1) throw new Exception("Cannot use 0 neighbors!");

    // compose minority class dataset
    // also push all dataset instances
    Instances sample = getInputFormat().stringFreeStructure();
    Enumeration instanceEnum = getInputFormat().enumerateInstances();
    while (instanceEnum.hasMoreElements()) {
      Instance instance = (Instance) instanceEnum.nextElement();
      push((Instance) instance.copy());
      if ((int) instance.classValue() == minIndex) {
        sample.add(instance);
      }
    }

    // compute Value Distance Metric matrices for nominal features
    Map vdmMap = new HashMap();
    Enumeration attrEnum = getInputFormat().enumerateAttributes();
    while (attrEnum.hasMoreElements()) {
      Attribute attr = (Attribute) attrEnum.nextElement();
      if (!attr.equals(getInputFormat().classAttribute())) {
        if (attr.isNominal() || attr.isString()) {
          double[][] vdm = new double[attr.numValues()][attr.numValues()];
          vdmMap.put(attr, vdm);
          int[] featureValueCounts = new int[attr.numValues()];
          int[][] featureValueCountsByClass =
              new int[getInputFormat().classAttribute().numValues()][attr.numValues()];
          instanceEnum = getInputFormat().enumerateInstances();
          while (instanceEnum.hasMoreElements()) {
            Instance instance = (Instance) instanceEnum.nextElement();
            int value = (int) instance.value(attr);
            int classValue = (int) instance.classValue();
            featureValueCounts[value]++;
            featureValueCountsByClass[classValue][value]++;
          }
          for (int valueIndex1 = 0; valueIndex1 < attr.numValues(); valueIndex1++) {
            for (int valueIndex2 = 0; valueIndex2 < attr.numValues(); valueIndex2++) {
              double sum = 0;
              for (int classValueIndex = 0;
                  classValueIndex < getInputFormat().numClasses();
                  classValueIndex++) {
                double c1i = featureValueCountsByClass[classValueIndex][valueIndex1];
                double c2i = featureValueCountsByClass[classValueIndex][valueIndex2];
                double c1 = featureValueCounts[valueIndex1];
                double c2 = featureValueCounts[valueIndex2];
                double term1 = c1i / c1;
                double term2 = c2i / c2;
                sum += Math.abs(term1 - term2);
              }
              vdm[valueIndex1][valueIndex2] = sum;
            }
          }
        }
      }
    }

    // use this random source for all required randomness
    Random rand = new Random(getRandomSeed());

    // find the set of extra indices to use if the percentage is not evenly
    // divisible by 100
    List extraIndices = new LinkedList();
    double percentageRemainder = (getPercentage() / 100) - Math.floor(getPercentage() / 100.0);
    int extraIndicesCount = (int) (percentageRemainder * sample.numInstances());
    if (extraIndicesCount >= 1) {
      for (int i = 0; i < sample.numInstances(); i++) {
        extraIndices.add(i);
      }
    }
    Collections.shuffle(extraIndices, rand);
    extraIndices = extraIndices.subList(0, extraIndicesCount);
    Set extraIndexSet = new HashSet(extraIndices);

    // the main loop to handle computing nearest neighbors and generating SMOTE
    // examples from each instance in the original minority class data
    Instance[] nnArray = new Instance[nearestNeighbors];
    for (int i = 0; i < sample.numInstances(); i++) {
      Instance instanceI = sample.instance(i);
      // find k nearest neighbors for each instance
      List distanceToInstance = new LinkedList();
      for (int j = 0; j < sample.numInstances(); j++) {
        Instance instanceJ = sample.instance(j);
        if (i != j) {
          double distance = 0;
          attrEnum = getInputFormat().enumerateAttributes();
          while (attrEnum.hasMoreElements()) {
            Attribute attr = (Attribute) attrEnum.nextElement();
            if (!attr.equals(getInputFormat().classAttribute())) {
              double iVal = instanceI.value(attr);
              double jVal = instanceJ.value(attr);
              if (attr.isNumeric()) {
                distance += Math.pow(iVal - jVal, 2);
              } else {
                distance += ((double[][]) vdmMap.get(attr))[(int) iVal][(int) jVal];
              }
            }
          }
          distance = Math.pow(distance, .5);
          distanceToInstance.add(new Object[] {distance, instanceJ});
        }
      }

      // sort the neighbors according to distance
      Collections.sort(
          distanceToInstance,
          new Comparator() {
            public int compare(Object o1, Object o2) {
              double distance1 = (Double) ((Object[]) o1)[0];
              double distance2 = (Double) ((Object[]) o2)[0];
              return Double.compare(distance1, distance2);
            }
          });

      // populate the actual nearest neighbor instance array
      Iterator entryIterator = distanceToInstance.iterator();
      int j = 0;
      while (entryIterator.hasNext() && j < nearestNeighbors) {
        nnArray[j] = (Instance) ((Object[]) entryIterator.next())[1];
        j++;
      }

      // create synthetic examples
      int n = (int) Math.floor(getPercentage() / 100);
      while (n > 0 || extraIndexSet.remove(i)) {
        double[] values = new double[sample.numAttributes()];
        int nn = rand.nextInt(nearestNeighbors);
        attrEnum = getInputFormat().enumerateAttributes();
        while (attrEnum.hasMoreElements()) {
          Attribute attr = (Attribute) attrEnum.nextElement();
          if (!attr.equals(getInputFormat().classAttribute())) {
            if (attr.isNumeric()) {
              double dif = nnArray[nn].value(attr) - instanceI.value(attr);
              double gap = rand.nextDouble();
              values[attr.index()] = (instanceI.value(attr) + gap * dif);
            } else if (attr.isDate()) {
              double dif = nnArray[nn].value(attr) - instanceI.value(attr);
              double gap = rand.nextDouble();
              values[attr.index()] = (long) (instanceI.value(attr) + gap * dif);
            } else {
              int[] valueCounts = new int[attr.numValues()];
              int iVal = (int) instanceI.value(attr);
              valueCounts[iVal]++;
              for (int nnEx = 0; nnEx < nearestNeighbors; nnEx++) {
                int val = (int) nnArray[nnEx].value(attr);
                valueCounts[val]++;
              }
              int maxIndex = 0;
              int max = Integer.MIN_VALUE;
              for (int index = 0; index < attr.numValues(); index++) {
                if (valueCounts[index] > max) {
                  max = valueCounts[index];
                  maxIndex = index;
                }
              }
              values[attr.index()] = maxIndex;
            }
          }
        }
        values[sample.classIndex()] = minIndex;
        Instance synthetic = new Instance(1.0, values);
        push(synthetic);
        n--;
      }
    }
  }