Example no. 1
  /**
   * Input an instance for filtering. Ordinarily the instance is processed and made available for
   * output immediately. Some filters require all instances be read before producing output.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be collected with output().
   * @exception IllegalStateException if no input format has been defined.
   * @exception Exception if there was a problem during the filtering.
   */
  public boolean input(Instance instance) throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    double[] vals = new double[instance.numAttributes() + 1];
    for (int i = 0; i < instance.numAttributes(); i++) {
      if (instance.isMissing(i)) {
        vals[i] = Instance.missingValue();
      } else {
        vals[i] = instance.value(i);
      }
    }

    evaluateExpression(vals);

    Instance inst = null;
    if (instance instanceof SparseInstance) {
      inst = new SparseInstance(instance.weight(), vals);
    } else {
      inst = new Instance(instance.weight(), vals);
    }
    copyStringValues(inst, false, instance.dataset(), getOutputFormat());
    inst.setDataset(getOutputFormat());
    push(inst);
    return true;
  }
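The input()/output() contract above is Weka's standard incremental filtering protocol: push(inst) adds the converted instance to an output queue, and the caller collects it with output(). Below is a minimal sketch of how such a filter is typically driven; it is hedged, not part of the original code: the filter variable stands for any concrete Filter subclass, and only standard weka.filters.Filter calls are used.

  import weka.core.Instance;
  import weka.core.Instances;
  import weka.filters.Filter;

  public class FilterDriver {
    /** Feeds data through a filter instance by instance and collects the queued output. */
    public static Instances applyIncrementally(Filter filter, Instances data) throws Exception {
      filter.setInputFormat(data);           // defines the input format checked by input()
      for (int i = 0; i < data.numInstances(); i++) {
        filter.input(data.instance(i));      // returns true once an output instance is queued
      }
      filter.batchFinished();                // flushes filters that buffer the whole batch
      Instances result = filter.getOutputFormat();   // empty copy of the output header
      Instance processed;
      while ((processed = filter.output()) != null) {
        result.add(processed);
      }
      return result;
    }
  }

For batch use the same effect is available through the one-liner Filter.useFilter(data, filter).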
Example no. 2
 @Override
 public double[] recommendRanking(Instance inst) throws Exception {
   double[] features = new double[inst.numAttributes() - 1];
   double[] ranking;
   for (int i = 0; i < inst.numAttributes() - 1; i++) {
     features[i] = inst.value(i);
   }
   ranking = m_LRT.lrTr.getLabelRanking(features);
   return ranking;
 }
Example no. 3
  /**
   * Convert a single instance over. The converted instance is added to the end of the output queue.
   *
   * @param instance the instance to convert
   */
  private void convertInstance(Instance instance) {
    Instance inst = null;

    if (instance instanceof SparseInstance) {
      double[] newVals = new double[instance.numAttributes()];
      int[] newIndices = new int[instance.numAttributes()];
      double[] vals = instance.toDoubleArray();
      int ind = 0;
      for (int j = 0; j < instance.numAttributes(); j++) {
        double value;
        if (instance.attribute(j).isNumeric()
            && (!Instance.isMissingValue(vals[j]))
            && (getInputFormat().classIndex() != j)) {

          value = vals[j] - m_Means[j];
          if (value != 0.0) {
            newVals[ind] = value;
            newIndices[ind] = j;
            ind++;
          }
        } else {
          value = vals[j];
          if (value != 0.0) {
            newVals[ind] = value;
            newIndices[ind] = j;
            ind++;
          }
        }
      }
      double[] tempVals = new double[ind];
      int[] tempInd = new int[ind];
      System.arraycopy(newVals, 0, tempVals, 0, ind);
      System.arraycopy(newIndices, 0, tempInd, 0, ind);
      inst = new SparseInstance(instance.weight(), tempVals, tempInd, instance.numAttributes());
    } else {
      double[] vals = instance.toDoubleArray();
      for (int j = 0; j < getInputFormat().numAttributes(); j++) {
        if (instance.attribute(j).isNumeric()
            && (!Instance.isMissingValue(vals[j]))
            && (getInputFormat().classIndex() != j)) {
          vals[j] = (vals[j] - m_Means[j]);
        }
      }
      inst = new Instance(instance.weight(), vals);
    }

    inst.setDataset(instance.dataset());

    push(inst);
  }
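The centering above subtracts a per-attribute mean stored in m_Means. The following is a hedged, fragment-style sketch of how such an array could be filled from the filter's input data; the helper name and its placement are assumptions, not the original implementation.

  private double[] computeMeans(Instances input) {
    double[] means = new double[input.numAttributes()];
    for (int j = 0; j < input.numAttributes(); j++) {
      // meanOrMode() yields the mean for numeric attributes, ignoring missing values
      if (input.attribute(j).isNumeric() && j != input.classIndex()) {
        means[j] = input.meanOrMode(j);
      }
    }
    return means;
  }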
Example no. 4
 public double[] normalizedInstance(Instance inst) {
   // Normalize Instance
   double[] normalizedInstance = new double[inst.numAttributes()];
   for (int j = 0; j < inst.numAttributes() - 1; j++) {
     int instAttIndex = modelAttIndexToInstanceAttIndex(j, inst);
     double mean = perceptronattributeStatistics.getValue(j) / perceptronYSeen;
     double sd =
         computeSD(
             squaredperceptronattributeStatistics.getValue(j),
             perceptronattributeStatistics.getValue(j),
             perceptronYSeen);
     if (sd > SD_THRESHOLD) normalizedInstance[j] = (inst.value(instAttIndex) - mean) / sd;
     else normalizedInstance[j] = inst.value(instAttIndex) - mean;
   }
   return normalizedInstance;
 }
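normalizedInstance() standardizes each numeric attribute from running sums kept by the perceptron. Here is a minimal sketch of the computeSD(...) helper it assumes, using the usual running-sum identity; the exact denominator in the original may differ.

  private double computeSD(double sumOfSquares, double sum, double n) {
    if (n > 1) {
      // sample variance from running sums: (sum of squares - sum^2 / n) / (n - 1)
      return Math.sqrt((sumOfSquares - (sum * sum) / n) / (n - 1));
    }
    return 0.0;
  }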
  /**
   * Compare two datasets to see if they differ.
   *
   * @param data1 one set of instances
   * @param data2 the other set of instances
   * @throws Exception if the datasets differ
   */
  protected void compareDatasets(Instances data1, Instances data2) throws Exception {

    if (m_CheckHeader) {
      if (!data2.equalHeaders(data1)) {
        throw new Exception("header has been modified\n" + data2.equalHeadersMsg(data1));
      }
    }
    if (!(data2.numInstances() == data1.numInstances())) {
      throw new Exception("number of instances has changed");
    }
    for (int i = 0; i < data2.numInstances(); i++) {
      Instance orig = data1.instance(i);
      Instance copy = data2.instance(i);
      for (int j = 0; j < orig.numAttributes(); j++) {
        if (orig.isMissing(j)) {
          if (!copy.isMissing(j)) {
            throw new Exception("instances have changed");
          }
        } else {
          if (m_CompareValuesAsString) {
            if (!orig.toString(j).equals(copy.toString(j))) {
              throw new Exception("instances have changed");
            }
          } else {
            if (Math.abs(orig.value(j) - copy.value(j)) > m_MaxDiffValues) {
              throw new Exception("instances have changed");
            }
          }
        }
        if (Math.abs(orig.weight() - copy.weight()) > m_MaxDiffWeights) {
          throw new Exception("instance weights have changed");
        }
      }
    }
  }
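A typical use of this check, shown as a hedged sketch: back up the dataset before filtering and verify that the filter did not modify its input in place. The surrounding test-class context and the method name are assumed.

  protected void checkFilterDoesNotModifyInput(Filter filter, Instances data) throws Exception {
    Instances backup = new Instances(data);   // deep copy of header and instances
    filter.setInputFormat(data);
    Filter.useFilter(data, filter);           // the filtered output is not needed here
    compareDatasets(backup, data);            // throws if the original data changed
  }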
Example no. 6
  @Override
  public void buildClassifier(Instances data) throws Exception {
    trainingData = data;
    Attribute classAttribute = data.classAttribute();
    prototypes = new ArrayList<>();

    classedData = new HashMap<String, ArrayList<Sequence>>();
    indexClassedDataInFullData = new HashMap<String, ArrayList<Integer>>();
    for (int c = 0; c < data.numClasses(); c++) {
      classedData.put(data.classAttribute().value(c), new ArrayList<Sequence>());
      indexClassedDataInFullData.put(data.classAttribute().value(c), new ArrayList<Integer>());
    }

    sequences = new Sequence[data.numInstances()];
    classMap = new String[sequences.length];
    for (int i = 0; i < sequences.length; i++) {
      Instance sample = data.instance(i);
      MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
      int shift = (sample.classIndex() == 0) ? 1 : 0;
      for (int t = 0; t < sequence.length; t++) {
        sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
      }
      sequences[i] = new Sequence(sequence);
      String clas = sample.stringValue(classAttribute);
      classMap[i] = clas;
      classedData.get(clas).add(sequences[i]);
      indexClassedDataInFullData.get(clas).add(i);
      //			System.out.println("Element "+i+" of train is classed "+clas+" and went to element
      // "+(indexClassedDataInFullData.get(clas).size()-1));
    }
    buildSpecificClassifier(data);
  }
  /**
   * Convert an input instance
   *
   * @param current the input instance to convert
   * @return a transformed instance
   * @throws Exception if a problem occurs
   */
  protected Instance convertInstance(Instance current) throws Exception {
    double[] vals = new double[getOutputFormat().numAttributes()];
    int index = 0;
    for (int j = 0; j < current.numAttributes(); j++) {
      if (j != current.classIndex()) {
        if (m_unchanged != null && m_unchanged.attribute(current.attribute(j).name()) != null) {
          vals[index++] = current.value(j);
        } else {
          Estimator[] estForAtt = m_estimatorLookup.get(current.attribute(j).name());
          for (int k = 0; k < current.classAttribute().numValues(); k++) {
            if (current.isMissing(j)) {
              vals[index++] = Utils.missingValue();
            } else {
              double e = estForAtt[k].getProbability(current.value(j));
              vals[index++] = e;
            }
          }
        }
      }
    }

    vals[vals.length - 1] = current.classValue();
    DenseInstance instNew = new DenseInstance(current.weight(), vals);

    return instNew;
  }
  /**
   * Adds the prediction intervals as additional attributes at the end. Since classifiers can
   * return a varying number of intervals per instance, the dataset is filled with missing values
   * for non-existing intervals.
   */
  protected void addPredictionIntervals() {
    int maxNum;
    int num;
    int i;
    int n;
    FastVector preds;
    FastVector atts;
    Instances data;
    Instance inst;
    Instance newInst;
    double[] values;
    double[][] predInt;

    // determine the maximum number of intervals
    maxNum = 0;
    preds = m_Evaluation.predictions();
    for (i = 0; i < preds.size(); i++) {
      num = ((NumericPrediction) preds.elementAt(i)).predictionIntervals().length;
      if (num > maxNum) maxNum = num;
    }

    // create new header
    atts = new FastVector();
    for (i = 0; i < m_PlotInstances.numAttributes(); i++)
      atts.addElement(m_PlotInstances.attribute(i));
    for (i = 0; i < maxNum; i++) {
      atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-lowerBoundary"));
      atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-upperBoundary"));
      atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-width"));
    }
    data = new Instances(m_PlotInstances.relationName(), atts, m_PlotInstances.numInstances());
    data.setClassIndex(m_PlotInstances.classIndex());

    // update data
    for (i = 0; i < m_PlotInstances.numInstances(); i++) {
      inst = m_PlotInstances.instance(i);
      // copy old values
      values = new double[data.numAttributes()];
      System.arraycopy(inst.toDoubleArray(), 0, values, 0, inst.numAttributes());
      // add interval data
      predInt = ((NumericPrediction) preds.elementAt(i)).predictionIntervals();
      for (n = 0; n < maxNum; n++) {
        if (n < predInt.length) {
          values[m_PlotInstances.numAttributes() + n * 3 + 0] = predInt[n][0];
          values[m_PlotInstances.numAttributes() + n * 3 + 1] = predInt[n][1];
          values[m_PlotInstances.numAttributes() + n * 3 + 2] = predInt[n][1] - predInt[n][0];
        } else {
          values[m_PlotInstances.numAttributes() + n * 3 + 0] = Utils.missingValue();
          values[m_PlotInstances.numAttributes() + n * 3 + 1] = Utils.missingValue();
          values[m_PlotInstances.numAttributes() + n * 3 + 2] = Utils.missingValue();
        }
      }
      // create new Instance
      newInst = new DenseInstance(inst.weight(), values);
      data.add(newInst);
    }

    m_PlotInstances = data;
  }
  /**
   * @param outFilePath full path to the output file
   * @param data the instances object containing the data on which the quantizer is learned
   * @param numClusters the number of clusters in k-means
   * @param maxIterations the maximum number of k-means iterations
   * @param seed the seed given to k-means
   * @param numSlots the number of execution slots to use (>1 = parallel execution)
   * @param kMeansPlusPlus whether to use kmeans++ for the initialization of the centroids
   *     (true/false)
   * @throws Exception
   */
  public static void learnAndWriteQuantizer(
      String outFilePath,
      Instances data,
      int numClusters,
      int maxIterations,
      int seed,
      int numSlots,
      boolean kMeansPlusPlus)
      throws Exception {
    System.out.println("--" + data.numInstances() + " vectors loaded--");
    System.out.println("Vector dimensionality: " + data.numAttributes());
    System.out.println("Clustering settings:");
    System.out.println("Num clusters: " + numClusters);
    System.out.println("Max iterations: " + maxIterations);
    System.out.println("Seed: " + seed);

    System.out.println("Clustering started");
    long start = System.currentTimeMillis();
    // create a new instance for the Clusterer
    SimpleKMeansWithOutput clusterer = new SimpleKMeansWithOutput();
    clusterer.setInitializeUsingKMeansPlusPlusMethod(kMeansPlusPlus);
    clusterer.setSeed(seed);
    clusterer.setNumClusters(numClusters);
    clusterer.setMaxIterations(maxIterations);
    clusterer.setNumExecutionSlots(numSlots);
    clusterer.setFastDistanceCalc(false);
    // build the clusterer
    clusterer.buildClusterer(data);
    long end = System.currentTimeMillis();
    System.out.println("Clustering completed in " + (end - start) + " ms");

    System.out.println("Writing quantizer in file");
    // create a new file to store the codebook
    BufferedWriter out = new BufferedWriter(new FileWriter(new File(outFilePath)));
    // write the results of the clustering to the new file (csv formated)
    Instances clusterCentroids = clusterer.getClusterCentroids();
    for (int j = 0; j < clusterCentroids.numInstances(); j++) {
      Instance centroid = clusterCentroids.instance(j);
      for (int k = 0; k < centroid.numAttributes() - 1; k++) {
        out.write(centroid.value(k) + ",");
      }
      out.write(centroid.value(centroid.numAttributes() - 1) + "\n");
    }
    out.close();
  }
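A possible invocation, assuming the surrounding class and an ARFF file of feature vectors; the file names and parameter values below are illustrative only.

  public static void main(String[] args) throws Exception {
    // one feature vector per row of the ARFF file
    Instances data = weka.core.converters.ConverterUtils.DataSource.read("features.arff");
    learnAndWriteQuantizer("codebook.csv", data, 64, 100, 1, 4, true);
  }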
Example no. 10
  public double updateWeights(Instance inst, double learningRatio) {
    // Normalize Instance
    double[] normalizedInstance = normalizedInstance(inst);
    // Compute the Normalized Prediction of Perceptron
    double normalizedPredict = prediction(normalizedInstance);
    double normalizedY = normalizeActualClassValue(inst);
    double sumWeights = 0.0;
    double delta = normalizedY - normalizedPredict;

    for (int j = 0; j < inst.numAttributes() - 1; j++) {
      int instAttIndex = modelAttIndexToInstanceAttIndex(j, inst);
      if (inst.attribute(instAttIndex).isNumeric()) {
        this.weightAttribute[j] += learningRatio * delta * normalizedInstance[j];
        sumWeights += Math.abs(this.weightAttribute[j]);
      }
    }
    this.weightAttribute[inst.numAttributes() - 1] += learningRatio * delta;
    sumWeights += Math.abs(this.weightAttribute[inst.numAttributes() - 1]);
    if (sumWeights > inst.numAttributes()) { // Lasso regression
      for (int j = 0; j < inst.numAttributes() - 1; j++) {
        int instAttIndex = modelAttIndexToInstanceAttIndex(j, inst);
        if (inst.attribute(instAttIndex).isNumeric()) {
          this.weightAttribute[j] = this.weightAttribute[j] / sumWeights;
        }
      }
      this.weightAttribute[inst.numAttributes() - 1] =
          this.weightAttribute[inst.numAttributes() - 1] / sumWeights;
    }

    return denormalizedPrediction(normalizedPredict);
  }
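updateWeights() applies a normalized delta rule in which the last entry of weightAttribute acts as the bias term. Below is a hedged sketch of the prediction(...) helper it relies on; the original implementation may differ in detail.

  public double prediction(double[] normalizedInstance) {
    double predicted = 0.0;
    for (int j = 0; j < normalizedInstance.length - 1; j++) {
      predicted += this.weightAttribute[j] * normalizedInstance[j];
    }
    // the last weight is the bias, updated above with learningRatio * delta
    return predicted + this.weightAttribute[normalizedInstance.length - 1];
  }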
  /**
   * Input an instance for filtering. Ordinarily the instance is processed and made available for
   * output immediately. Some filters require all instances be read before producing output.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be collected with output().
   * @throws IllegalStateException if no input structure has been defined.
   */
  @Override
  public boolean input(Instance instance) {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    if (getOutputFormat().numAttributes() == 0) {
      return false;
    }

    if (m_selectedAttributes.length == 0) {
      push(instance);
    } else {
      double vals[] = new double[getOutputFormat().numAttributes()];
      for (int i = 0; i < instance.numAttributes(); i++) {
        double currentV = instance.value(i);

        if (!m_selectedCols.isInRange(i)) {
          vals[i] = currentV;
        } else {
          if (Utils.isMissingValue(currentV)) {
            vals[i] = currentV;
          } else {
            String currentS = instance.attribute(i).value((int) currentV);
            String replace =
                m_ignoreCase ? m_renameMap.get(currentS.toLowerCase()) : m_renameMap.get(currentS);
            if (replace == null) {
              vals[i] = currentV;
            } else {
              vals[i] = getOutputFormat().attribute(i).indexOfValue(replace);
            }
          }
        }
      }

      Instance inst = null;
      if (instance instanceof SparseInstance) {
        inst = new SparseInstance(instance.weight(), vals);
      } else {
        inst = new DenseInstance(instance.weight(), vals);
      }
      inst.setDataset(getOutputFormat());
      copyValues(inst, false, instance.dataset(), getOutputFormat());
      inst.setDataset(getOutputFormat());
      push(inst);
    }

    return true;
  }
Example no. 12
  /**
   * processes the given instance (may change the provided instance) and returns the modified
   * version.
   *
   * @param instance the instance to process
   * @return the modified data
   * @throws Exception in case the processing goes wrong
   */
  protected Instance process(Instance instance) throws Exception {
    Instance result;
    Attribute att;
    double[] values;
    int i;

    // adjust indices
    values = new double[instance.numAttributes()];
    for (i = 0; i < instance.numAttributes(); i++) {
      att = instance.attribute(i);
      if (!att.isNominal() || !m_AttributeIndices.isInRange(i) || instance.isMissing(i))
        values[i] = instance.value(i);
      else values[i] = m_NewOrder[i][(int) instance.value(i)];
    }

    // create new instance
    result = new DenseInstance(instance.weight(), values);

    return result;
  }
Example no. 13
  /**
   * Inserts an instance into the hash table
   *
   * @param inst instance to be inserted
   * @param instA to create the hash key from
   * @throws Exception if the instance can't be inserted
   */
  private void insertIntoTable(Instance inst, double[] instA) throws Exception {

    double[] tempClassDist2;
    double[] newDist;
    DecisionTableHashKey thekey;

    if (instA != null) {
      thekey = new DecisionTableHashKey(instA);
    } else {
      thekey = new DecisionTableHashKey(inst, inst.numAttributes(), false);
    }

    // see if this one is already in the table
    tempClassDist2 = (double[]) m_entries.get(thekey);
    if (tempClassDist2 == null) {
      if (m_classIsNominal) {
        newDist = new double[m_theInstances.classAttribute().numValues()];

        // Laplace estimation
        for (int i = 0; i < m_theInstances.classAttribute().numValues(); i++) {
          newDist[i] = 1.0;
        }

        newDist[(int) inst.classValue()] = inst.weight();

        // add to the table
        m_entries.put(thekey, newDist);
      } else {
        newDist = new double[2];
        newDist[0] = inst.classValue() * inst.weight();
        newDist[1] = inst.weight();

        // add to the table
        m_entries.put(thekey, newDist);
      }
    } else {

      // update the distribution for this instance
      if (m_classIsNominal) {
        tempClassDist2[(int) inst.classValue()] += inst.weight();

        // update the table
        m_entries.put(thekey, tempClassDist2);
      } else {
        tempClassDist2[0] += (inst.classValue() * inst.weight());
        tempClassDist2[1] += inst.weight();

        // update the table
        m_entries.put(thekey, tempClassDist2);
      }
    }
  }
Example no. 14
  /** Update the model using the provided instance */
  public void trainOnInstanceImpl(Instance inst) {
    accumulatedError =
        Math.abs(this.prediction(inst) - inst.classValue()) + fadingFactor * accumulatedError;
    nError = 1 + fadingFactor * nError;
    // Initialise Perceptron if necessary
    if (this.initialisePerceptron == true) {
      this.fadingFactor = this.fadingFactorOption.getValue();
      this.classifierRandom.setSeed(randomSeedOption.getValue());
      this.initialisePerceptron = false; // not in resetLearningImpl() because it needs Instance!
      this.weightAttribute = new double[inst.numAttributes()];
      for (int j = 0; j < inst.numAttributes(); j++) {
        weightAttribute[j] = 2 * this.classifierRandom.nextDouble() - 1;
      }
      // Update Learning Rate
      learningRatio = learningRatioOption.getValue();
      this.learningRateDecay = learningRateDecayOption.getValue();
    }

    // Update attribute statistics
    this.perceptronInstancesSeen++;
    this.perceptronYSeen++;

    for (int j = 0; j < inst.numAttributes() - 1; j++) {
      perceptronattributeStatistics.addToValue(j, inst.value(j));
      squaredperceptronattributeStatistics.addToValue(j, inst.value(j) * inst.value(j));
    }
    this.perceptronsumY += inst.classValue();
    this.squaredperceptronsumY += inst.classValue() * inst.classValue();

    if (constantLearningRatioDecayOption.isSet() == false) {
      learningRatio =
          learningRatioOption.getValue() / (1 + perceptronInstancesSeen * learningRateDecay);
    }

    // double prediction = this.updateWeights(inst,learningRatio);
    // accumulatedError= Math.abs(prediction-inst.classValue()) + fadingFactor*accumulatedError;

    this.updateWeights(inst, learningRatio);
  }
Example no. 15
  /**
   * Checks if an instance contains an item set.
   *
   * @param instance the instance to be tested
   * @return true if the given instance contains this item set
   */
  public boolean containedByTreatZeroAsMissing(Instance instance) {

    if (instance instanceof weka.core.SparseInstance) {
      int numInstVals = instance.numValues();
      int numItemSetVals = m_items.length;

      for (int p1 = 0, p2 = 0; p1 < numInstVals || p2 < numItemSetVals; ) {
        int instIndex = Integer.MAX_VALUE;
        if (p1 < numInstVals) {
          instIndex = instance.index(p1);
        }
        int itemIndex = p2;

        if (m_items[itemIndex] > -1) {
          if (itemIndex != instIndex) {
            return false;
          } else {
            if (instance.isMissingSparse(p1)) {
              return false;
            }
            if (m_items[itemIndex] != (int) instance.valueSparse(p1)) {
              return false;
            }
          }

          p1++;
          p2++;
        } else {
          if (itemIndex < instIndex) {
            p2++;
          } else if (itemIndex == instIndex) {
            p2++;
            p1++;
          }
        }
      }
    } else {
      for (int i = 0; i < instance.numAttributes(); i++) {
        if (m_items[i] > -1) {
          if (instance.isMissing(i) || (int) instance.value(i) == 0) {
            return false;
          }
          if (m_items[i] != (int) instance.value(i)) {
            return false;
          }
        }
      }
    }

    return true;
  }
Example no. 16
  /**
   * Checks if an instance contains an item set.
   *
   * @param instance the instance to be tested
   * @return true if the given instance contains this item set
   */
  public boolean containedBy(Instance instance) {
    for (int i = 0; i < instance.numAttributes(); i++) {
      if (m_items[i] > -1) {
        if (instance.isMissing(i)) {
          return false;
        }
        if (m_items[i] != (int) instance.value(i)) {
          return false;
        }
      }
    }

    return true;
  }
Example no. 17
  /**
   * turns the instance into a libsvm row
   *
   * @param inst the instance to transform
   * @return the generated libsvm row
   */
  protected String instanceToLibsvm(Instance inst) {
    StringBuffer result;
    int i;

    // class
    result = new StringBuffer("" + inst.classValue());

    // attributes
    for (i = 0; i < inst.numAttributes(); i++) {
      if (i == inst.classIndex()) continue;
      if (inst.value(i) == 0) continue;
      result.append(" " + (i + 1) + ":" + inst.value(i));
    }

    return result.toString();
  }
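For illustration, a small hedged snippet (assuming imports of java.util.ArrayList and weka.core.*, plus the class that provides instanceToLibsvm) showing what the conversion emits for a made-up instance whose last attribute is the class:

  ArrayList<Attribute> atts = new ArrayList<Attribute>();
  atts.add(new Attribute("x1"));
  atts.add(new Attribute("x2"));
  atts.add(new Attribute("y"));
  Instances header = new Instances("demo", atts, 1);
  header.setClassIndex(2);
  Instance inst = new DenseInstance(1.0, new double[] {0.5, 0.0, 3.0});
  inst.setDataset(header);
  // zero-valued attributes and the class column are skipped; indices are 1-based
  System.out.println(instanceToLibsvm(inst));   // prints "3.0 1:0.5"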
Example no. 18
  /**
   * Calculate the dependent value for a given instance for a given regression model.
   *
   * @param transformedInstance the input instance
   * @param selectedAttributes an array of flags indicating which attributes are included in the
   *     regression model
   * @param coefficients an array of coefficients for the regression model
   * @return the regression value for the instance.
   * @throws Exception if the class attribute of the input instance is not assigned
   */
  private double regressionPrediction(
      Instance transformedInstance, boolean[] selectedAttributes, double[] coefficients)
      throws Exception {

    double result = 0;
    int column = 0;
    for (int j = 0; j < transformedInstance.numAttributes(); j++) {
      if ((m_ClassIndex != j) && (selectedAttributes[j])) {
        result += coefficients[column] * transformedInstance.value(j);
        column++;
      }
    }
    result += coefficients[column];

    return result;
  }
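A short worked example of how selected attributes map onto coefficient slots; all numbers below are invented.

  // selectedAttributes         = {true, false, true}, with m_ClassIndex = 1
  // coefficients               = {2.0, -1.0, 0.5}     (the final entry is the intercept)
  // transformedInstance values = {3.0, <class>, 4.0}
  //
  //   result = 2.0 * 3.0 + (-1.0) * 4.0 + 0.5 = 2.5
  //
  // Only attributes flagged as selected consume a coefficient slot, in order,
  // and the intercept is always the last coefficient.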
Example no. 19
  /**
   * Calculates the class membership probabilities for the given test instance.
   *
   * @param instance the instance to be classified
   * @return predicted class probability distribution
   * @throws Exception if distribution can't be computed
   */
  public double[] distributionForInstance(Instance instance) throws Exception {

    DecisionTableHashKey thekey;
    double[] tempDist;
    double[] normDist;

    m_disTransform.input(instance);
    m_disTransform.batchFinished();
    instance = m_disTransform.output();

    m_delTransform.input(instance);
    m_delTransform.batchFinished();
    instance = m_delTransform.output();

    thekey = new DecisionTableHashKey(instance, instance.numAttributes(), false);

    // if this one is not in the table
    if ((tempDist = (double[]) m_entries.get(thekey)) == null) {
      if (m_useIBk) {
        tempDist = m_ibk.distributionForInstance(instance);
      } else {
        if (!m_classIsNominal) {
          tempDist = new double[1];
          tempDist[0] = m_majority;
        } else {
          tempDist = m_classPriors.clone();
          /*tempDist = new double [m_theInstances.classAttribute().numValues()];
          tempDist[(int)m_majority] = 1.0; */
        }
      }
    } else {
      if (!m_classIsNominal) {
        normDist = new double[1];
        normDist[0] = (tempDist[0] / tempDist[1]);
        tempDist = normDist;
      } else {

        // normalise distribution
        normDist = new double[tempDist.length];
        System.arraycopy(tempDist, 0, normDist, 0, tempDist.length);
        Utils.normalize(normDist);
        tempDist = normDist;
      }
    }
    return tempDist;
  }
Example no. 20
  /**
   * Updates the minimum and maximum values for all the attributes based on a new instance.
   *
   * @param instance the new instance
   */
  private void updateMinMax(Instance instance) {

    for (int j = 0; j < instance.numAttributes(); j++) {
      if (Double.isNaN(m_Min[j])) {
        m_Min[j] = instance.value(j);
        m_Max[j] = instance.value(j);
      } else {
        if (instance.value(j) < m_Min[j]) {
          m_Min[j] = instance.value(j);
        } else {
          if (instance.value(j) > m_Max[j]) {
            m_Max[j] = instance.value(j);
          }
        }
      }
    }
  }
Example no. 21
  @Override
  public void trainOnInstanceImpl(Instance inst) {

    if (this.initialized == false) {
      this.dimension = inst.numAttributes();
      manager =
          new BucketManager(this.length, this.dimension, this.coresetsize, this.clustererRandom);
      this.initialized = true;
    }

    manager.insertPoint(new Point(inst, this.numberInstances));

    this.numberInstances++;
    if (this.numberInstances % widthOption.getValue() == 0) {

      Point[] streamingCoreset = manager.getCoresetFromManager(dimension);

      // compute 5 clusterings of the coreset with kMeans++ and take the best
      double minCost = 0.0;
      double curCost = 0.0;

      minCost =
          lloydPlusPlus(
              numberOfCentres, coresetsize, dimension, streamingCoreset, centresStreamingCoreset);
      curCost = minCost;

      for (int i = 1; i < 5; i++) {
        Point[] tmpCentresStreamingCoreset = new Point[0];
        curCost =
            lloydPlusPlus(
                numberOfCentres,
                coresetsize,
                dimension,
                streamingCoreset,
                tmpCentresStreamingCoreset);
        if (curCost < minCost) {
          minCost = curCost;
          centresStreamingCoreset = tmpCentresStreamingCoreset;
        }
      }
    }
  }
Example no. 22
  public double classifyInstance(Instance sample) throws Exception {
    // transform instance to sequence
    MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
    int shift = (sample.classIndex() == 0) ? 1 : 0;
    for (int t = 0; t < sequence.length; t++) {
      sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
    }
    Sequence seq = new Sequence(sequence);

    double minD = Double.MAX_VALUE;
    String classValue = null;
    for (ClassedSequence s : prototypes) {
      double tmpD = seq.distance(s.sequence);
      if (tmpD < minD) {
        minD = tmpD;
        classValue = s.classValue;
      }
    }
    // System.out.println(prototypes.size());
    return sample.classAttribute().indexOfValue(classValue);
  }
  /**
   * Classifies one test sample.
   *
   * @param sample the instance to classify
   * @return the index of the predicted class value
   * @throws Exception if classification fails
   */
  public double classifyInstance(Instance sample) throws Exception {
    // transform instance to sequence
    MonoDoubleItemSet[] sequence = new MonoDoubleItemSet[sample.numAttributes() - 1];
    int shift = (sample.classIndex() == 0) ? 1 : 0;
    for (int t = 0; t < sequence.length; t++) {
      sequence[t] = new MonoDoubleItemSet(sample.value(t + shift));
    }
    Sequence seq = new Sequence(sequence);

    // for each class
    String classValue = null;
    double maxProb = 0.0;
    double[] pr = new double[classedData.keySet().size()];
    for (String clas : classedData.keySet()) {
      int c = trainingData.classAttribute().indexOfValue(clas);
      double prob = 0.0;
      for (int k = 0; k < centroidsPerClass[c].length; k++) {
        // compute P(Q|k_c)
        if (Double.isNaN(sigmasPerClass[c][k]) || sigmasPerClass[c][k] == 0) {
          System.err.println("sigma=NAN||sigma=0");
          continue;
        }
        double dist = seq.distanceEuc(centroidsPerClass[c][k]);
        double p = computeProbaForQueryAndCluster(sigmasPerClass[c][k], dist);
        prob += p / centroidsPerClass[c].length;
        //				prob += p*prior[c][k];
        if (p > maxProb) {
          maxProb = p;
          classValue = clas;
        }
      }
      //			if (prob > maxProb) {
      //				maxProb = prob;
      //				classValue = clas;
      //			}
    }
    //		System.out.println(Arrays.toString(pr));
    //		System.out.println(classValue);
    return sample.classAttribute().indexOfValue(classValue);
  }
  /**
   * Compare two datasets to see if they differ.
   *
   * @param data1 one set of instances
   * @param data2 the other set of instances
   * @throws Exception if the datasets differ
   */
  protected void compareDatasets(Instances data1, Instances data2) throws Exception {

    if (data1.numAttributes() != data2.numAttributes())
      throw new Exception("number of attributes has changed");

    if (!(data2.numInstances() == data1.numInstances()))
      throw new Exception("number of instances has changed");

    for (int i = 0; i < data2.numInstances(); i++) {
      Instance orig = data1.instance(i);
      Instance copy = data2.instance(i);
      for (int j = 0; j < orig.numAttributes(); j++) {
        if (orig.isMissing(j)) {
          if (!copy.isMissing(j)) throw new Exception("instances have changed");
        } else if (!orig.toString(j).equals(copy.toString(j))) {
          throw new Exception("instances have changed");
        }

        if (orig.weight() != copy.weight()) throw new Exception("instance weights have changed");
      }
    }
  }
Example no. 25
  @Override
  public void updateNode(Instance inst) throws Exception {
    super.updateDistribution(inst);

    for (int i = 0; i < inst.numAttributes(); i++) {
      Attribute a = inst.attribute(i);
      if (i != inst.classIndex()) {
        ConditionalSufficientStats stats = m_nodeStats.get(a.name());
        if (stats == null) {
          if (a.isNumeric()) {
            stats = new GaussianConditionalSufficientStats();
          } else {
            stats = new NominalConditionalSufficientStats();
          }
          m_nodeStats.put(a.name(), stats);
        }

        stats.update(
            inst.value(a), inst.classAttribute().value((int) inst.classValue()), inst.weight());
      }
    }
  }
Example no. 26
 public Map<FV, Collection<FV>> extractValuesFromData(Instances inst) {
   Multimap<FV, FV> fv_list = ArrayListMultimap.create();
   // Instances outFormat = getOutputFormat();
   for (int i = 0; i < inst.numInstances(); i++) {
     Instance ins = inst.instance(i);
     // Skip the class label
     for (int x = 0; x < ins.numAttributes() - 1; x++) {
       Object value = null;
       try {
         value = ins.stringValue(x);
       } catch (Exception e) {
         value = ins.value(x);
       }
       FV fv = new FV(x, value, ins.classValue());
       fv.setNumLabels(inst.numClasses());
       if (!fv_list.put(fv, fv)) {
         System.err.println("Couldn't put duplicates: " + fv);
       }
     }
   }
   Map<FV, Collection<FV>> original_map = fv_list.asMap();
   return original_map;
 }
Example no. 27
  /**
   * Input an instance for filtering. The instance is processed and made available for output
   * immediately.
   *
   * @param instance the input instance.
   * @return true if the filtered instance may now be collected with output().
   * @throws IllegalStateException if no input structure has been defined.
   */
  public boolean input(Instance instance) {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    if (isOutputFormatDefined()) {
      Instance newInstance = (Instance) instance.copy();

      // make sure that we get the right indexes set for the converted
      // string attributes when operating on a second batch of instances
      for (int i = 0; i < newInstance.numAttributes(); i++) {
        if (newInstance.attribute(i).isString()
            && !newInstance.isMissing(i)
            && m_AttIndices.isInRange(i)) {
          Attribute outAtt = getOutputFormat().attribute(newInstance.attribute(i).name());
          String inVal = newInstance.stringValue(i);
          int outIndex = outAtt.indexOfValue(inVal);
          if (outIndex < 0) {
            newInstance.setMissing(i);
          } else {
            newInstance.setValue(i, outIndex);
          }
        }
      }
      push(newInstance);
      return true;
    }

    bufferInput(instance);
    return false;
  }
  protected void tokenizeInstance(Instance instance, boolean updateDictionary) {
    if (m_inputVector == null) {
      m_inputVector = new LinkedHashMap<String, Count>();
    } else {
      m_inputVector.clear();
    }

    if (m_useStopList && m_stopwords == null) {
      m_stopwords = new Stopwords();
      try {
        if (getStopwords().exists() && !getStopwords().isDirectory()) {
          m_stopwords.read(getStopwords());
        }
      } catch (Exception ex) {
        ex.printStackTrace();
      }
    }

    for (int i = 0; i < instance.numAttributes(); i++) {
      if (instance.attribute(i).isString() && !instance.isMissing(i)) {
        m_tokenizer.tokenize(instance.stringValue(i));

        while (m_tokenizer.hasMoreElements()) {
          String word = m_tokenizer.nextElement();
          if (m_lowercaseTokens) {
            word = word.toLowerCase();
          }

          word = m_stemmer.stem(word);

          if (m_useStopList) {
            if (m_stopwords.is(word)) {
              continue;
            }
          }

          Count docCount = m_inputVector.get(word);
          if (docCount == null) {
            m_inputVector.put(word, new Count(instance.weight()));
          } else {
            docCount.m_count += instance.weight();
          }
        }
      }
    }

    if (updateDictionary) {
      int classValue = (int) instance.classValue();
      LinkedHashMap<String, Count> dictForClass = m_probOfWordGivenClass.get(classValue);

      // document normalization
      double iNorm = 0;
      double fv = 0;

      if (m_normalize) {
        for (Count c : m_inputVector.values()) {
          // word counts or bag-of-words?
          fv = (m_wordFrequencies) ? c.m_count : 1.0;
          iNorm += Math.pow(Math.abs(fv), m_lnorm);
        }
        iNorm = Math.pow(iNorm, 1.0 / m_lnorm);
      }

      for (Map.Entry<String, Count> feature : m_inputVector.entrySet()) {
        String word = feature.getKey();
        double freq = (m_wordFrequencies) ? feature.getValue().m_count : 1.0;
        // double freq = (feature.getValue().m_count / iNorm * m_norm);

        if (m_normalize) {
          freq /= (iNorm * m_norm);
        }

        // check all classes
        for (int i = 0; i < m_data.numClasses(); i++) {
          LinkedHashMap<String, Count> dict = m_probOfWordGivenClass.get(i);
          if (dict.get(word) == null) {
            dict.put(word, new Count(m_leplace));
            m_wordsPerClass[i] += m_leplace;
          }
        }

        Count dictCount = dictForClass.get(word);
        /*
         * if (dictCount == null) { dictForClass.put(word, new Count(m_leplace +
         * freq)); m_wordsPerClass[classValue] += (m_leplace + freq); } else {
         */
        dictCount.m_count += freq;
        m_wordsPerClass[classValue] += freq;
        // }
      }

      pruneDictionary();
    }
  }
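The document-length normalization above computes an Lp norm over the current instance's word counts; a short worked example (numbers invented) of the iNorm arithmetic:

  // With word counts {2, 1, 2}, m_wordFrequencies = true, m_lnorm = 2 and m_norm = 1:
  //   iNorm = (|2|^2 + |1|^2 + |2|^2)^(1/2) = 3
  //   each frequency is divided by (iNorm * m_norm), e.g. 2 -> 2/3,
  // so each document contributes a vector of unit L2 length before the
  // per-class dictionaries are updated.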
Example no. 29
  /**
   * Saves an instance incrementally. Structure has to be set by using the setStructure() method or
   * setInstances() method.
   *
   * @param inst the instance to save
   * @throws IOException if an instance cannot be saved incrementally.
   */
  public void writeIncremental(Instance inst) throws IOException {

    int writeMode = getWriteMode();
    Instances structure = getInstances();
    PrintWriter outW = null;

    if (structure != null) {
      if (structure.classIndex() == -1) {
        structure.setClassIndex(structure.numAttributes() - 1);
        System.err.println("No class specified. Last attribute is used as class attribute.");
      }
      if (structure.attribute(structure.classIndex()).isNumeric())
        throw new IOException("To save in C4.5 format the class attribute cannot be numeric.");
    }
    if (getRetrieval() == BATCH || getRetrieval() == NONE)
      throw new IOException("Batch and incremental saving cannot be mixed.");
    if (retrieveFile() == null || getWriter() == null) {
      throw new IOException(
          "C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option.");
    }

    outW = new PrintWriter(getWriter());

    if (writeMode == WAIT) {
      if (structure == null) {
        setWriteMode(CANCEL);
        if (inst != null)
          System.err.println("Structure(Header Information) has to be set in advance");
      } else setWriteMode(STRUCTURE_READY);
      writeMode = getWriteMode();
    }
    if (writeMode == CANCEL) {
      if (outW != null) outW.close();
      cancel();
    }
    if (writeMode == STRUCTURE_READY) {
      setWriteMode(WRITE);
      // write header: here names file
      for (int i = 0; i < structure.attribute(structure.classIndex()).numValues(); i++) {
        outW.write(structure.attribute(structure.classIndex()).value(i));
        if (i < structure.attribute(structure.classIndex()).numValues() - 1) {
          outW.write(",");
        } else {
          outW.write(".\n");
        }
      }
      for (int i = 0; i < structure.numAttributes(); i++) {
        if (i != structure.classIndex()) {
          outW.write(structure.attribute(i).name() + ": ");
          if (structure.attribute(i).isNumeric() || structure.attribute(i).isDate()) {
            outW.write("continuous.\n");
          } else {
            Attribute temp = structure.attribute(i);
            for (int j = 0; j < temp.numValues(); j++) {
              outW.write(temp.value(j));
              if (j < temp.numValues() - 1) {
                outW.write(",");
              } else {
                outW.write(".\n");
              }
            }
          }
        }
      }
      outW.flush();
      outW.close();

      writeMode = getWriteMode();

      String out = retrieveFile().getAbsolutePath();
      setFileExtension(".data");
      out = out.substring(0, out.lastIndexOf('.')) + getFileExtension();
      File namesFile = new File(out);
      try {
        setFile(namesFile);
      } catch (Exception ex) {
        throw new IOException("Cannot create data file, only names file created.");
      }
      if (retrieveFile() == null || getWriter() == null) {
        throw new IOException("Cannot create data file, only names file created.");
      }
      outW = new PrintWriter(getWriter());
    }
    if (writeMode == WRITE) {
      if (structure == null) throw new IOException("No instances information available.");
      if (inst != null) {
        // write instance: here data file
        for (int j = 0; j < inst.numAttributes(); j++) {
          if (j != structure.classIndex()) {
            if (inst.isMissing(j)) {
              outW.write("?,");
            } else if (structure.attribute(j).isNominal() || structure.attribute(j).isString()) {
              outW.write(structure.attribute(j).value((int) inst.value(j)) + ",");
            } else {
              outW.write("" + inst.value(j) + ",");
            }
          }
        }
        // write the class value
        if (inst.isMissing(structure.classIndex())) {
          outW.write("?");
        } else {
          outW.write(
              structure
                  .attribute(structure.classIndex())
                  .value((int) inst.value(structure.classIndex())));
        }
        outW.write("\n");
        // flushes every 100 instances
        m_incrementalCounter++;
        if (m_incrementalCounter > 100) {
          m_incrementalCounter = 0;
          outW.flush();
        }
      } else {
        // close
        if (outW != null) {
          outW.flush();
          outW.close();
        }
        setFileExtension(".names");
        m_incrementalCounter = 0;
        resetStructure();
        outW = null;
        resetWriter();
      }
    }
  }
Example no. 30
  /**
   * Writes a Batch of instances
   *
   * @throws IOException if saving in batch mode is not possible
   */
  public void writeBatch() throws IOException {

    Instances instances = getInstances();

    if (instances == null) throw new IOException("No instances to save");
    if (instances.classIndex() == -1) {
      instances.setClassIndex(instances.numAttributes() - 1);
      System.err.println("No class specified. Last attribute is used as class attribute.");
    }
    if (instances.attribute(instances.classIndex()).isNumeric())
      throw new IOException("To save in C4.5 format the class attribute cannot be numeric.");
    if (getRetrieval() == INCREMENTAL)
      throw new IOException("Batch and incremental saving cannot be mixed.");

    setRetrieval(BATCH);
    if (retrieveFile() == null || getWriter() == null) {
      throw new IOException(
          "C4.5 format requires two files. Therefore no output to standard out can be generated.\nPlease specifiy output files using the -o option.");
    }
    setWriteMode(WRITE);
    // print names file
    setFileExtension(".names");
    PrintWriter outW = new PrintWriter(getWriter());
    for (int i = 0; i < instances.attribute(instances.classIndex()).numValues(); i++) {
      outW.write(instances.attribute(instances.classIndex()).value(i));
      if (i < instances.attribute(instances.classIndex()).numValues() - 1) {
        outW.write(",");
      } else {
        outW.write(".\n");
      }
    }
    for (int i = 0; i < instances.numAttributes(); i++) {
      if (i != instances.classIndex()) {
        outW.write(instances.attribute(i).name() + ": ");
        if (instances.attribute(i).isNumeric() || instances.attribute(i).isDate()) {
          outW.write("continuous.\n");
        } else {
          Attribute temp = instances.attribute(i);
          for (int j = 0; j < temp.numValues(); j++) {
            outW.write(temp.value(j));
            if (j < temp.numValues() - 1) {
              outW.write(",");
            } else {
              outW.write(".\n");
            }
          }
        }
      }
    }
    outW.flush();
    outW.close();

    // print data file
    String out = retrieveFile().getAbsolutePath();
    setFileExtension(".data");
    out = out.substring(0, out.lastIndexOf('.')) + getFileExtension();
    File namesFile = new File(out);
    try {
      setFile(namesFile);
    } catch (Exception ex) {
      throw new IOException(
          "Cannot create data file, only names file created (Reason: " + ex.toString() + ").");
    }
    if (retrieveFile() == null || getWriter() == null) {
      throw new IOException("Cannot create data file, only names file created.");
    }
    outW = new PrintWriter(getWriter());
    // print data file
    for (int i = 0; i < instances.numInstances(); i++) {
      Instance temp = instances.instance(i);
      for (int j = 0; j < temp.numAttributes(); j++) {
        if (j != instances.classIndex()) {
          if (temp.isMissing(j)) {
            outW.write("?,");
          } else if (instances.attribute(j).isNominal() || instances.attribute(j).isString()) {
            outW.write(instances.attribute(j).value((int) temp.value(j)) + ",");
          } else {
            outW.write("" + temp.value(j) + ",");
          }
        }
      }
      // write the class value
      if (temp.isMissing(instances.classIndex())) {
        outW.write("?");
      } else {
        outW.write(
            instances
                .attribute(instances.classIndex())
                .value((int) temp.value(instances.classIndex())));
      }
      outW.write("\n");
    }
    outW.flush();
    outW.close();
    setFileExtension(".names");
    setWriteMode(WAIT);
    outW = null;
    resetWriter();
    setWriteMode(CANCEL);
  }
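Batch usage of this saver, as a hedged sketch: the method appears to belong to Weka's C4.5-format saver (C45Saver), so a caller with a loaded dataset that has a nominal class might write both files as follows; the variable data and the file name are invented.

  C45Saver saver = new C45Saver();
  saver.setInstances(data);                          // the last attribute is used as class if none is set
  saver.setFile(new java.io.File("weather.names"));  // a .data file with the same stem is written as well
  saver.writeBatch();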