Ejemplo n.º 1
0
  /**
   * Constructs a tri-training classifier.
   *
   * <p>Three instances of the named classifier class are created reflectively. The training file
   * is loaded and split in file order: the first {@code (int) (n * precentage)} instances (capped
   * at {@code n}) become the labeled set, the remaining ones the unlabeled set. The test file is
   * then loaded and {@code Init()} is called.
   *
   * <p>Any failure is reported on standard error; as before, the constructor itself does not
   * throw, so the object may be left partially initialized after a reported failure.
   *
   * @param classifier fully qualified class name of the base classifier to instantiate
   * @param trainingIns_File path of the training data, read via {@code Util.getInstances}
   * @param testIns_File path of the test data, read via {@code Util.getInstances}
   * @param precentage fraction of the training data that is kept as labeled data
   */
  public Tritrainer(
      String classifier, String trainingIns_File, String testIns_File, double precentage) {
    try {
      this.classifier1 = (Classifier) Class.forName(classifier).newInstance();
      this.classifier2 = (Classifier) Class.forName(classifier).newInstance();
      this.classifier3 = (Classifier) Class.forName(classifier).newInstance();

      Instances trainingInstances = Util.getInstances(trainingIns_File);

      // Split the training data into a labeled part (precentage) and an unlabeled part
      // (1 - precentage). The count is capped so a fraction above 1 cannot run past the data.
      int length = trainingInstances.numInstances();
      int labeledCount = Math.min(length, (int) (length * precentage));
      labeledIns = new Instances(trainingInstances, 0);
      for (int j = 0; j < labeledCount; j++) {
        labeledIns.add(trainingInstances.firstInstance());
        trainingInstances.delete(0);
      }
      unlabeledIns = trainingInstances;
      testIns = Util.getInstances(testIns_File);

      Init();
    } catch (Exception e) {
      // Previously swallowed silently; report it so a broken setup is at least visible.
      System.err.println("Tritrainer initialization failed: " + e);
    }
  }
Ejemplo n.º 2
0
 /**
  * Shuffles the given data set in place and trims it to at most {@code m} instances.
  *
  * <p>The set is randomized with a fresh {@link Random} and instances are then removed from the
  * front until no more than {@code m} remain. A set that already holds {@code m} or fewer
  * instances is only shuffled; previously such a call deleted every instance and then failed.
  *
  * @param inst the data set to sub-sample; modified in place
  * @param m the maximum number of instances to keep
  * @throws IllegalArgumentException if {@code m} is negative
  */
 public void SubSample(Instances inst, int m) {
   if (m < 0) {
     throw new IllegalArgumentException("Sample size must be non-negative: " + m);
   }
   inst.randomize(new Random());
   // '>' rather than '!=' so the loop terminates when the set is already small enough.
   while (inst.numInstances() > m) {
     inst.delete(0);
   }
 }
Ejemplo n.º 3
0
  /**
   * Measures the joint classification error rate of two classifiers.
   *
   * <p>Only instances on which both classifiers predict the same value are considered. The result
   * is the number of those instances whose shared prediction differs from the true class value,
   * divided by the number of instances on which the two classifiers agree. When they never agree
   * the division is {@code 0.0 / 0} and the result is {@code NaN}.
   *
   * <p>If classifying an instance throws, the exception is printed to standard output and the
   * rate is computed from the instances processed up to that point.
   *
   * @param h1 the first classifier
   * @param h2 the second classifier
   * @param test the labeled instances to evaluate on
   * @return joint errors divided by joint decisions
   */
  public double measureBothError(Classifier h1, Classifier h2, Instances test) {
    final int count = test.numInstances();
    int agreed = 0;
    int agreedButWrong = 0;
    try {
      for (int idx = 0; idx < count; idx++) {
        final double actual = test.instance(idx).classValue();
        final double first = h1.classifyInstance(test.instance(idx));
        final double second = h2.classifyInstance(test.instance(idx));

        // Instances on which the classifiers disagree do not contribute at all.
        if (first != second) {
          continue;
        }
        agreed++;
        if (actual != first) {
          agreedButWrong++;
        }
      }
    } catch (Exception e) {
      System.out.println(e);
    }

    // joint error rate = shared wrong decisions / shared decisions
    return (double) agreedButWrong / agreed;
  }
Ejemplo n.º 4
0
  /**
   * Clusters the given agents with a Weka clusterer and groups them by assigned cluster.
   *
   * <p>The agents are first converted to a Weka data set via {@code convertToInstances}; the
   * clusterer is built on that data set and every instance is then assigned to a cluster. The
   * agent at position {@code i} of {@code agents} is added to the group of instance {@code i}.
   *
   * @param scope the current scope
   * @param clusterer the clusterer to build and apply
   * @param attributes names of the agent attributes used as features
   * @param agents the agents to cluster
   * @return one list of agents per cluster, indexed by cluster number, or {@code null} if
   *     building the clusterer or assigning an instance threw an exception
   * @throws GamaRuntimeException declared by the signature; see {@code convertToInstances}
   */
  private static IList<IList<IAgent>> clusteringUsingWeka(
      final IScope scope,
      final Clusterer clusterer,
      final IList<String> attributes,
      final IAddressableContainer<Integer, IAgent, Integer, IAgent> agents)
      throws GamaRuntimeException {
    final Instances dataset = convertToInstances(scope, attributes, agents);
    try {
      clusterer.buildClusterer(dataset);

      // One (initially empty) group per cluster found by the clusterer.
      final IList<IList<IAgent>> groups = GamaListFactory.create(Types.LIST.of(Types.AGENT));
      for (int c = 0; c < clusterer.numberOfClusters(); c++) {
        groups.add(GamaListFactory.<IAgent>create(Types.AGENT));
      }

      for (int row = 0; row < dataset.numInstances(); row++) {
        final Instance current = dataset.instance(row);
        final IList<IAgent> target = groups.get(clusterer.clusterInstance(current));
        target.add(agents.get(scope, row));
      }
      return groups;
    } catch (Exception e) {
      return null;
    }
  }
Ejemplo n.º 5
0
  /**
   * Creates a set of artificial, unlabeled training examples.
   *
   * <p>For every attribute the per-attribute statistics stored in {@code m_AttributeStats} are
   * used: nominal attributes get an index drawn by {@code selectIndexProbabilistically}, numeric
   * attributes get a Gaussian sample scaled by {@code stats[1]} and shifted by {@code stats[0]}.
   * Attributes of any other type are reported on standard error and left at 0.
   *
   * @param artSize number of examples to create
   * @param data training data whose header defines the attributes
   * @return the generated examples, each with weight 1.0
   */
  protected Instances generateArtificialData(int artSize, Instances data) {
    final int attCount = data.numAttributes();
    final Instances generated = new Instances(data, artSize);

    for (int n = 0; n < artSize; n++) {
      final double[] values = new double[attCount];
      for (int a = 0; a < attCount; a++) {
        final boolean nominal = data.attribute(a).isNominal();
        if (!nominal && !data.attribute(a).isNumeric()) {
          System.err.println("Decorate can only handle numeric and nominal values.");
          continue;
        }
        final double[] stats = (double[]) m_AttributeStats.get(a);
        if (nominal) {
          // Pick a nominal value according to its frequency in the training data.
          values[a] = (double) selectIndexProbabilistically(stats);
        } else {
          // Sample from the Gaussian defined by the attribute's mean and std dev.
          values[a] = (m_Random.nextGaussian() * stats[1]) + stats[0];
        }
      }
      generated.add(new Instance(1.0, values));
    }
    return generated;
  }
Ejemplo n.º 6
0
  /**
   * Prepares the optimizer state for a new run on the given data.
   *
   * <p>Copies C from the SVM, remembers the data, its class index and size, builds a private copy
   * of the SVM's kernel on the data, caches the class values as targets, seeds the random number
   * generator, and resets alpha, alpha*, the support vector set, the bias and the counters.
   *
   * @param data the data to work with
   * @throws Exception if m_SVM is null
   */
  protected void init(Instances data) throws Exception {
    if (null == m_SVM) {
      throw new Exception("SVM not initialized in optimizer. Use RegOptimizer.setSVMReg()");
    }

    m_C = m_SVM.getC();
    m_data = data;
    m_classIndex = data.classIndex();
    m_nInstances = data.numInstances();

    // Work on a private copy of the kernel, built on this data.
    m_kernel = Kernel.makeCopy(m_SVM.getKernel());
    m_kernel.buildKernel(data);

    // Cache the class value of every instance as the regression target.
    m_target = new double[m_nInstances];
    final double[] targets = m_target;
    int row = 0;
    while (row < m_nInstances) {
      targets[row] = data.instance(row).classValue();
      row++;
    }

    m_random = new Random(m_nSeed);

    // alpha and alpha* start out as all-zero arrays, one entry per target
    m_alpha = new double[targets.length];
    m_alphaStar = new double[targets.length];

    m_supportVectors = new SMOset(m_nInstances);

    m_b = 0.0;
    m_nEvals = 0;
    m_nCacheHits = -1;
  }
Ejemplo n.º 7
0
  /**
   * Builds the classifier from the given training data.
   *
   * <p>The base classifier must handle weighted instances. The data is checked against the
   * classifier's capabilities, copied, and stripped of instances with a missing class. If only
   * the class attribute is present a ZeroR model is built instead; otherwise a copy of the data
   * is kept in {@code m_Train} and handed to {@code m_NNSearch}.
   *
   * @param instances set of instances serving as training data
   * @throws Exception if the classifier has not been generated successfully
   */
  public void buildClassifier(Instances instances) throws Exception {
    final boolean handlesWeights = m_Classifier instanceof WeightedInstancesHandler;
    if (!handlesWeights) {
      throw new IllegalArgumentException("Classifier must be a " + "WeightedInstancesHandler!");
    }

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // work on a copy without the instances whose class is missing
    final Instances cleaned = new Instances(instances);
    cleaned.deleteWithMissingClass();

    // only the class attribute left -> fall back to a ZeroR model
    if (cleaned.numAttributes() == 1) {
      System.err.println(
          "Cannot build model (only class attribute present in data!), "
              + "using ZeroR model instead!");
      m_ZeroR = new weka.classifiers.rules.ZeroR();
      m_ZeroR.buildClassifier(cleaned);
      return;
    }
    m_ZeroR = null;

    m_Train = new Instances(cleaned, 0, cleaned.numInstances());
    m_NNSearch.setInstances(m_Train);
  }
Ejemplo n.º 8
0
  /**
   * Releases working state and records statistics once optimization has finished.
   *
   * <p>The targets are dropped and the kernel's evaluation and cache-hit counts are stored. If
   * the SVM's kernel is a {@code PolyKernel} with exponent 1.0, the support vectors are folded
   * into an explicit weight vector (one weight per non-class attribute) and alpha, alpha* and the
   * kernel are released. In every case the model is finally flagged as built.
   *
   * @throws Exception if something goes wrong
   */
  protected void wrapUp() throws Exception {
    m_target = null;

    m_nEvals = m_kernel.numEvals();
    m_nCacheHits = m_kernel.numCacheHits();

    final boolean linearKernel =
        (m_SVM.getKernel() instanceof PolyKernel)
            && ((PolyKernel) m_SVM.getKernel()).getExponent() == 1.0;
    if (!linearKernel) {
      m_bModelBuilt = true;
      return;
    }

    // convert the alphas to weights: w[j] = sum over support vectors of (alpha - alpha*) * x[j]
    final double[] linearWeights = new double[m_data.numAttributes()];
    int sv = m_supportVectors.getNext(-1);
    while (sv != -1) {
      for (int att = 0; att < linearWeights.length; att++) {
        if (att == m_classIndex) {
          continue;
        }
        linearWeights[att] += (m_alpha[sv] - m_alphaStar[sv]) * m_data.instance(sv).value(att);
      }
      sv = m_supportVectors.getNext(sv);
    }
    m_weights = linearWeights;

    // the weights replace these structures, so let them be garbage collected
    m_alpha = null;
    m_alphaStar = null;
    m_kernel = null;

    m_bModelBuilt = true;
  }
Ejemplo n.º 9
0
  /**
   * Converts a container of agents into a Weka data set.
   *
   * <p>One attribute is created per name in {@code attributes}; the data set is named after the
   * scope's agent scope. Each agent becomes one instance of weight 1 whose values are the agent's
   * variables of those names, cast with {@code Cast.asFloat}.
   *
   * @param scope the current scope
   * @param attributes names of the agent variables to export
   * @param agents the agents to convert
   * @return the data set, with one instance per agent in iteration order
   * @throws GamaRuntimeException declared by the signature
   */
  private static Instances convertToInstances(
      final IScope scope,
      final IList<String> attributes,
      final IAddressableContainer<Integer, IAgent, Integer, IAgent> agents)
      throws GamaRuntimeException {
    final FastVector header = new FastVector();
    for (final String name : attributes) {
      header.addElement(new Attribute(name));
    }

    final Instances dataset =
        new Instances(scope.getAgentScope().getName(), header, agents.length(scope));

    for (final IAgent agent : agents.iterable(scope)) {
      final int width = attributes.size();
      final double[] row = new double[width];
      int col = 0;
      while (col < width) {
        final Double cell = Cast.asFloat(scope, agent.getDirectVarValue(scope, attributes.get(col)));
        row[col] = cell;
        col++;
      }
      dataset.add(new Instance(1, row));
    }
    return dataset;
  }
Ejemplo n.º 10
0
  /**
   * Recomputes the attribute ranges from all instances of {@code m_Data} and stores them in
   * {@code m_Ranges}.
   *
   * <p>Without data the ranges are set to {@code null}. For an empty data set the ranges are
   * filled by {@code initializeRangesEmpty}; otherwise the first instance initializes them and
   * every further instance updates them.
   *
   * @return the ranges (three values per attribute), or {@code null} if there is no data
   */
  public double[][] initializeRanges() {
    if (m_Data == null) {
      m_Ranges = null;
      return m_Ranges;
    }

    final int attCount = m_Data.numAttributes();
    final double[][] computed = new double[attCount][3];

    if (m_Data.numInstances() > 0) {
      // the first instance seeds the ranges, all later ones widen them
      updateRangesFirst(m_Data.instance(0), attCount, computed);
      for (int row = 1; row < m_Data.numInstances(); row++) {
        updateRanges(m_Data.instance(row), attCount, computed);
      }
    } else {
      initializeRangesEmpty(attCount, computed);
    }

    m_Ranges = computed;
    return m_Ranges;
  }
Ejemplo n.º 11
0
 /**
  * Initializes the attribute indices: sets the upper bound of {@code m_AttributeIndices} to the
  * last attribute of {@code m_Data} and records, per attribute, whether it lies in that range.
  */
 protected void initializeAttributeIndices() {
   final int attCount = m_Data.numAttributes();
   m_AttributeIndices.setUpper(attCount - 1);

   m_ActiveIndices = new boolean[attCount];
   final boolean[] active = m_ActiveIndices;
   int att = 0;
   while (att < active.length) {
     active[att] = m_AttributeIndices.isInRange(att);
     att++;
   }
 }
Ejemplo n.º 12
0
 /**
  * Computes the default number of accurate instances in the given data for the consequent of
  * the rule, i.e. the summed weight of all instances whose class value (truncated to an int)
  * equals {@code m_Consequent} (truncated to an int).
  *
  * @param data the data in question
  * @return the default accuracy number
  */
 private double computeDefAccu(Instances data) {
   final int wanted = (int) m_Consequent;
   double covered = 0;
   for (int row = 0; row < data.numInstances(); row++) {
     final Instance current = data.instance(row);
     if ((int) current.classValue() != wanted) {
       continue;
     }
     covered += current.weight();
   }
   return covered;
 }
Ejemplo n.º 13
0
 /**
  * GetKs - return [K_1,K_2,...,K_L] where each Y_j \in {1,...,K_j}. In the multi-label case,
  * K[j] = 2 for all j = 1,...,L.
  *
  * <p>L is taken from {@code D.classIndex()}, and K_j is the number of values of the j-th
  * attribute of {@code D}.
  *
  * @param D a dataset
  * @return an array of the number of values that each label can take
  */
 private static int[] getKs(Instances D) {
   final int[] valuesPerLabel = new int[D.classIndex()];
   for (int label = 0; label < valuesPerLabel.length; label++) {
     valuesPerLabel[label] = D.attribute(label).numValues();
   }
   return valuesPerLabel;
 }
  /**
   * Returns the cluster assigned to the instance with the given identifier.
   *
   * <p>The identifier is compared with the string value of attribute 0 of every instance in
   * {@code m_Sequences}. If several instances carry the same identifier, the last one is used.
   *
   * @param objectID the identifier to look up
   * @return the entry of {@code cluster} at the index of the matching instance
   * @throws IllegalArgumentException if no instance carries the given identifier (previously this
   *     surfaced as an {@code ArrayIndexOutOfBoundsException} for index -1)
   */
  public int getClusterNumber(String objectID) {
    // Scan backwards: the last match wins, as before, and the search stops at the first hit.
    for (int i = m_Sequences.numInstances() - 1; i >= 0; i--) {
      if (objectID.equals(m_Sequences.instance(i).stringValue(0))) {
        return cluster[i];
      }
    }
    throw new IllegalArgumentException("No instance found with object ID: " + objectID);
  }
Ejemplo n.º 15
0
 /**
  * Discretizes an attribute using bins.
  *
  * <p>The attribute at {@code attIndex} is replaced in the data set's attribute list by a
  * {@code BinnedAttribute} with the same name and index, and every instance's value for that
  * attribute is replaced by the index of the bin it falls into.
  *
  * @param instances the dataset to discretize.
  * @param attIndex the attribute index.
  * @param bins the bins.
  */
 public static void discretize(Instances instances, int attIndex, Bins bins) {
   final Attribute source = instances.getAttributes().get(attIndex);
   final int column = source.getIndex();

   final BinnedAttribute replacement = new BinnedAttribute(source.getName(), bins);
   replacement.setIndex(column);
   instances.getAttributes().set(attIndex, replacement);

   for (Instance row : instances) {
     final int binIndex = bins.getIndex(row.getValue(column));
     row.setValue(column, binIndex);
   }
 }
Ejemplo n.º 16
0
  /**
   * Resets the per-attribute minimum and maximum arrays and recomputes them from the data.
   *
   * <p>Both arrays get one entry per attribute, initialized to NaN, and {@code updateMinMax} is
   * then applied to every instance in order.
   *
   * @param data the instances to derive the minima and maxima from
   */
  protected void initMinMax(Instances data) {
    final int attCount = data.numAttributes();
    m_Min = new double[attCount];
    m_Max = new double[attCount];
    for (int att = 0; att < attCount; att++) {
      m_Min[att] = Double.NaN;
      m_Max[att] = Double.NaN;
    }

    int row = 0;
    while (row < data.numInstances()) {
      updateMinMax(data.instance(row));
      row++;
    }
  }
Ejemplo n.º 17
0
  /**
   * Loads a data set from the database.
   *
   * <p>The table name selects one of the queries defined in {@code Config}; {@code "all"} and any
   * unrecognized name select {@code Config.selectAll}. Every result column becomes an attribute
   * named after the column and is read with {@code getDouble} (which, per JDBC, yields 0 for SQL
   * NULL). A nominal attribute {@code class} with the labels {@code T,F} is appended as the last
   * attribute; its value is left at 0.0 for every row, i.e. the first label.
   *
   * <p>The statement and result set are closed before returning, also when an exception occurs.
   *
   * @param tableName one of {@code "all"}, {@code "chiller1"}, {@code "chiller2"},
   *     {@code "consumption"}; also used as the relation name of the returned data set
   * @return the loaded data set
   * @throws Exception if the query fails or the data set cannot be assembled
   */
  public static Instances getDatasetDB(String tableName) throws Exception {
    // NOTE(review): the connection is deliberately not closed here because it is unclear whether
    // getDBConn() hands out a shared connection -- confirm ownership and close it if it is ours.
    Connection conn = getDBConn();

    String query;
    switch (tableName) {
      case "chiller1":
        query = Config.selectChiller1;
        break;
      case "chiller2":
        query = Config.selectChiller2;
        break;
      case "consumption":
        query = Config.selectConsumption;
        break;
      default: // "all" and every unknown table name
        query = Config.selectAll;
        break;
    }

    try (Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery(query)) {
      ResultSetMetaData rsmd = rs.getMetaData();
      int numAtts = rsmd.getColumnCount();

      // one attribute per result column (JDBC columns are 1-based)
      ArrayList<Attribute> attributes = new ArrayList<Attribute>();
      for (int i = 1; i <= numAtts; i++) {
        attributes.add(new Attribute(rsmd.getColumnName(i)));
      }

      Instances data = new Instances(tableName, attributes, 0);

      // append the nominal class attribute {T,F} as last attribute
      weka.filters.unsupervised.attribute.Add addAtt =
          new weka.filters.unsupervised.attribute.Add();
      addAtt.setOptions(weka.core.Utils.splitOptions("-T NOM -N class -L T,F -C last"));
      addAtt.setInputFormat(data);
      data = Filter.useFilter(data, addAtt);

      while (rs.next()) {
        // the extra trailing slot belongs to the class attribute and stays 0.0
        double[] values = new double[numAtts + 1];
        for (int i = 1; i <= numAtts; i++) {
          values[i - 1] = rs.getDouble(i);
        }
        data.add(new DenseInstance(1.0, values));
      }
      return data;
    }
  }
Ejemplo n.º 18
0
  /**
   * Computes the classification error on the given data: the fraction of instances whose class
   * value differs from the prediction of {@code classifyInstance} (truncated to an int).
   *
   * <p>For an empty data set the division is {@code 0.0 / 0} and the result is {@code NaN}.
   *
   * @param data the instances to be classified
   * @return classification error
   * @exception Exception if error can not be computed successfully
   */
  protected double computeError(Instances data) throws Exception {
    final int total = data.numInstances();
    double misses = 0.0;

    for (int idx = 0; idx < total; idx++) {
      final Instance example = data.instance(idx);
      final double actual = example.classValue();
      final int predicted = (int) classifyInstance(example);
      // count the instance if it has been misclassified
      if (actual != predicted) {
        misses++;
      }
    }
    return misses / total;
  }
Ejemplo n.º 19
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -i &lt;the input file&gt;
   * The input file</pre>
   *
   * <pre> -o &lt;the output file&gt;
   * The output file</pre>
   *
   * <pre> -c &lt;the class index&gt;
   * The class index</pre>
   *
   * <!-- options-end -->
   *
   * <p>The class index may be {@code first}, {@code last} or a number that is passed to
   * {@code setClassIndex} as is; without {@code -c} the last attribute is used. The input file is
   * loaded as ARFF. If the output name does not end with this saver's file extension, its
   * extension is replaced (or the extension is appended when it has none).
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported; in particular an {@code IOException} (with
   *     the original failure attached as its cause) if no input is given, the input cannot be
   *     loaded, or the output file cannot be set
   */
  public void setOptions(String[] options) throws Exception {

    String outputString = Utils.getOption('o', options);
    String inputString = Utils.getOption('i', options);
    String indexString = Utils.getOption('c', options);

    ArffLoader loader = new ArffLoader();

    resetOptions();

    // parse index; -1 stands for "last attribute" and is also the default
    int index = -1;
    if (indexString.length() != 0) {
      if (indexString.equals("first")) {
        index = 0;
      } else if (!indexString.equals("last")) {
        index = Integer.parseInt(indexString);
      }
    }

    if (inputString.length() == 0) {
      throw new IOException("No data set to save.");
    }
    try {
      File input = new File(inputString);
      loader.setFile(input);
      Instances inst = loader.getDataSet();
      if (index == -1) {
        inst.setClassIndex(inst.numAttributes() - 1);
      } else {
        inst.setClassIndex(index);
      }
      setInstances(inst);
    } catch (Exception ex) {
      IOException wrapped =
          new IOException(
              "No data set loaded. Data set has to be arff format (Reason: "
                  + ex.toString()
                  + ").");
      // keep the original failure reachable for debugging
      wrapped.initCause(ex);
      throw wrapped;
    }

    if (outputString.length() != 0) {
      // add appropriate file extension
      if (!outputString.endsWith(getFileExtension())) {
        int lastDot = outputString.lastIndexOf('.');
        if (lastDot != -1) {
          outputString = outputString.substring(0, lastDot) + getFileExtension();
        } else {
          outputString = outputString + getFileExtension();
        }
      }
      try {
        File output = new File(outputString);
        setFile(output);
      } catch (Exception ex) {
        IOException wrapped = new IOException("Cannot create output file.");
        wrapped.initCause(ex);
        throw wrapped;
      }
    }

    if (index == -1) {
      index = getInstances().numAttributes() - 1;
    }
    getInstances().setClassIndex(index);
  }
Ejemplo n.º 20
0
  /**
   * Labels the artificially generated data in place.
   *
   * <p>For each instance the class membership probabilities predicted by
   * {@code distributionForInstance} are passed to {@code inverseLabel}, and the result is set as
   * the instance's class value.
   *
   * @param artData the artificially generated instances
   * @exception Exception if instances cannot be labeled successfully
   */
  protected void labelData(Instances artData) throws Exception {
    int idx = 0;
    while (idx < artData.numInstances()) {
      final Instance example = artData.instance(idx);
      // label chosen inversely proportional to the current ensemble's predictions
      example.setClassValue(inverseLabel(distributionForInstance(example)));
      idx++;
    }
  }
Ejemplo n.º 21
0
  /**
   * Inserts an instance into the hash table.
   *
   * <p>The hash key is built from {@code instA} when it is given, otherwise from the instance
   * itself. For a nominal class the table entry is a per-class distribution: a new entry starts
   * with 1.0 for every class and the instance's weight for its own class, an existing entry has
   * the weight added to the instance's class. For a numeric class the entry holds the weighted
   * sum of class values at index 0 and the sum of weights at index 1.
   *
   * @param inst instance to be inserted
   * @param instA to create the hash key from
   * @throws Exception if the instance can't be inserted
   */
  private void insertIntoTable(Instance inst, double[] instA) throws Exception {
    final DecisionTableHashKey key =
        (instA != null)
            ? new DecisionTableHashKey(instA)
            : new DecisionTableHashKey(inst, inst.numAttributes(), false);

    // see if this key is already in the table
    double[] entry = (double[]) m_entries.get(key);
    final boolean alreadyPresent = entry != null;

    if (m_classIsNominal) {
      if (alreadyPresent) {
        entry[(int) inst.classValue()] += inst.weight();
      } else {
        entry = new double[m_theInstances.classAttribute().numValues()];

        // Laplace estimation: every class starts with a count of 1
        for (int c = 0; c < entry.length; c++) {
          entry[c] = 1.0;
        }
        entry[(int) inst.classValue()] = inst.weight();
      }
    } else {
      if (alreadyPresent) {
        entry[0] += (inst.classValue() * inst.weight());
        entry[1] += inst.weight();
      } else {
        entry = new double[2];
        entry[0] = inst.classValue() * inst.weight();
        entry[1] = inst.weight();
      }
    }

    // add to / update the table
    m_entries.put(key, entry);
  }
Ejemplo n.º 22
0
  /**
   * Adds the supplied instance to the training set.
   *
   * <p>The instance's data set header must match the training data. Instances with a missing
   * class are ignored; all others are passed to {@code m_NNSearch} and appended to
   * {@code m_Train}.
   *
   * @param instance the instance to add
   * @throws Exception if instance could not be incorporated successfully
   */
  public void updateClassifier(Instance instance) throws Exception {
    if (m_Train == null) {
      throw new Exception("No training instance structure set!");
    }
    if (!m_Train.equalHeaders(instance.dataset())) {
      throw new Exception(
          "Incompatible instance types\n" + m_Train.equalHeadersMsg(instance.dataset()));
    }
    if (instance.classIsMissing()) {
      return;
    }
    m_NNSearch.update(instance);
    m_Train.add(instance);
  }
Ejemplo n.º 23
0
  /**
   * Initializes a gain ratio attribute evaluator.
   *
   * <p>Records the class index and the numbers of attributes and instances of the training data,
   * then replaces the training data by the output of a {@code Discretize} filter (with better
   * encoding enabled) and reads the number of classes from the class attribute.
   *
   * @param data set of instances serving as training data
   * @throws Exception if the evaluator has not been generated successfully
   */
  public void buildEvaluator(Instances data) throws Exception {
    // can evaluator handle data?
    getCapabilities().testWithFail(data);

    m_trainInstances = data;
    m_classIndex = data.classIndex();
    m_numAttribs = data.numAttributes();
    m_numInstances = data.numInstances();

    final Discretize discretizer = new Discretize();
    discretizer.setUseBetterEncoding(true);
    discretizer.setInputFormat(m_trainInstances);
    m_trainInstances = Filter.useFilter(m_trainInstances, discretizer);

    m_numClasses = m_trainInstances.attribute(m_classIndex).numValues();
  }
Ejemplo n.º 24
0
  /**
   * Splits a dataset according to the values of a nominal attribute.
   *
   * <p>One subset is created per attribute value; every instance is added to the subset whose
   * index equals the instance's value for the attribute. The subsets are compactified before
   * they are returned.
   *
   * @param data the data which is to be split
   * @param att the attribute to be used for splitting
   * @return the sets of instances produced by the split
   */
  private Instances[] splitData(Instances data, Attribute att) {
    final Instances[] subsets = new Instances[att.numValues()];
    for (int v = 0; v < att.numValues(); v++) {
      subsets[v] = new Instances(data, data.numInstances());
    }

    for (int row = 0; row < data.numInstances(); row++) {
      final Instance current = data.instance(row);
      subsets[(int) current.value(att)].add(current);
    }

    for (final Instances subset : subsets) {
      subset.compactify();
    }
    return subsets;
  }
Ejemplo n.º 25
0
 /**
  * SVMOutput of an instance in the training set, m_data. This uses the cache, unlike
  * SVMOutput(Instance).
  *
  * <p>The output is {@code -m_b} plus, for every support vector {@code i}, the term
  * {@code (alpha[i] - alphaStar[i]) * kernel(index, i)}.
  *
  * @param index index of the training instance in m_data
  * @return the SVM output
  * @throws Exception if something goes wrong
  */
 protected double SVMOutput(int index) throws Exception {
   double output = -m_b;
   int sv = m_supportVectors.getNext(-1);
   while (sv != -1) {
     output += (m_alpha[sv] - m_alphaStar[sv]) * m_kernel.eval(index, sv, m_data.instance(index));
     sv = m_supportVectors.getNext(sv);
   }
   return output;
 }
Ejemplo n.º 26
0
 /**
  * Compute the value of the objective function.
  *
  * <p>With {@code d[i] = alpha[i] - alphaStar[i]} the score is
  * {@code -0.5 * sum_i sum_j d[i] * d[j] * K(i, j)
  *      + sum_i (target[i] * d[i] - epsilon * (alpha[i] + alphaStar[i]))}.
  *
  * @return the score
  * @throws Exception if something goes wrong
  */
 protected double getScore() throws Exception {
   double quadratic = 0;
   double linear = 0;
   for (int i = 0; i < m_nInstances; i++) {
     for (int j = 0; j < m_nInstances; j++) {
       quadratic +=
           (m_alpha[i] - m_alphaStar[i])
               * (m_alpha[j] - m_alphaStar[j])
               * m_kernel.eval(i, j, m_data.instance(i));
     }
     // linear term of the epsilon-insensitive loss
     linear +=
         m_target[i] * (m_alpha[i] - m_alphaStar[i]) - m_epsilon * (m_alpha[i] + m_alphaStar[i]);
   }
   return -0.5 * quadratic + linear;
 }
Ejemplo n.º 27
0
  /**
   * Command line entry point: discretizes the numeric attributes of a data set.
   *
   * <pre>
   * Usage: Discretizer
   * -r	attribute file path
   * -i	input dataset path
   * -o	output dataset path
   * [-d]	discretized attribute file path
   * [-m]	output attribute file path
   * [-n]	maximum num of bins (default: 256)
   * [-t]	training file path
   * </pre>
   *
   * <p>The bins come either from discretizing a training file (when one is given) or from a
   * discretized attribute file. If neither is given, or the arguments are invalid, the usage is
   * printed and the program exits with status 1.
   *
   * @param args the command line arguments.
   * @throws Exception if reading, discretizing or writing the data fails
   */
  public static void main(String[] args) throws Exception {
    final Options app = new Options();
    final CmdLineParser parser = new CmdLineParser(Discretizer.class, app);
    try {
      parser.parse(args);
      if (app.maxNumBins < 0) {
        throw new IllegalArgumentException();
      }
    } catch (IllegalArgumentException e) {
      parser.printUsage();
      System.exit(1);
    }

    // Obtain the binned attributes that define how the input is discretized.
    List<Attribute> binnedAttributes = null;
    if (app.trainPath == null && app.disAttPath == null) {
      parser.printUsage();
      System.exit(1);
    } else if (app.trainPath != null) {
      final Instances trainSet = InstancesReader.read(app.attPath, app.trainPath);
      binnedAttributes = trainSet.getAttributes();
      for (int idx = 0; idx < binnedAttributes.size(); idx++) {
        // Only discretize numeric attributes
        if (binnedAttributes.get(idx).getType() == Type.NUMERIC) {
          Discretizer.discretize(trainSet, idx, app.maxNumBins);
        }
      }
    } else {
      binnedAttributes = AttributesReader.read(app.disAttPath).v1;
    }

    // Apply the bins to every numeric attribute of the input data.
    final Instances instances = InstancesReader.read(app.attPath, app.inputPath);
    final List<Attribute> inputAttributes = instances.getAttributes();
    for (int idx = 0; idx < inputAttributes.size(); idx++) {
      if (inputAttributes.get(idx).getType() != Type.NUMERIC) {
        continue;
      }
      final BinnedAttribute binned = (BinnedAttribute) binnedAttributes.get(idx);
      Discretizer.discretize(instances, idx, binned.getBins());
    }

    if (app.outputAttPath == null) {
      InstancesWriter.write(instances, app.outputPath);
    } else {
      InstancesWriter.write(instances, app.outputAttPath, app.outputPath);
    }
  }
Ejemplo n.º 28
0
  /**
   * Computes the entropy of a dataset.
   *
   * <p>With class counts {@code c_j} and {@code N} instances the result is
   * {@code log2(N) - (sum_j c_j * log2(c_j)) / N}, where classes with a zero count are skipped.
   *
   * @param data the data for which entropy is to be computed
   * @return the entropy of the data's class distribution
   * @throws Exception if computation fails
   */
  private double computeEntropy(Instances data) throws Exception {
    // count the instances per class
    final double[] counts = new double[data.numClasses()];
    for (int row = 0; row < data.numInstances(); row++) {
      counts[(int) data.instance(row).classValue()]++;
    }

    double entropy = 0;
    for (final double count : counts) {
      if (count > 0) {
        entropy -= count * Utils.log2(count);
      }
    }
    entropy /= (double) data.numInstances();
    return entropy + Utils.log2(data.numInstances());
  }
  /**
   * Generates a clusterer by the mean of spectral clustering algorithm.
   *
   * <p>A copy of the data is kept in {@code m_Sequences} and the instances are stored as rows of
   * the dense matrix {@code v}. The symmetric similarity matrix is filled from the precomputed
   * {@code sim_matrix} (upper triangle, mirrored) and partitioned with {@code partition}. Each
   * instance is then assigned the index of the partition that contains it, and the number of
   * clusters is set to the largest assigned index plus one.
   *
   * @param data set of instances serving as training data
   * @exception Exception if the clusterer has not been generated successfully
   */
  public void buildClusterer(Instances data) throws java.lang.Exception {
    m_Sequences = new Instances(data);
    final int n = data.numInstances();

    final DoubleMatrix2D w =
        useSparseMatrix ? DoubleFactory2D.sparse.make(n, n) : DoubleFactory2D.dense.make(n, n);

    final double[][] rows = new double[n][];
    for (int i = 0; i < n; i++) {
      rows[i] = data.instance(i).toDoubleArray();
    }
    v = DoubleFactory2D.dense.make(rows);

    // Sets up similarity matrix from the precomputed scores; only the upper triangle of
    // sim_matrix is read and mirrored into w.
    for (int i = 0; i < n; i++) {
      for (int j = i; j < n; j++) {
        final double sim = sim_matrix[i][j];
        w.set(i, j, sim);
        w.set(j, i, sim);
      }
    }

    // Partitions points
    final int[][] parts = partition(w, alpha_star);

    // Deploys results: every member of partition c gets cluster number c
    numOfClusters = parts.length;
    cluster = new int[n];
    for (int c = 0; c < parts.length; c++) {
      for (final int member : parts[c]) {
        cluster[member] = c;
      }
    }

    this.numOfClusters = cluster[Utils.maxIndex(cluster)] + 1;
  }
Ejemplo n.º 30
0
 /**
  * Lowers the recorded minimum distances using a new center.
  *
  * <p>For every index that is not flagged as selected, the distance between the center and the
  * instance at that index is computed; if it is smaller than the stored minimum, the stored
  * minimum is replaced.
  *
  * @param minDistance per-instance minimum distances; updated in place
  * @param selected flags marking the indices to skip
  * @param data the instances the indices refer to
  * @param center the center to measure distances from
  */
 protected void updateMinDistance(
     double[] minDistance, boolean[] selected, Instances data, Instance center) {
   for (int idx = 0; idx < selected.length; idx++) {
     if (selected[idx]) {
       continue;
     }
     final double candidate = distance(center, data.instance(idx));
     if (candidate < minDistance[idx]) {
       minDistance[idx] = candidate;
     }
   }
 }