Exemplo n.º 1
0
  private static IList<IList<IAgent>> clusteringUsingWeka(
      final IScope scope,
      final Clusterer clusterer,
      final IList<String> attributes,
      final IAddressableContainer<Integer, IAgent, Integer, IAgent> agents)
      throws GamaRuntimeException {
    Instances dataset = convertToInstances(scope, attributes, agents);
    try {
      clusterer.buildClusterer(dataset);

      IList<IList<IAgent>> groupes = GamaListFactory.create(Types.LIST.of(Types.AGENT));

      for (int i = 0; i < clusterer.numberOfClusters(); i++) {
        groupes.add(GamaListFactory.<IAgent>create(Types.AGENT));
      }
      for (int i = 0; i < dataset.numInstances(); i++) {
        Instance inst = dataset.instance(i);
        int clusterIndex = -1;
        clusterIndex = clusterer.clusterInstance(inst);
        IList<IAgent> groupe = groupes.get(clusterIndex);
        groupe.add(agents.get(scope, i));
      }
      return groupes;
    } catch (Exception e) {
      return null;
    }
  }
  /**
   * Print the cluster assignments for either the training or the testing data.
   *
   * @param clusterer the clusterer to use for cluster assignments
   * @return a string containing the instance indexes and cluster assigns.
   * @exception if cluster assignments can't be printed
   */
  private static String printClusterings(
      Clusterer clusterer, Instances train, String testFileName, Range attributesToOutput)
      throws Exception {
    StringBuffer text = new StringBuffer();
    int i = 0;
    int cnum;

    if (testFileName.length() != 0) {
      BufferedReader testStream = null;

      try {
        testStream = new BufferedReader(new FileReader(testFileName));
      } catch (Exception e) {
        throw new Exception("Can't open file " + e.getMessage() + '.');
      }

      Instances test = new Instances(testStream, 1);

      while (test.readInstance(testStream)) {
        try {
          cnum = clusterer.clusterInstance(test.instance(0));

          text.append(
              i
                  + " "
                  + cnum
                  + " "
                  + attributeValuesString(test.instance(0), attributesToOutput)
                  + "\n");
        } catch (Exception e) {
          /*	  throw  new Exception('\n' + "Unable to cluster instance\n"
          + e.getMessage()); */
          text.append(
              i
                  + " Unclustered "
                  + attributeValuesString(test.instance(0), attributesToOutput)
                  + "\n");
        }
        test.delete(0);
        i++;
      }
    } else // output for training data
    {
      for (i = 0; i < train.numInstances(); i++) {
        try {
          cnum = clusterer.clusterInstance(train.instance(i));

          text.append(
              i
                  + " "
                  + cnum
                  + " "
                  + attributeValuesString(train.instance(i), attributesToOutput)
                  + "\n");
        } catch (Exception e) {
          /*  throw  new Exception('\n'
          + "Unable to cluster instance\n"
          + e.getMessage()); */
          text.append(
              i
                  + " Unclustered "
                  + attributeValuesString(train.instance(i), attributesToOutput)
                  + "\n");
        }
      }
    }

    return text.toString();
  }
  /**
   * Print the cluster statistics for either the training or the testing data.
   *
   * @param clusterer the clusterer to use for generating statistics.
   * @return a string containing cluster statistics.
   * @exception if statistics can't be generated.
   */
  private static String printClusterStats(Clusterer clusterer, String fileName) throws Exception {
    StringBuffer text = new StringBuffer();
    int i = 0;
    int cnum;
    double loglk = 0.0;
    double[] dist;
    double temp;
    int cc = clusterer.numberOfClusters();
    double[] instanceStats = new double[cc];
    int unclusteredInstances = 0;

    if (fileName.length() != 0) {
      BufferedReader inStream = null;

      try {
        inStream = new BufferedReader(new FileReader(fileName));
      } catch (Exception e) {
        throw new Exception("Can't open file " + e.getMessage() + '.');
      }

      Instances inst = new Instances(inStream, 1);

      while (inst.readInstance(inStream)) {
        try {
          cnum = clusterer.clusterInstance(inst.instance(0));

          if (clusterer instanceof DensityBasedClusterer) {
            loglk += ((DensityBasedClusterer) clusterer).logDensityForInstance(inst.instance(0));
            //	    temp = Utils.sum(dist);
          }
          instanceStats[cnum]++;
        } catch (Exception e) {
          unclusteredInstances++;
        }
        inst.delete(0);
        i++;
      }

      /*
           // count the actual number of used clusters
           int count = 0;
           for (i = 0; i < cc; i++) {
      if (instanceStats[i] > 0) {
        count++;
      }
           }
           if (count > 0) {
      double [] tempStats = new double [count];
      count=0;
      for (i=0;i<cc;i++) {
        if (instanceStats[i] > 0) {
          tempStats[count++] = instanceStats[i];
      }
      }
      instanceStats = tempStats;
      cc = instanceStats.length;
      } */

      int clustFieldWidth = (int) ((Math.log(cc) / Math.log(10)) + 1);
      int numInstFieldWidth = (int) ((Math.log(i) / Math.log(10)) + 1);
      double sum = Utils.sum(instanceStats);
      loglk /= sum;
      text.append("Clustered Instances\n");

      for (i = 0; i < cc; i++) {
        if (instanceStats[i] > 0) {
          text.append(
              Utils.doubleToString((double) i, clustFieldWidth, 0)
                  + "      "
                  + Utils.doubleToString(instanceStats[i], numInstFieldWidth, 0)
                  + " ("
                  + Utils.doubleToString((instanceStats[i] / sum * 100.0), 3, 0)
                  + "%)\n");
        }
      }
      if (unclusteredInstances > 0) {
        text.append("\nUnclustered Instances : " + unclusteredInstances);
      }

      if (clusterer instanceof DensityBasedClusterer) {
        text.append("\n\nLog likelihood: " + Utils.doubleToString(loglk, 1, 5) + "\n");
      }
    }

    return text.toString();
  }
  /**
   * Evaluate the clusterer on a set of instances. Calculates clustering statistics and stores
   * cluster assigments for the instances in m_clusterAssignments
   *
   * @param test the set of instances to cluster
   * @exception Exception if something goes wrong
   */
  public void evaluateClusterer(Instances test) throws Exception {
    int i = 0;
    int cnum;
    double loglk = 0.0;
    double[] dist;
    double temp;
    int cc = m_Clusterer.numberOfClusters();
    m_numClusters = cc;
    int numInstFieldWidth = (int) ((Math.log(test.numInstances()) / Math.log(10)) + 1);
    double[] instanceStats = new double[cc];
    m_clusterAssignments = new double[test.numInstances()];
    Instances testCopy = test;
    boolean hasClass = (testCopy.classIndex() >= 0);
    int unclusteredInstances = 0;

    // If class is set then do class based evaluation as well
    if (hasClass) {
      if (testCopy.classAttribute().isNumeric()) {
        throw new Exception("ClusterEvaluation: Class must be nominal!");
      }
      Remove removeClass = new Remove();
      removeClass.setAttributeIndices("" + (testCopy.classIndex() + 1));
      removeClass.setInvertSelection(false);
      removeClass.setInputFormat(testCopy);
      testCopy = Filter.useFilter(testCopy, removeClass);
    }

    for (i = 0; i < testCopy.numInstances(); i++) {
      cnum = -1;
      try {
        if (m_Clusterer instanceof DensityBasedClusterer) {
          loglk +=
              ((DensityBasedClusterer) m_Clusterer).logDensityForInstance(testCopy.instance(i));
          //	  temp = Utils.sum(dist);

          //	  Utils.normalize(dist);
          cnum = m_Clusterer.clusterInstance(testCopy.instance(i));
          // Utils.maxIndex(dist);
          m_clusterAssignments[i] = (double) cnum;
        } else {
          cnum = m_Clusterer.clusterInstance(testCopy.instance(i));
          m_clusterAssignments[i] = (double) cnum;
        }
      } catch (Exception e) {
        unclusteredInstances++;
      }

      if (cnum != -1) {
        instanceStats[cnum]++;
      }
    }

    /* // count the actual number of used clusters
       int count = 0;
       for (i = 0; i < cc; i++) {
         if (instanceStats[i] > 0) {
    count++;
         }
       }
       if (count > 0) {
         double [] tempStats = new double [count];
         double [] map = new double [m_clusterAssignments.length];
         count=0;
         for (i=0;i<cc;i++) {
    if (instanceStats[i] > 0) {
      tempStats[count] = instanceStats[i];
      map[i] = count;
      count++;
    }
         }
         instanceStats = tempStats;
         cc = instanceStats.length;
         for (i=0;i<m_clusterAssignments.length;i++) {
    m_clusterAssignments[i] = map[(int)m_clusterAssignments[i]];
         }
         } */

    double sum = Utils.sum(instanceStats);
    loglk /= sum;
    m_logL = loglk;

    m_clusteringResults.append(m_Clusterer.toString());
    m_clusteringResults.append("Clustered Instances\n\n");
    int clustFieldWidth = (int) ((Math.log(cc) / Math.log(10)) + 1);
    for (i = 0; i < cc; i++) {
      if (instanceStats[i] > 0) {
        m_clusteringResults.append(
            Utils.doubleToString((double) i, clustFieldWidth, 0)
                + "      "
                + Utils.doubleToString(instanceStats[i], numInstFieldWidth, 0)
                + " ("
                + Utils.doubleToString((instanceStats[i] / sum * 100.0), 3, 0)
                + "%)\n");
      }
    }

    if (unclusteredInstances > 0) {
      m_clusteringResults.append("\nUnclustered instances : " + unclusteredInstances);
    }

    if (m_Clusterer instanceof DensityBasedClusterer) {
      m_clusteringResults.append("\n\nLog likelihood: " + Utils.doubleToString(loglk, 1, 5) + "\n");
    }

    if (hasClass) {
      evaluateClustersWithRespectToClass(test);
    }
  }