Beispiel #1
0
  /**
   * Test of learn method, of class RDA.
   *
   * <p>Trains an {@code RDA} model (constructed with parameter 0.7) on the USPS training file
   * and counts the misclassified samples of the USPS test file. Any exception raised while
   * loading the data or training fails the test instead of being printed and ignored.
   */
  @Test
  public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
      AttributeDataset train =
          parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
      AttributeDataset test =
          parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));

      double[][] x = train.toArray(new double[train.size()][]);
      int[] y = train.toArray(new int[train.size()]);
      double[][] testx = test.toArray(new double[test.size()][]);
      int[] testy = test.toArray(new int[test.size()]);

      RDA rda = new RDA(x, y, 0.7);

      int error = 0;
      for (int i = 0; i < testx.length; i++) {
        if (rda.predict(testx[i]) != testy[i]) {
          error++;
        }
      }

      System.out.format("USPS error rate = %.2f%%%n", 100.0 * error / testx.length);
      assertEquals(235, error);
    } catch (Exception ex) {
      // Previously the exception was only printed, so the test passed without ever reaching
      // the assertion. AssertionError(Object) keeps the original exception as the cause.
      throw new AssertionError(ex);
    }
  }
  /**
   * Test of learn method, of class LogisticRegression.
   *
   * <p>Runs leave-one-out cross validation on the iris data set: for every split a
   * {@code LogisticRegression} model is trained on the training indices and evaluated on the
   * single held-out sample. Any exception raised while loading the data or training fails
   * the test instead of being printed and ignored.
   */
  @Test
  public void testIris() {
    System.out.println("Iris");
    ArffParser arffParser = new ArffParser();
    arffParser.setResponseIndex(4);
    try {
      AttributeDataset iris =
          arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/iris.arff"));
      double[][] x = iris.toArray(new double[iris.size()][]);
      int[] y = iris.toArray(new int[iris.size()]);

      int n = x.length;
      LOOCV loocv = new LOOCV(n);
      int error = 0;
      for (int i = 0; i < n; i++) {
        double[][] trainx = Math.slice(x, loocv.train[i]);
        int[] trainy = Math.slice(y, loocv.train[i]);
        LogisticRegression logit = new LogisticRegression(trainx, trainy);

        if (y[loocv.test[i]] != logit.predict(x[loocv.test[i]])) {
          error++;
        }
      }

      System.out.println("Logistic Regression error = " + error);
      assertEquals(3, error);
    } catch (Exception ex) {
      // Previously the exception was only printed, so the test passed without ever reaching
      // the assertion. AssertionError(Object) keeps the original exception as the cause.
      throw new AssertionError(ex);
    }
  }
  /**
   * Test of learn method, of class LogisticRegression.
   *
   * <p>Trains a {@code LogisticRegression} model on the segment-challenge file (constructor
   * arguments 0.05, 1E-3 and 1000) and counts the misclassified samples of the segment-test
   * file. Any exception raised while loading the data or training fails the test instead of
   * being printed and ignored.
   */
  @Test
  public void testSegment() {
    System.out.println("Segment");
    ArffParser arffParser = new ArffParser();
    arffParser.setResponseIndex(19);
    try {
      AttributeDataset train =
          arffParser.parse(
              smile.data.parser.IOUtils.getTestDataFile("weka/segment-challenge.arff"));
      AttributeDataset test =
          arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/segment-test.arff"));

      double[][] x = train.toArray(new double[train.size()][]);
      int[] y = train.toArray(new int[train.size()]);
      double[][] testx = test.toArray(new double[test.size()][]);
      int[] testy = test.toArray(new int[test.size()]);

      LogisticRegression logit = new LogisticRegression(x, y, 0.05, 1E-3, 1000);

      int error = 0;
      for (int i = 0; i < testx.length; i++) {
        if (logit.predict(testx[i]) != testy[i]) {
          error++;
        }
      }

      System.out.format("Segment error rate = %.2f%%\n", 100.0 * error / testx.length);
      assertEquals(48, error);
    } catch (Exception ex) {
      // Previously the exception was only printed, so the test passed without ever reaching
      // the assertion. AssertionError(Object) keeps the original exception as the cause.
      throw new AssertionError(ex);
    }
  }
Beispiel #4
0
  /**
   * Test of learn method, of class MEC.
   *
   * <p>Clusters the USPS training samples with {@code MEC} (Euclidean distance, constructor
   * arguments 10 and 8.0) and compares the cluster labels with the true classes using the
   * Rand index and the adjusted Rand index. The same two measures are then computed for the
   * labels predicted on the USPS test samples. Any exception raised while loading the data or
   * clustering fails the test instead of being printed and ignored.
   */
  @Test
  public void testUSPS() {
    System.out.println("USPS");
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
      AttributeDataset train =
          parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
      AttributeDataset test =
          parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));

      double[][] x = train.toArray(new double[train.size()][]);
      int[] y = train.toArray(new int[train.size()]);
      double[][] testx = test.toArray(new double[test.size()][]);
      int[] testy = test.toArray(new int[test.size()]);

      AdjustedRandIndex ari = new AdjustedRandIndex();
      RandIndex rand = new RandIndex();
      MEC<double[]> mec = new MEC<double[]>(x, new EuclideanDistance(), 10, 8.0);

      // Both measures are evaluated against the same training cluster labels.
      int[] trainLabels = mec.getClusterLabel();
      double r = rand.measure(y, trainLabels);
      double r2 = ari.measure(y, trainLabels);
      System.out.format(
          "Training rand index = %.2f%%\tadjusted rand index = %.2f%%\n", 100.0 * r, 100.0 * r2);
      assertTrue(r > 0.85);
      assertTrue(r2 > 0.35);

      int[] p = new int[testx.length];
      for (int i = 0; i < testx.length; i++) {
        p[i] = mec.predict(testx[i]);
      }

      r = rand.measure(testy, p);
      r2 = ari.measure(testy, p);
      System.out.format(
          "Testing rand index = %.2f%%\tadjusted rand index = %.2f%%\n", 100.0 * r, 100.0 * r2);
      assertTrue(r > 0.85);
      assertTrue(r2 > 0.35);
    } catch (Exception ex) {
      // Previously the exception was only printed, so the test passed without ever reaching
      // the assertions. AssertionError(Object) keeps the original exception as the cause.
      throw new AssertionError(ex);
    }
  }
Beispiel #5
0
  /**
   * Loads the USPS training and test samples into {@code x} and {@code testx}, then builds a
   * cover tree over the training samples with the Euclidean distance. The time spent on each
   * of the two steps is printed to standard output.
   *
   * @throws IllegalStateException if the data files cannot be read or parsed; without the data
   *     there is nothing meaningful to build the tree from.
   */
  public CoverTreeSpeedTest() {
    long start = System.currentTimeMillis();
    DelimitedTextParser parser = new DelimitedTextParser();
    parser.setResponseIndex(new NominalAttribute("class"), 0);
    try {
      AttributeDataset train =
          parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train"));
      AttributeDataset test =
          parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test"));

      x = train.toArray(new double[train.size()][]);
      testx = test.toArray(new double[test.size()][]);
    } catch (Exception ex) {
      // Fail fast with the real cause: continuing would hand an unassigned data array to the
      // cover tree constructor below and hide what actually went wrong.
      throw new IllegalStateException("Failed to load the USPS data set", ex);
    }

    double time = (System.currentTimeMillis() - start) / 1000.0;
    System.out.format("Loading data: %.2fs\n", time);

    start = System.currentTimeMillis();
    coverTree = new CoverTree<double[]>(x, new EuclideanDistance());
    time = (System.currentTimeMillis() - start) / 1000.0;
    System.out.format("Building cover tree: %.2fs\n", time);
  }
Beispiel #6
0
  /**
   * Parse a RES dataset from an input stream.
   *
   * <p>Layout as read by this method:
   *
   * <ol>
   *   <li>Line 1: tab-separated header. Every second field starting at index 2 becomes an
   *       attribute name.
   *   <li>Line 2: tab-separated sample descriptions with exactly one field fewer than line 1.
   *       Every second field starting at index 1 becomes an attribute description.
   *   <li>Line 3: the number of data rows, a positive integer.
   *   <li>Data rows: tab-separated, with as many fields as line 1. Field 0 is stored as the
   *       datum description, field 1 as the datum name, and every second field starting at
   *       index 2 is parsed as a numeric value.
   * </ol>
   *
   * <p>The reader wrapping {@code stream} is closed before this method returns, whether it
   * completes normally or throws.
   *
   * @param name the name of dataset.
   * @param stream the input stream of data.
   * @return the parsed dataset with one numeric attribute per sample column.
   * @throws IOException if the stream cannot be read, ends prematurely, or a header, row count
   *     or data row does not have the expected form.
   * @throws ParseException declared in the signature; this implementation does not throw it
   *     itself.
   * @throws NumberFormatException if the row count or a data value is not a valid number.
   */
  public AttributeDataset parse(String name, InputStream stream)
      throws IOException, ParseException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    try {
      String line = reader.readLine();
      if (line == null) {
        throw new IOException("Empty data source.");
      }

      String[] tokens = line.split("\t", -1);
      // Two leading columns, then two header fields per attribute.
      int p = (tokens.length - 2) / 2;

      line = reader.readLine();
      if (line == null) {
        throw new IOException("Premature end of file.");
      }

      String[] samples = line.split("\t", -1);
      if (samples.length != tokens.length - 1) {
        throw new IOException("Invalid sample description header.");
      }

      Attribute[] attributes = new Attribute[p];
      for (int i = 0; i < p; i++) {
        attributes[i] = new NumericAttribute(tokens[2 * i + 2], samples[2 * i + 1]);
      }

      line = reader.readLine();
      if (line == null) {
        throw new IOException("Premature end of file.");
      }

      int n = Integer.parseInt(line);
      if (n <= 0) {
        throw new IOException("Invalid number of rows: " + n);
      }

      AttributeDataset data = new AttributeDataset(name, attributes);

      for (int i = 0; i < n; i++) {
        line = reader.readLine();
        if (line == null) {
          throw new IOException("Premature end of file.");
        }

        tokens = line.split("\t", -1);
        if (tokens.length != samples.length + 1) {
          // Three header lines precede the data, and line numbers are reported 1-based.
          throw new IOException(
              String.format("Invalid number of elements of line %d: %d", i + 4, tokens.length));
        }

        double[] x = new double[p];
        for (int j = 0; j < p; j++) {
          x[j] = Double.parseDouble(tokens[2 * j + 2]);
        }

        Datum<double[]> datum = new Datum<double[]>(x);
        datum.name = tokens[1];
        datum.description = tokens[0];
        data.add(datum);
      }

      return data;
    } finally {
      // Release the reader (and the wrapped stream) on the error paths as well.
      reader.close();
    }
  }
Beispiel #7
0
  /**
   * Test of learn method, of class RDA.
   *
   * <p>Runs leave-one-out cross validation on the iris data set once for each RDA parameter
   * value 0.0, 0.1, ..., 1.0 and checks the number of misclassified samples against the
   * expected count for that value. The 0.8 run uses the {@code predict} overload that also
   * fills a posteriori array. Any exception raised while loading the data or training fails
   * the test instead of being printed and ignored.
   */
  @Test
  public void testLearn() {
    System.out.println("learn");
    ArffParser arffParser = new ArffParser();
    arffParser.setResponseIndex(4);
    try {
      AttributeDataset iris =
          arffParser.parse(smile.data.parser.IOUtils.getTestDataFile("weka/iris.arff"));
      double[][] x = iris.toArray(new double[iris.size()][]);
      int[] y = iris.toArray(new int[iris.size()]);

      // The same splits are reused for every parameter value.
      LOOCV loocv = new LOOCV(x.length);

      double[] alphas = {0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0};
      int[] expected = {22, 24, 20, 19, 16, 12, 11, 9, 6, 3, 4};
      // Index of the 0.8 run, which goes through predict(x, posteriori).
      final int posterioriRun = 8;

      for (int k = 0; k < alphas.length; k++) {
        int error = looError(x, y, loocv, alphas[k], k == posterioriRun);
        System.out.println("RDA (" + alphas[k] + ") error = " + error);
        assertEquals(expected[k], error);
      }
    } catch (Exception ex) {
      // Previously the exception was only printed, so the test passed without ever reaching
      // the assertions. AssertionError(Object) keeps the original exception as the cause.
      throw new AssertionError(ex);
    }
  }

  /**
   * Counts the leave-one-out misclassifications of RDA for one parameter value.
   *
   * @param x the samples.
   * @param y the class labels of the samples.
   * @param loocv the leave-one-out splits over the indices of {@code x}.
   * @param alpha the value passed as third argument to the {@code RDA} constructor.
   * @param withPosteriori if true, predictions are made through the overload that also fills
   *     a posteriori array of length 3; otherwise the single-argument overload is used.
   * @return the number of held-out samples whose prediction differs from the label.
   */
  private static int looError(
      double[][] x, int[] y, LOOCV loocv, double alpha, boolean withPosteriori) {
    double[] posteriori = withPosteriori ? new double[3] : null;
    int error = 0;
    for (int i = 0; i < x.length; i++) {
      double[][] trainx = Math.slice(x, loocv.train[i]);
      int[] trainy = Math.slice(y, loocv.train[i]);
      RDA rda = new RDA(trainx, trainy, alpha);

      int held = loocv.test[i];
      int predicted = withPosteriori ? rda.predict(x[held], posteriori) : rda.predict(x[held]);
      if (y[held] != predicted) {
        error++;
      }
    }
    return error;
  }