Example #1
  /**
   * Build the associator on the filtered data.
   *
   * @param data the training data
   * @throws Exception if the Associator could not be built successfully
   */
  public void buildAssociations(Instances data) throws Exception {
    if (m_Associator == null) throw new Exception("No base associator has been set!");

    // create copy and set class-index
    data = new Instances(data);
    if (getClassIndex() == 0) {
      data.setClassIndex(data.numAttributes() - 1);
    } else {
      data.setClassIndex(getClassIndex() - 1);
    }

    if (getClassIndex() != -1) {
      // remove instances with missing class
      data.deleteWithMissingClass();
    }

    m_Filter.setInputFormat(data); // filter capabilities are checked here
    data = Filter.useFilter(data, m_Filter);

    // can associator handle the data?
    getAssociator().getCapabilities().testWithFail(data);

    m_FilteredInstances = data.stringFreeStructure();
    m_Associator.buildAssociations(data);
  }
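The method above appears to be Weka's FilteredAssociator.buildAssociations. A minimal driver sketch, assuming that class; Apriori and ReplaceMissingValues are illustrative choices, and the ARFF path is hypothetical:

import weka.associations.Apriori;
import weka.associations.FilteredAssociator;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

public class FilteredAssociatorSketch {
  public static void main(String[] args) throws Exception {
    Instances data = DataSource.read("transactions.arff"); // hypothetical file
    FilteredAssociator assoc = new FilteredAssociator();
    assoc.setFilter(new ReplaceMissingValues()); // filter applied before mining
    assoc.setAssociator(new Apriori());          // base associator
    assoc.buildAssociations(data);               // runs the method shown above
    System.out.println(assoc);
  }
}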
  /**
   * Method for building this classifier.
   *
   * @param training the training instances
   * @param test the test instances
   * @throws Exception if something goes wrong
   */
  public void buildClassifier(Instances training, Instances test) throws Exception {
    m_ClassifierBuilt = true;
    m_Random = new Random(m_Seed);
    m_Trainset = training;
    m_Testset = test;

    // set class index?
    if ((m_Trainset.classIndex() == -1) || (m_Testset.classIndex() == -1)) {
      m_Trainset.setClassIndex(m_Trainset.numAttributes() - 1);
      m_Testset.setClassIndex(m_Trainset.numAttributes() - 1);
    }

    // are datasets correct?
    checkData();

    // any other data restrictions not met?
    checkRestrictions();

    // generate sets
    generateSets();

    // performs the restarts/iterations
    build();

    m_Random = null;
  }
Example #3
  /**
   * Tests with the classifier.
   *
   * @param trainFileName name of the training data file
   * @param testFileName name of the test data file
   */
  public static void classify(String trainFileName, String testFileName) {
    try {
      File inputFile = new File(fileName + trainFileName); // training corpus file
      ArffLoader atf = new ArffLoader();
      atf.setFile(inputFile);
      Instances instancesTrain = atf.getDataSet(); // load the training data

      inputFile = new File(fileName + testFileName); // test corpus file
      atf.setFile(inputFile);
      Instances instancesTest = atf.getDataSet(); // load the test data

      // set the class label attribute on both datasets
      instancesTest.setClassIndex(instancesTest.numAttributes() - 1);
      instancesTrain.setClassIndex(instancesTrain.numAttributes() - 1);

      classifier = (Classifier) Class.forName(CLASSIFIERNAME).newInstance();
      classifier.buildClassifier(instancesTrain);

      Evaluation eval = new Evaluation(instancesTrain);
      // first argument is a trained classifier; second is the dataset to evaluate it on
      eval.evaluateModel(classifier, instancesTest);

      System.out.println(eval.toClassDetailsString());
      System.out.println(eval.toSummaryString());
      System.out.println(eval.toMatrixString());
      System.out.println("precision is :" + (1 - eval.errorRate()));

    } catch (Exception e) {
      e.printStackTrace();
    }
  }
  public static void run(String[] args) throws Exception {
    /*
     * args[0]: train arff path
     * args[1]: test arff path
     */
    DataSource source = new DataSource(args[0]);
    Instances data = source.getDataSet();
    data.setClassIndex(data.numAttributes() - 1);
    NaiveBayes model = new NaiveBayes();
    model.buildClassifier(data);

    // Evaluation:
    Evaluation eval = new Evaluation(data);
    Instances testData = new DataSource(args[1]).getDataSet();
    testData.setClassIndex(testData.numAttributes() - 1);
    eval.evaluateModel(model, testData);
    System.out.println(model.toString());
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
    System.out.println("======\nConfusion Matrix:");
    double[][] confusionM = eval.confusionMatrix();
    for (int i = 0; i < confusionM.length; ++i) {
      for (int j = 0; j < confusionM[i].length; ++j) {
        System.out.format("%10s ", confusionM[i][j]);
      }
      System.out.print("\n");
    }
  }
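A hypothetical entry point for run() above; the ARFF paths are placeholders:

  // Hypothetical wiring for run(); the paths are placeholders only.
  public static void main(String[] args) throws Exception {
    run(new String[] {"train.arff", "test.arff"});
  }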
Example #5
  /** Initializes the class indices of the datasets and the classifier array. */
  private void Init() {
    testIns.setClassIndex(testIns.numAttributes() - 1);
    labeledIns.setClassIndex(labeledIns.numAttributes() - 1);
    unlabeledIns.setClassIndex(unlabeledIns.numAttributes() - 1);

    class_Array[0] = classifier1;
    class_Array[1] = classifier2;
    class_Array[2] = classifier3;
  }
Example #6
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -i &lt;the input file&gt;
   * The input file</pre>
   *
   * <pre> -o &lt;the output file&gt;
   * The output file</pre>
   *
   * <pre> -c &lt;the class index&gt;
   * The class index</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    String outputString = Utils.getOption('o', options);
    String inputString = Utils.getOption('i', options);
    String indexString = Utils.getOption('c', options);

    ArffLoader loader = new ArffLoader();

    resetOptions();

    // parse index
    int index = -1;
    if (indexString.length() != 0) {
      if (indexString.equals("first")) index = 0;
      else {
        if (indexString.equals("last")) index = -1;
        else index = Integer.parseInt(indexString);
      }
    }

    if (inputString.length() != 0) {
      try {
        File input = new File(inputString);
        loader.setFile(input);
        Instances inst = loader.getDataSet();
        if (index == -1) inst.setClassIndex(inst.numAttributes() - 1);
        else inst.setClassIndex(index);
        setInstances(inst);
      } catch (Exception ex) {
        throw new IOException(
            "No data set loaded. Data set has to be arff format (Reason: " + ex.toString() + ").");
      }
    } else throw new IOException("No data set to save.");

    if (outputString.length() != 0) {
      // add appropriate file extension
      if (!outputString.endsWith(getFileExtension())) {
        if (outputString.lastIndexOf('.') != -1)
          outputString =
              (outputString.substring(0, outputString.lastIndexOf('.'))) + getFileExtension();
        else outputString = outputString + getFileExtension();
      }
      try {
        File output = new File(outputString);
        setFile(output);
      } catch (Exception ex) {
        throw new IOException("Cannot create output file.");
      }
    }

    if (index == -1) index = getInstances().numAttributes() - 1;
    getInstances().setClassIndex(index);
  }
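A hedged invocation sketch: MySaver is a hypothetical stand-in for the saver class that actually declares the setOptions() above, and the file names are placeholders.

  // Hedged invocation sketch; MySaver is hypothetical.
  public static void main(String[] args) throws Exception {
    MySaver saver = new MySaver();
    saver.setOptions(new String[] {
      "-i", "input.arff", // ARFF file to load and set as the instances
      "-o", "output.dat", // output file; a wrong extension is corrected
      "-c", "last"        // class index: "first", "last", or a 0-based number
    });
  }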
Example #7
File: Wavelet.java  Project: dachylong/weka
  /**
   * processes the instances using the HAAR algorithm
   *
   * @param instances the data to process
   * @return the modified data
   * @throws Exception in case the processing goes wrong
   */
  protected Instances processHAAR(Instances instances) throws Exception {
    Instances result;
    int i;
    int n;
    int j;
    int clsIdx;
    double[] oldVal;
    double[] newVal;
    int level;
    int length;
    double[] clsVal;
    Attribute clsAtt;

    clsIdx = instances.classIndex();
    clsVal = null;
    clsAtt = null;
    if (clsIdx > -1) {
      clsVal = instances.attributeToDoubleArray(clsIdx);
      clsAtt = (Attribute) instances.classAttribute().copy();
      instances.setClassIndex(-1);
      instances.deleteAttributeAt(clsIdx);
    }
    result = new Instances(instances, 0);
    level = (int) StrictMath.ceil(StrictMath.log(instances.numAttributes()) / StrictMath.log(2.0));

    for (i = 0; i < instances.numInstances(); i++) {
      oldVal = instances.instance(i).toDoubleArray();
      newVal = new double[oldVal.length];

      for (n = level; n > 0; n--) {
        length = (int) StrictMath.pow(2, n - 1);

        for (j = 0; j < length; j++) {
          newVal[j] = (oldVal[j * 2] + oldVal[j * 2 + 1]) / StrictMath.sqrt(2);
          newVal[j + length] = (oldVal[j * 2] - oldVal[j * 2 + 1]) / StrictMath.sqrt(2);
        }

        System.arraycopy(newVal, 0, oldVal, 0, newVal.length);
      }

      // add new transformed instance
      result.add(new DenseInstance(1, newVal));
    }

    // add class again
    if (clsIdx > -1) {
      result.insertAttributeAt(clsAtt, clsIdx);
      result.setClassIndex(clsIdx);
      for (i = 0; i < clsVal.length; i++) result.instance(i).setClassValue(clsVal[i]);
    }

    return result;
  }
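To make the loop body concrete, here is a standalone sketch of one Haar level on a four-value row (illustrative numbers, not part of the Weka class above): pairwise sums scaled by 1/sqrt(2) land in the front half, pairwise differences in the back half.

double[] oldVal = {4, 2, 5, 5};
double[] newVal = new double[4];
int length = 2; // 2^(n-1) for n = 2
for (int j = 0; j < length; j++) {
  newVal[j] = (oldVal[2 * j] + oldVal[2 * j + 1]) / StrictMath.sqrt(2);          // ~4.24, ~7.07
  newVal[j + length] = (oldVal[2 * j] - oldVal[2 * j + 1]) / StrictMath.sqrt(2); // ~1.41, 0.0
}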
Example #8
  public static Double runClassify(String trainFile, String testFile) {
    double predictOrder = 0.0;
    double trueOrder = 0.0;
    try {
      String trainWekaFileName = trainFile;
      String testWekaFileName = testFile;

      Instances train = DataSource.read(trainWekaFileName);
      Instances test = DataSource.read(testWekaFileName);

      train.setClassIndex(0);
      test.setClassIndex(0);

      // delete unused attributes in decreasing index order so earlier
      // deletions do not shift the positions of later ones
      train.deleteAttributeAt(8);
      test.deleteAttributeAt(8);
      train.deleteAttributeAt(6);
      test.deleteAttributeAt(6);
      train.deleteAttributeAt(5);
      test.deleteAttributeAt(5);
      train.deleteAttributeAt(4);
      test.deleteAttributeAt(4);

      // AdditiveRegression classifier = new AdditiveRegression();

      // NaiveBayes classifier = new NaiveBayes();

      RandomForest classifier = new RandomForest();
      // LibSVM classifier = new LibSVM();

      classifier.buildClassifier(train);
      Evaluation eval = new Evaluation(train);
      eval.evaluateModel(classifier, test);

      System.out.println(eval.toSummaryString("\nResults\n\n", true));
      // System.out.println(eval.toClassDetailsString());
      // System.out.println(eval.toMatrixString());
      int k = 892; // ID of the first test instance
      for (int i = 0; i < test.numInstances(); i++) {
        predictOrder = classifier.classifyInstance(test.instance(i));
        trueOrder = test.instance(i).classValue();
        System.out.println((k++) + "," + (int) predictOrder);
      }

    } catch (Exception e) {
      e.printStackTrace();
    }
    return predictOrder;
  }
Example #9
  /**
   * Sets instances that should be stored.
   *
   * @param instances the instances
   */
  @Override
  public void setInstances(Instances instances) {
    m_ClassIndex.setUpper(instances.numAttributes() - 1);
    instances.setClassIndex(m_ClassIndex.getIndex());

    super.setInstances(instances);
  }
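A small sketch of the index behaviour relied on above, assuming m_ClassIndex is a weka.core.SingleIndex as in Weka's savers:

  weka.core.SingleIndex idx = new weka.core.SingleIndex("last");
  idx.setUpper(9);                    // attribute indices run 0..9
  System.out.println(idx.getIndex()); // prints 9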
Example #10
  /**
   * Set the output format. Takes the current average class values and m_InputFormat and calls
   * setOutputFormat(Instances) appropriately.
   */
  private void setOutputFormat() {

    Instances newData;
    FastVector newAtts, newVals;

    // Compute new attributes

    newAtts = new FastVector(getInputFormat().numAttributes());
    for (int j = 0; j < getInputFormat().numAttributes(); j++) {
      Attribute att = getInputFormat().attribute(j);
      if (!m_AttIndices.isInRange(j) || !att.isString()) {

        // We don't have to copy the attribute because the
        // attribute index remains unchanged.
        newAtts.addElement(att);
      } else {

        // Compute list of attribute values
        newVals = new FastVector(att.numValues());
        for (int i = 0; i < att.numValues(); i++) {
          newVals.addElement(att.value(i));
        }
        newAtts.addElement(new Attribute(att.name(), newVals));
      }
    }

    // Construct new header
    newData = new Instances(getInputFormat().relationName(), newAtts, 0);
    newData.setClassIndex(getInputFormat().classIndex());
    setOutputFormat(newData);
  }
Example #11
File: WekaUtil.java  Project: kulashish/qh
 public static Instances getInstances(String file) throws Exception {
   DataSource datasource = new DataSource(file);
   Instances data = datasource.getDataSet();
   System.out.println("Class index is : " + data.classIndex());
   if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1);
   return data;
 }
  public Instances transformInstances(MultiLabelInstances mlData) throws Exception {
    labelIndices = mlData.getLabelIndices();
    numOfLabels = mlData.getNumLabels();
    Instances data = mlData.getDataSet();

    Instances transformed = new Instances(mlData.getDataSet(), 0);

    // delete all labels
    transformed = RemoveAllLabels.transformInstances(transformed, labelIndices);

    // add single label attribute
    ArrayList<String> classValues = new ArrayList<String>(numOfLabels);
    for (int x = 0; x < numOfLabels; x++) {
      classValues.add("Class" + (x + 1));
    }
    Attribute newClass = new Attribute("Class", classValues);
    transformed.insertAttributeAt(newClass, transformed.numAttributes());
    transformed.setClassIndex(transformed.numAttributes() - 1);

    for (int instanceIndex = 0; instanceIndex < data.numInstances(); instanceIndex++) {
      // System.out.println(data.instance(instanceIndex).toString());
      List<Instance> result = transformInstance(data.instance(instanceIndex));
      for (Instance instance : result) {
        // System.out.println(instance.toString());
        transformed.add(instance);
        // System.out.println(transformed.instance(transformed.numInstances()-1));
      }
    }
    return transformed;
  }
  public static void main(String[] args) throws Exception {
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/audiology.arff";
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/cleveland.arff";
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/colic.arff";
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/credit.arff";
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/dermatology.arff";
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/hepatitis.arff";
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/labor.arff";
    String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/soybean.arff";

    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/wine.arff";
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/wdbc.arff";
    Instances m_data = new Instances(new FileReader(fn));
    m_data.setClassIndex(m_data.numAttributes() - 1);

    // SimilarityStyle sstyle = new SStyle_Abs1lambda_VmaxVmin(4);
    SimilarityStyle sstyle = new SStyle_MaxMin();
    ImplicatorTnormStyle itstyle = new ITStyle_KleeneDienes();

    MStyle_ConditionalEntropy mg = new MStyle_ConditionalEntropy(m_data, sstyle, itstyle);
    // mg.getInformation();
    // System.out.println(Arrays.toString(mg.getSelectedAtt()));
    String str1 = Utils.doubleFormat("0.0000", mg.m_useTime) + "s & ";
    String str2 = (mg.m_selectAtt.length - 1) + " & ";
    int[] ans = mg.m_selectAtt.clone();
    String str = "";
    for (int i = 0; i < ans.length - 1; ++i) {
      str += (ans[i] + 1) + ",";
    }
    String str3 = str.substring(0, str.length() - 1) + " \\\\";
    System.out.println(str1 + str2 + str3);
  }
Example #14
  private void jButton1ActionPerformed(
      java.awt.event.ActionEvent evt) { // GEN-FIRST:event_jButton1ActionPerformed
    jDOpen openDialog = new jDOpen(this, true);
    openDialog.show();

    if (!openDialog.fileName.isEmpty()) { // compare string content, not references
      try {
        BufferedReader reader = new BufferedReader(new FileReader(openDialog.fileName));
        training = new Instances(reader);
        training.setClassIndex(training.numAttributes() - 1);

        jTextField1.setText(openDialog.fileName);
        jTextArea1.append("Dataset changed: ");
        jTextArea1.append(openDialog.fileName);
        jTextArea1.append("\n");

        jButton3.setEnabled(true);
      } catch (Exception ex) {
        jTextField1.setText("");
        jTextArea1.append("Fail to load: '");
        jTextArea1.append(openDialog.fileName);
        jTextArea1.append("' (is it a correct dataset?)\n");

        jButton3.setEnabled(false);
      }
    }
  } // GEN-LAST:event_jButton1ActionPerformed
Example #15
  /**
   * @param args
   * @throws Exception
   */
  public static void main(String[] args) throws Exception {

    oneAlgorithm oneAlg = new oneAlgorithm();
    oneAlg.category = xCategory.RSandFCBFalg;
    oneAlg.style = xStyle.fuzzySU;
    oneAlg.flag = false;
    oneAlg.alpha = 2.0;
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/wine.arff";
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/wdbc.arff";
    String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/Data/glass.arff";
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/shen/wine-shen.arff";
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/fuzzy/fuzzy-ex.arff";
    // String fn = "C:/Users/Eric/Desktop/2011秋冬/Code/Xreducer/data/derm.arff";
    oneFile onef = new oneFile(new File(fn));
    Instances dataset = new Instances(new FileReader(fn));
    dataset.setClassIndex(dataset.numAttributes() - 1);
    onef.ins = dataset.numInstances();
    onef.att = dataset.numAttributes();
    onef.cla = dataset.numClasses();

    RSandFCBFReduceMethod rs = new RSandFCBFReduceMethod(onef, oneAlg);

    boolean[] B = new boolean[rs.NumAttr];
    boolean[] rq = rs.getOneReduction(B);
    System.out.println(Arrays.toString(Utils.boolean2select(rq)));
  }
Example #16
  public weka.core.Instances toWekaInstances() {
    // attributes
    FastVector wattrs = new FastVector();
    Iterator itr = attributes.iterator();
    while (itr.hasNext()) {
      Attribute attr = (Attribute) itr.next();
      wattrs.addElement(attr.toWekaAttribute());
    }
    // data instances
    weka.core.Instances winsts = new weka.core.Instances(name, wattrs, instances.size());
    itr = instances.iterator();

    while (itr.hasNext()) {
      Instance inst = (Instance) itr.next();
      Iterator itrval = inst.getValues().iterator();
      Iterator itrmis = inst.getMissing().iterator();
      double[] vals = new double[wattrs.size()];
      for (int i = 0; i < wattrs.size(); i++) {
        double val = (Double) itrval.next();
        if ((Boolean) itrmis.next()) {
          vals[i] = weka.core.Instance.missingValue();
        } else {
          vals[i] = val;
        }
      }
      weka.core.Instance winst = new weka.core.Instance(1, vals);
      winst.setDataset(winsts);
      winsts.add(winst);
    }
    winsts.setClassIndex(this.class_index);
    return winsts;
  }
Example #17
  /**
   * Determines and returns (if possible) the structure (internally the header) of the data set as
   * an empty set of instances.
   *
   * @return the structure of the data set as an empty set of Instances
   * @throws IOException if an error occurs
   */
  public Instances getStructure() throws IOException {
    if (getDirectory() == null) {
      throw new IOException("No directory/source has been specified");
    }

    // determine class labels, i.e., sub-dirs
    if (m_structure == null) {
      String directoryPath = getDirectory().getAbsolutePath();
      ArrayList<Attribute> atts = new ArrayList<Attribute>();
      ArrayList<String> classes = new ArrayList<String>();

      File dir = new File(directoryPath);
      String[] subdirs = dir.list();

      for (int i = 0; i < subdirs.length; i++) {
        File subdir = new File(directoryPath + File.separator + subdirs[i]);
        if (subdir.isDirectory()) classes.add(subdirs[i]);
      }

      atts.add(new Attribute("text", (ArrayList<String>) null));
      if (m_OutputFilename) atts.add(new Attribute("filename", (ArrayList<String>) null));
      // make sure that the name of the class attribute is unlikely to
      // clash with any attribute created via the StringToWordVector filter
      atts.add(new Attribute("@@class@@", classes));

      String relName = directoryPath.replaceAll("/", "_");
      relName = relName.replaceAll("\\\\", "_").replaceAll(":", "_");
      m_structure = new Instances(relName, atts, 0);
      m_structure.setClassIndex(m_structure.numAttributes() - 1);
    }

    return m_structure;
  }
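This matches the behaviour of Weka's TextDirectoryLoader. A minimal usage sketch, assuming a hypothetical data/ directory with one sub-directory per class:

import java.io.File;
import weka.core.Instances;
import weka.core.converters.TextDirectoryLoader;

public class TextDirStructureSketch {
  public static void main(String[] args) throws Exception {
    // Assumes sub-directories are class labels (e.g. data/pos, data/neg)
    // holding plain-text files.
    TextDirectoryLoader loader = new TextDirectoryLoader();
    loader.setDirectory(new File("data"));       // hypothetical path
    Instances structure = loader.getStructure(); // header only, as above
    System.out.println(structure);
  }
}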
Example #18
  public static void main(String args[]) throws MWException, FileNotFoundException, IOException {
    ClusterEnsembleJavaclass a = new ClusterEnsembleJavaclass();
    String dataname = "half-rings";
    // String dataname = "Aggregation";
    // String dataname = "Compound";
    // String dataname = "twomoons";
    // String dataname = "Pathbased";
    // String dataname = "Spiral";
    // String dataname = "D31";
    // String dataname = "R15";
    // String dataname = "Flame";

    String path = "C:\\Users\\Eric\\Desktop\\2012秋冬\\NO.4\\data\\" + dataname + ".arff";
    Instances data = new Instances(new FileReader(path));
    data.setClassIndex(data.numAttributes() - 1);

    Object[] rhs = new Object[2];
    int n = data.numInstances();
    double[][] pData = new double[2][n];
    for (int i = 0; i < n; ++i) {
      pData[0][i] = data.instance(i).value(0);
      pData[1][i] = data.instance(i).value(1);
    }
    rhs[0] = new MWNumericArray(pData, MWClassID.SINGLE);
    rhs[1] = new MWNumericArray(3, MWClassID.SINGLE);
    Object[] outRes = new Object[1];
    outRes = a.cspa(1, rhs);

    MWNumericArray temp = (MWNumericArray) outRes[0];
    float[] weights = (float[]) temp.toFloatArray();

    System.out.println(Arrays.toString(weights));
  }
  public static void wekaAlgorithms(Instances data) throws Exception {
    classifier = new FilteredClassifier(); // filtered meta-classifier wrapping the base learner
    classifier.setClassifier(new NaiveBayes());
    //  classifier.setClassifier(new J48());
    // classifier.setClassifier(new RandomForest());

    //	classifier.setClassifier(new ZeroR());
    //  classifier.setClassifier(new NaiveBayes());
    //     classifier.setClassifier(new IBk());

    data.setClassIndex(data.numAttributes() - 1);
    Evaluation eval = new Evaluation(data);

    int folds = 10;
    eval.crossValidateModel(classifier, data, folds, new Random(1));

    System.out.println("===== Evaluating on filtered (training) dataset =====");
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toClassDetailsString());
    double[][] mat = eval.confusionMatrix();
    System.out.println("========= Confusion Matrix =========");
    for (int i = 0; i < mat.length; i++) {
      for (int j = 0; j < mat.length; j++) {

        System.out.print(mat[i][j] + "  ");
      }
      System.out.println(" ");
    }
  }
 /** trains the classifier */
 @Override
 public void train() throws Exception {
   if (_train.classIndex() == -1) _train.setClassIndex(_train.numAttributes() - 1);
   _cl.buildClassifier(_train);
   // evaluate classifier and print some statistics
   evaluate();
 }
  /** tests whether a URL can be loaded (via setURL(URL)). */
  public void testURLSourcedLoader() {
    Instances data;

    if (!(getLoader() instanceof URLSourcedLoader)) {
      return;
    }

    try {
      // save
      m_Saver.setInstances(m_Instances);
      m_Saver.setFile(new File(m_ExportFilename));
      m_Saver.writeBatch();

      // load
      ((URLSourcedLoader) m_Loader).setURL(new File(m_ExportFilename).toURI().toURL().toString());
      data = m_Loader.getDataSet();

      // compare data
      try {
        if (m_Instances.classIndex() != data.classIndex()) {
          data.setClassIndex(m_Instances.classIndex());
        }
        compareDatasets(m_Instances, data);
      } catch (Exception e) {
        fail("URL load failed (datasets differ): " + e.toString());
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail("URL load failed: " + e.toString());
    }
  }
  /** tests whether data can be loaded via setSource() with a file stream. */
  public void testLoaderWithStream() {
    Instances data;

    try {
      // save
      m_Saver.setInstances(m_Instances);
      m_Saver.setFile(new File(m_ExportFilename));
      m_Saver.writeBatch();

      // load
      m_Loader.setSource(new FileInputStream(new File(m_ExportFilename)));
      data = m_Loader.getDataSet();

      // compare data
      try {
        if (m_Instances.classIndex() != data.classIndex()) {
          data.setClassIndex(m_Instances.classIndex());
        }
        compareDatasets(m_Instances, data);
      } catch (Exception e) {
        fail("File stream loading failed (datasets differ): " + e.toString());
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail("File stream loading failed: " + e.toString());
    }
  }
  /** test the batch saving/loading (via setFile(File)). */
  public void testBatch() {
    Instances data;

    try {
      // save
      m_Saver.setInstances(m_Instances);
      m_Saver.setFile(new File(m_ExportFilename));
      m_Saver.writeBatch();

      // load
      ((AbstractFileLoader) m_Loader).setFile(new File(m_ExportFilename));
      data = m_Loader.getDataSet();

      // compare data
      try {
        if (m_Instances.classIndex() != data.classIndex()) {
          data.setClassIndex(m_Instances.classIndex());
        }
        compareDatasets(m_Instances, data);
      } catch (Exception e) {
        fail("Incremental load failed (datasets differ): " + e.toString());
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail("Batch save/load failed: " + e.toString());
    }
  }
Example #24
  /**
   * Tests the ThresholdCurve generation from the command line. The classifier is currently
   * hardcoded. Pipe in an arff file.
   *
   * @param args currently ignored
   */
  public static void main(String[] args) {

    try {

      Instances inst = new Instances(new java.io.InputStreamReader(System.in));
      if (false) {
        System.out.println(ThresholdCurve.getNPointPrecision(inst, 11));
      } else {
        inst.setClassIndex(inst.numAttributes() - 1);
        ThresholdCurve tc = new ThresholdCurve();
        EvaluationUtils eu = new EvaluationUtils();
        Classifier classifier = new weka.classifiers.functions.Logistic();
        FastVector predictions = new FastVector();
        for (int i = 0; i < 2; i++) { // Do two runs.
          eu.setSeed(i);
          predictions.appendElements(eu.getCVPredictions(classifier, inst, 10));
          // System.out.println("\n\n\n");
        }
        Instances result = tc.getCurve(predictions);
        System.out.println(result);
      }
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }
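As a hedged follow-up, reusing the names from the method above: the curve Instances returned by getCurve(predictions) can be summarized with ThresholdCurve's static helpers, for example the area under the ROC curve.

        Instances curve = tc.getCurve(predictions);
        double auc = ThresholdCurve.getROCArea(curve); // static helper on ThresholdCurve
        System.out.println("ROC area: " + auc);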
Example #25
  /**
   * Sets the format of the input instances.
   *
   * @param instanceInfo an Instances object containing the input instance structure (any instances
   *     contained in the object are ignored - only the structure is required).
   * @return true if the outputFormat may be collected immediately
   * @throws UnsupportedAttributeTypeException if selected attributes are not numeric or nominal.
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {

    if ((instanceInfo.classIndex() > 0) && (!getFillWithMissing())) {
      throw new IllegalArgumentException(
          "TimeSeriesTranslate: Need to fill in missing values "
              + "using appropriate option when class index is set.");
    }
    super.setInputFormat(instanceInfo);
    // Create the output buffer
    Instances outputFormat = new Instances(instanceInfo, 0);
    for (int i = 0; i < instanceInfo.numAttributes(); i++) {
      if (i != instanceInfo.classIndex()) {
        if (m_SelectedCols.isInRange(i)) {
          if (outputFormat.attribute(i).isNominal() || outputFormat.attribute(i).isNumeric()) {
            outputFormat.renameAttribute(
                i,
                outputFormat.attribute(i).name()
                    + (m_InstanceRange < 0 ? '-' : '+')
                    + Math.abs(m_InstanceRange));
          } else {
            throw new UnsupportedAttributeTypeException(
                "Only numeric and nominal attributes may be manipulated in time series.");
          }
        }
      }
    }
    outputFormat.setClassIndex(instanceInfo.classIndex());
    setOutputFormat(outputFormat);
    return true;
  }
  /**
   * Adds the prediction intervals as additional attributes at the end. Since classifiers can
   * return a varying number of intervals per instance, the dataset is filled with missing values
   * for non-existing intervals.
   */
  protected void addPredictionIntervals() {
    int maxNum;
    int num;
    int i;
    int n;
    FastVector preds;
    FastVector atts;
    Instances data;
    Instance inst;
    Instance newInst;
    double[] values;
    double[][] predInt;

    // determine the maximum number of intervals
    maxNum = 0;
    preds = m_Evaluation.predictions();
    for (i = 0; i < preds.size(); i++) {
      num = ((NumericPrediction) preds.elementAt(i)).predictionIntervals().length;
      if (num > maxNum) maxNum = num;
    }

    // create new header
    atts = new FastVector();
    for (i = 0; i < m_PlotInstances.numAttributes(); i++)
      atts.addElement(m_PlotInstances.attribute(i));
    for (i = 0; i < maxNum; i++) {
      atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-lowerBoundary"));
      atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-upperBoundary"));
      atts.addElement(new Attribute("predictionInterval_" + (i + 1) + "-width"));
    }
    data = new Instances(m_PlotInstances.relationName(), atts, m_PlotInstances.numInstances());
    data.setClassIndex(m_PlotInstances.classIndex());

    // update data
    for (i = 0; i < m_PlotInstances.numInstances(); i++) {
      inst = m_PlotInstances.instance(i);
      // copy old values
      values = new double[data.numAttributes()];
      System.arraycopy(inst.toDoubleArray(), 0, values, 0, inst.numAttributes());
      // add interval data
      predInt = ((NumericPrediction) preds.elementAt(i)).predictionIntervals();
      for (n = 0; n < maxNum; n++) {
        if (n < predInt.length) {
          values[m_PlotInstances.numAttributes() + n * 3 + 0] = predInt[n][0];
          values[m_PlotInstances.numAttributes() + n * 3 + 1] = predInt[n][1];
          values[m_PlotInstances.numAttributes() + n * 3 + 2] = predInt[n][1] - predInt[n][0];
        } else {
          values[m_PlotInstances.numAttributes() + n * 3 + 0] = Utils.missingValue();
          values[m_PlotInstances.numAttributes() + n * 3 + 1] = Utils.missingValue();
          values[m_PlotInstances.numAttributes() + n * 3 + 2] = Utils.missingValue();
        }
      }
      // create new Instance
      newInst = new DenseInstance(inst.weight(), values);
      data.add(newInst);
    }

    m_PlotInstances = data;
  }
Example #27
  public void run() throws Exception {
    BufferedReader datafileclassificationpickup =
        readDataFile(Config.outputPath() + "DaysPickUpClassification.txt");
    BufferedReader datafileclassificationdropoff =
        readDataFile(Config.outputPath() + "DaysDropOffClassification.txt");
    BufferedReader datafileregresssionpickup =
        readDataFile(Config.outputPath() + "DaysPickUpRegression.txt");
    BufferedReader datafileregresssiondropoff =
        readDataFile(Config.outputPath() + "DaysDropOffRegression.txt");

    dataclassificationpickup = new Instances(datafileclassificationpickup);
    dataclassificationpickup.setClassIndex(dataclassificationpickup.numAttributes() - 1);

    dataclassificationdropoff = new Instances(datafileclassificationdropoff);
    dataclassificationdropoff.setClassIndex(dataclassificationdropoff.numAttributes() - 1);

    dataregressionpickup = new Instances(datafileregresssionpickup);
    dataregressionpickup.setClassIndex(dataregressionpickup.numAttributes() - 1);

    dataregressiondropoff = new Instances(datafileregresssiondropoff);
    dataregressiondropoff.setClassIndex(dataregressiondropoff.numAttributes() - 1);

    System.out.println("KNN classification model");
    ibkclassificationpickup = new IBk(10);
    ibkclassificationpickup.buildClassifier(dataclassificationpickup);
    ibkclassificationdropoff = new IBk(10);
    ibkclassificationdropoff.buildClassifier(dataclassificationdropoff);
    System.out.println("Classification Model Ready");

    System.out.println("KNN regression model");
    ibkregressionpickup = new IBk(10);
    ibkregressionpickup.buildClassifier(dataregressionpickup);
    ibkregressiondropoff = new IBk(10);
    ibkregressiondropoff.buildClassifier(dataregressiondropoff);
    System.out.println("Regression Model Ready");

    instclassificationpickup = new DenseInstance(9);
    instclassificationpickup.setDataset(dataclassificationpickup);
    instclassificationdropoff = new DenseInstance(9);
    instclassificationdropoff.setDataset(dataclassificationdropoff);
    instregressionpickup = new DenseInstance(9);
    instregressionpickup.setDataset(dataregressionpickup);
    instregressiondropoff = new DenseInstance(9);
    instregressiondropoff.setDataset(dataregressiondropoff);
    System.out.println("Models ready");
  }
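Before the DenseInstance(9) objects built above can be classified, their attribute values must be set. A hedged sketch reusing the fields from run(); the feature values here are placeholders only:

  public double predictPickup() throws Exception {
    // populate the 9-attribute instance (last attribute is the class)
    for (int a = 0; a < instclassificationpickup.numAttributes() - 1; a++) {
      instclassificationpickup.setValue(a, 0.0); // placeholder feature values
    }
    return ibkclassificationpickup.classifyInstance(instclassificationpickup);
  }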
Example #28
 /**
  * Reads the sparse feature vector data from the data file and converts it into Weka's instance
  * format.
  */
 public void readSparseFVsFromFile(
     File dataFile, int numDocs, boolean trainingMode, int numLabels, boolean surroundMode) {
   int numFeats = 0;
   int numClasses = 0;
   labelsFVDoc = new LabelsOfFeatureVectorDoc[numDocs];
   // Read the sparse FVs by using the method in MultiClassLearning class
   MultiClassLearning multiClassL = new MultiClassLearning();
   boolean isUsingDataFile = false;
   File tempFVDataFile = null;
   multiClassL.getDataFromFile(numDocs, dataFile, isUsingDataFile, tempFVDataFile);
   // Create the attributes.
   numFeats = multiClassL.dataFVinDoc.getTotalNumFeatures();
   FastVector attributes = new FastVector(numFeats + 1);
   for (int i = 0; i < numFeats; ++i)
     attributes.addElement(new Attribute(new Integer(i + 1).toString()));
   // Add class attribute.
   if (surroundMode) numClasses = 2 * numLabels + 1; // count the null too, as value -1.
   else numClasses = numLabels + 1;
   FastVector classValues = new FastVector(numClasses);
   classValues.addElement("-1"); // The first class for null class
   for (int i = 1; i < numClasses; ++i) classValues.addElement(new Integer(i).toString());
   attributes.addElement(new Attribute("Class", classValues));
   // Create the dataset with capacity for all FVs (the actual number of FVs
   // may be larger than pre-specified because of possible multi-labels) and
   // set the class index
   instancesData =
       new Instances("SparseFVsData", attributes, multiClassL.dataFVinDoc.getNumTraining());
   instancesData.setClassIndex(instancesData.numAttributes() - 1);
   // Copy the data into the instances
   for (int iDoc = 0; iDoc < multiClassL.dataFVinDoc.getNumTrainingDocs(); ++iDoc) {
     SparseFeatureVector[] fvs = multiClassL.dataFVinDoc.trainingFVinDoc[iDoc].getFvs();
     labelsFVDoc[iDoc] = new LabelsOfFeatureVectorDoc();
     labelsFVDoc[iDoc].multiLabels = multiClassL.dataFVinDoc.labelsFVDoc[iDoc].multiLabels;
     for (int i = 0; i < fvs.length; ++i) {
       // Object valueO = fvs[i].getValues();
       double[] values = new double[fvs[i].getLen()];
       int[] indexes = new int[fvs[i].getLen()];
       for (int j = 0; j < fvs[i].getLen(); ++j) {
         // values[j] = (double)fvs[i].values[j];
         values[j] = fvs[i].nodes[j].value;
         indexes[j] = fvs[i].nodes[j].index;
       }
       SparseInstance inst = new SparseInstance(1.0, values, indexes, 50000);
       inst.setDataset(instancesData);
       if (trainingMode && labelsFVDoc[iDoc].multiLabels[i].num > 0)
         for (int j1 = 0; j1 < labelsFVDoc[iDoc].multiLabels[i].num; ++j1) {
           inst.setClassValue((labelsFVDoc[iDoc].multiLabels[i].labels[j1])); // label > 0
           instancesData.add(inst);
         }
       else {
         inst.setClassValue("-1"); // set label as -1 for null
         instancesData.add(inst);
       }
     }
   }
   return;
 }
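A hedged illustration of the SparseInstance constructor used above: the values array pairs with the attribute indexes, every other attribute is implicitly 0, and the last argument is the total number of attributes in the dataset.

 void sparseInstanceSketch() {
   double[] values = {1.0, 3.5};
   int[] indexes = {2, 7};
   // weight 1.0; only the non-zero values are stored
   SparseInstance si = new SparseInstance(1.0, values, indexes, 10);
   System.out.println(si);
 }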
 /** evaluates the classifier */
 @Override
 public void evaluate() throws Exception {
   // evaluate classifier and print some statistics
   if (_test.classIndex() == -1) _test.setClassIndex(_test.numAttributes() - 1);
   Evaluation eval = new Evaluation(_train);
   eval.evaluateModel(_cl, _test);
   System.out.println(eval.toSummaryString("\nResults\n======\n", false));
   System.out.println(eval.toMatrixString());
 }
 public static void main(String[] args) throws Exception {
   BufferedReader reader = new BufferedReader(new FileReader("PCAin.arff"));
   Instances data = new Instances(reader);
   reader.close();
   if (data.classIndex() == -1) {
     data.setClassIndex(data.numAttributes() - 1);
   }
   pca(data);
 }