Java Instancesの例

プログラミング言語: Java

名前空間/パッケージ名: weka.core

クラス/型: Instances

hotexamples.comのコード掲載数: 30

weka.core.Instancesは、Wekaライブラリーの一部であり、機械学習のためのJavaベースのデータセットクラスです。このクラスは、データの読み込み、保存、操作、および変換を行うための機能を提供します。また、データセットの属性やインスタンスの情報を取得し、変更することもできます。weka.core.Instancesは、さまざまな形式のデータセットファイルをサポートしており、機械学習のタスクで広く使用されています。このクラスは、Wekaフレームワークの中心的な役割を果たし、データの前処理やモデルのトレーニングに不可欠なクラスです。

Java Instances - 30件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたJavaのweka.core.Instancesの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

add(30)

deleteWithMissingClass(30)

numClasses(30)

numAttributes(30)

setClassIndex(30)

instance(30)

numInstances(30)

classIndex(30)

classAttribute(30)

attribute(30)

relationName(19)

enumerateInstances(17)

setClass(13)

get(12)

insertAttributeAt(11)

delete(11)

meanOrMode(11)

randomize(10)

stringFreeStructure(9)

testCV(8)

attributeToDoubleArray(8)

equalHeaders(8)

sumOfWeights(7)

toString(7)

deleteAttributeAt(7)

enumerateAttributes(7)

trainCV(6)

size(6)

setRelationName(5)

stratify(5)

mergeInstances(5)

variance(5)

checkForStringAttributes(5)

equalHeadersMsg(4)

deleteStringAttributes(3)

getRandomNumberGenerator(3)

firstInstance(3)

numDistinctValues(3)

attributeStats(2)

toSummaryString(2)

clear(2)

readInstance(2)

sort(2)

deleteWithMissing(1)

lastInstance(1)

swap(1)

appendAttribute(1)

resampleWithWeights(1)

renameAttribute(1)

remove(1)

コード例 #1

ファイルを表示

ファイル: LWL.java プロジェクト: alishakiba/jDenetX

  /**
   * Builds the classifier from the supplied training data.
   *
   * <p>The data is first checked against this classifier's capabilities. A copy of the data is
   * then stripped of instances without a class value and passed to {@code m_NNSearch}. When the
   * data holds nothing but the class attribute, a ZeroR model is built instead.
   *
   * @param instances set of instances serving as training data
   * @throws IllegalArgumentException if the base classifier is not a WeightedInstancesHandler
   * @throws Exception if the classifier has not been generated successfully
   */
  public void buildClassifier(Instances instances) throws Exception {

    final boolean handlesWeights = m_Classifier instanceof WeightedInstancesHandler;
    if (!handlesWeights) {
      throw new IllegalArgumentException("Classifier must be a WeightedInstancesHandler!");
    }

    // fail early when the data is outside of what this classifier can handle
    getCapabilities().testWithFail(instances);

    // continue with a copy from which instances lacking a class value are removed
    Instances trainingData = new Instances(instances);
    trainingData.deleteWithMissingClass();

    // nothing but the class attribute left to learn from -> fall back to ZeroR
    if (trainingData.numAttributes() == 1) {
      System.err.println(
          "Cannot build model (only class attribute present in data!), "
              + "using ZeroR model instead!");
      m_ZeroR = new weka.classifiers.rules.ZeroR();
      m_ZeroR.buildClassifier(trainingData);
      return;
    }
    m_ZeroR = null;

    m_Train = new Instances(trainingData, 0, trainingData.numInstances());
    m_NNSearch.setInstances(m_Train);
  }

コード例 #2

ファイルを表示

ファイル: ClassifierSplitModel.java プロジェクト: 0x0539/weka

  /**
   * Distributes the given instances over the subsets of this split.
   *
   * <p>An instance for which {@code whichSubset} yields a non-negative index is added to that
   * subset only. Otherwise it is added to every subset whose weight is greater than zero
   * (according to {@code Utils.gr}), and the weight of the added entry is set to the product of
   * the subset weight and the instance weight.
   *
   * @param data the instances to split
   * @return one set of instances per subset
   * @exception Exception if something goes wrong
   */
  public final Instances[] split(Instances data) throws Exception {

    final int total = data.numInstances();
    final Instances[] subsets = new Instances[m_numSubsets];
    for (int s = 0; s < m_numSubsets; s++) {
      subsets[s] = new Instances(data, total);
    }

    for (int row = 0; row < total; row++) {
      final Instance current = data.instance(row);
      final double[] subsetWeights = weights(current);
      final int target = whichSubset(current);

      if (target > -1) {
        subsets[target].add(current);
        continue;
      }

      // no single target subset: spread the instance over all positively weighted subsets
      for (int s = 0; s < m_numSubsets; s++) {
        if (!Utils.gr(subsetWeights[s], 0)) {
          continue;
        }
        final double scaledWeight = subsetWeights[s] * current.weight();
        subsets[s].add(current);
        subsets[s].lastInstance().setWeight(scaledWeight);
      }
    }

    for (Instances subset : subsets) {
      subset.compactify();
    }
    return subsets;
  }

コード例 #3

ファイルを表示

ファイル: ResultsPanel.java プロジェクト: huangwen87/mdrill

  /**
   * Resolves a comma-separated list of column names against the attributes of a dataset.
   *
   * <p>All names are compared and returned in lower case. As soon as one requested name does not
   * match an attribute, the request is discarded; a discarded or empty request yields the names
   * from the default list instead (which are not checked against the dataset).
   *
   * @param list comma-separated list of attribute names
   * @param defaultList the default list of attribute names
   * @param inst the instances to get the attribute names from
   * @return a vector containing attribute names
   */
  protected Vector determineColumnNames(String list, String defaultList, Instances inst) {
    // lower-cased names of every attribute in the dataset
    Vector known = new Vector();
    for (int idx = 0; idx < inst.numAttributes(); idx++) {
      known.add(inst.attribute(idx).name().toLowerCase());
    }

    // collect the requested columns; one unknown name invalidates the whole request
    Vector columns = new Vector();
    StringTokenizer requested = new StringTokenizer(list, ",");
    while (requested.hasMoreTokens()) {
      String name = requested.nextToken().toLowerCase();
      if (!known.contains(name)) {
        columns.clear();
        break;
      }
      columns.add(name);
    }

    // nothing usable was requested -> fall back to the defaults
    if (columns.size() == 0) {
      StringTokenizer fallback = new StringTokenizer(defaultList, ",");
      while (fallback.hasMoreTokens()) {
        columns.add(fallback.nextToken().toLowerCase());
      }
    }

    return columns;
  }

コード例 #4

ファイルを表示

ファイル: NominalToBinaryTest.java プロジェクト: AustinRP/parallel-weka-project

 /**
  * Runs the filter on the test data and checks the dimensions of the result: five attributes more
  * than the input and the same number of instances.
  */
 public void testTypical() {
   Instances filtered = useFilter();

   // the filtered data is expected to have exactly five additional attributes
   // (presumably the binary attributes created from nominal ones - verify against the test data)
   int expectedAttributes = m_Instances.numAttributes() + 5;
   assertEquals(expectedAttributes, filtered.numAttributes());

   // the number of instances must not change
   int expectedInstances = m_Instances.numInstances();
   assertEquals(expectedInstances, filtered.numInstances());
   // Eibe can enhance this to check the binarizing is correct.
 }

コード例 #5

ファイルを表示

ファイル: Decorate.java プロジェクト: paolopavan/cfr

  /**
   * Generates artificial training examples that share the header of the training data.
   *
   * <p>Nominal values are drawn via {@code selectIndexProbabilistically} from the statistics
   * stored for the attribute; numeric values are drawn from a Gaussian whose mean and standard
   * deviation are the first and second entry of those statistics. Attributes of any other type
   * keep the value 0 and trigger a message on standard error.
   *
   * @param artSize size of examples set to create
   * @param data training data
   * @return the set of unlabeled artificial examples
   */
  protected Instances generateArtificialData(int artSize, Instances data) {
    final int attCount = data.numAttributes();
    final Instances artificial = new Instances(data, artSize);

    for (int row = 0; row < artSize; row++) {
      final double[] values = new double[attCount];
      for (int col = 0; col < attCount; col++) {
        if (data.attribute(col).isNominal()) {
          // pick a nominal value according to its frequency of occurrence in the training data
          double[] frequencies = (double[]) m_AttributeStats.get(col);
          values[col] = (double) selectIndexProbabilistically(frequencies);
        } else if (data.attribute(col).isNumeric()) {
          // sample from the Gaussian given by the attribute's mean [0] and std dev [1]
          double[] meanAndStdDev = (double[]) m_AttributeStats.get(col);
          values[col] = (m_Random.nextGaussian() * meanAndStdDev[1]) + meanAndStdDev[0];
        } else {
          System.err.println("Decorate can only handle numeric and nominal values.");
        }
      }
      artificial.add(new Instance(1.0, values));
    }
    return artificial;
  }

コード例 #6

ファイルを表示

ファイル: MLEfficiency.java プロジェクト: bharatviswa504/MachineLearning-Music-Classification

  /**
   * Cross-validates a {@code FilteredClassifier} wrapping {@code NaiveBayes} on the given data
   * (10 folds, random seed 1) and prints the evaluation summary, the per-class details and the
   * confusion matrix to standard output.
   *
   * <p>The last attribute of {@code data} is set as its class attribute, and the static
   * {@code classifier} field is replaced by the newly created classifier.
   *
   * @param data the dataset to evaluate on
   * @throws Exception if the evaluation fails
   */
  public static void wekaAlgorithms(Instances data) throws Exception {
    classifier = new FilteredClassifier();
    classifier.setClassifier(new NaiveBayes());

    data.setClassIndex(data.numAttributes() - 1);

    final int folds = 10;
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(classifier, data, folds, new Random(1));

    System.out.println("===== Evaluating on filtered (training) dataset =====");
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toClassDetailsString());

    double[][] confusion = eval.confusionMatrix();
    System.out.println("========= Confusion Matrix =========");
    for (double[] row : confusion) {
      // the matrix is treated as square: the column bound is the number of rows
      for (int col = 0; col < confusion.length; col++) {
        System.out.print(row[col] + "  ");
      }
      System.out.println(" ");
    }
  }

コード例 #7

ファイルを表示

ファイル: WekaClassifier.java プロジェクト: zeitgeist87/Sentiment-Analysis-on-OpenStack

 /**
  * Trains the classifier on the training data and evaluates it afterwards. When the training data
  * has no class attribute set (class index -1), the last attribute is used as class.
  *
  * @throws Exception if building or evaluating the classifier fails
  */
 @Override
 public void train() throws Exception {
   final boolean classUnset = _train.classIndex() == -1;
   if (classUnset) {
     _train.setClassIndex(_train.numAttributes() - 1);
   }

   _cl.buildClassifier(_train);

   // evaluate the freshly built classifier
   evaluate();
 }

コード例 #8

ファイルを表示

ファイル: RegOptimizer.java プロジェクト: caglar/CollectiveLLearn

  /**
   * Prepares the optimizer for a run on the given data: copies the relevant settings from the
   * SVM, builds a copy of its kernel on the data, caches the class values of all instances and
   * resets the internal state (alpha arrays, support vector set, bias and counters).
   *
   * @param data the data to work with
   * @throws Exception if m_SVM is null
   */
  protected void init(Instances data) throws Exception {
    if (m_SVM == null) {
      throw new Exception("SVM not initialized in optimizer. Use RegOptimizer.setSVMReg()");
    }

    m_C = m_SVM.getC();
    m_data = data;
    m_classIndex = data.classIndex();
    m_nInstances = data.numInstances();
    final int count = m_nInstances;

    // the optimizer works on its own copy of the SVM's kernel
    m_kernel = Kernel.makeCopy(m_SVM.getKernel());
    m_kernel.buildKernel(data);

    // cache the class value of every instance
    m_target = new double[count];
    for (int row = 0; row < count; row++) {
      m_target[row] = data.instance(row).classValue();
    }

    m_random = new Random(m_nSeed);

    // alpha and alpha* both start out as all zeros
    m_alpha = new double[m_target.length];
    m_alphaStar = new double[m_target.length];

    m_supportVectors = new SMOset(count);

    m_b = 0.0;
    m_nEvals = 0;
    m_nCacheHits = -1;
  }

コード例 #9

ファイルを表示

ファイル: RegOptimizer.java プロジェクト: caglar/CollectiveLLearn

  /**
   * Housekeeping after the optimization has finished: releases the cached targets, records the
   * kernel statistics and, when the SVM uses a {@code PolyKernel} with exponent 1, folds the alpha
   * values into an explicit weight vector and releases the alpha arrays and the kernel to save
   * memory.
   *
   * @throws Exception if something goes wrong
   */
  protected void wrapUp() throws Exception {
    m_target = null;

    m_nEvals = m_kernel.numEvals();
    m_nCacheHits = m_kernel.numCacheHits();

    final boolean linearKernel =
        (m_SVM.getKernel() instanceof PolyKernel)
            && ((PolyKernel) m_SVM.getKernel()).getExponent() == 1.0;

    if (linearKernel) {
      // convert the alphas into one weight per attribute (the class attribute keeps weight 0)
      final double[] weights = new double[m_data.numAttributes()];
      int sv = m_supportVectors.getNext(-1);
      while (sv != -1) {
        for (int att = 0; att < weights.length; att++) {
          if (att == m_classIndex) {
            continue;
          }
          final double coefficient = m_alpha[sv] - m_alphaStar[sv];
          weights[att] += coefficient * m_data.instance(sv).value(att);
        }
        sv = m_supportVectors.getNext(sv);
      }
      m_weights = weights;

      // these are no longer needed once the weights are available
      m_alpha = null;
      m_alphaStar = null;
      m_kernel = null;
    }

    m_bModelBuilt = true;
  }

コード例 #10

ファイルを表示

ファイル: WekaTest.java プロジェクト: fsteeg/tm2

  /**
   * Small demo: builds a three-instance training set and a two-instance test set, trains a
   * {@code BayesNet} on the former, evaluates it on the latter and prints the evaluation summary
   * followed by the class distribution predicted for the last test instance.
   *
   * @param args command-line arguments (not used)
   * @throws Exception if building or evaluating the classifier fails
   */
  public static void main(String[] args) throws Exception {
    // training data: three labelled vectors with four values each
    Instances trainingSet = createSet(4);
    Instance trainS1 = createInstance(new double[] {1, 0.7, 0.1, 0.7}, "S1", trainingSet);
    Instance trainS2 = createInstance(new double[] {0.1, 0.2, 1, 0.3}, "S2", trainingSet);
    Instance trainS3 = createInstance(new double[] {0, 0, 0, 0}, "S3", trainingSet);
    trainingSet.add(trainS1);
    trainingSet.add(trainS2);
    trainingSet.add(trainS3);

    // test data: copies of the first two training vectors
    // NOTE(review): the test instances are created against the training set, not the test set -
    // confirm this is intended
    Instances testSet = createSet(4);
    Instance testS1 = createInstance(new double[] {1, 0.7, 0.1, 0.7}, "S1", trainingSet);
    Instance testS2 = createInstance(new double[] {0.1, 0.2, 1, 0.3}, "S2", trainingSet);
    testSet.add(testS1);
    testSet.add(testS2);

    // train a Bayesian network classifier
    Classifier model = new BayesNet();
    model.buildClassifier(trainingSet);

    // evaluate the model on the test set
    Evaluation evaluation = new Evaluation(trainingSet);
    evaluation.evaluateModel(model, testSet);

    // print the result in the style of the Weka explorer
    System.out.println(evaluation.toSummaryString());

    // print the predicted class distribution of the last test instance, one value per line
    double[] distribution = model.distributionForInstance(testS2);
    for (double probability : distribution) {
      System.out.println(probability);
    }
  }

コード例 #11

ファイルを表示

ファイル: FilteredAssociator.java プロジェクト: huangwen87/mdrill

  /**
   * Builds the base associator on the filtered data.
   *
   * <p>Works on a copy of the data: the class index is derived from {@code getClassIndex()}
   * (0 selects the last attribute, any other value is reduced by one), instances with a missing
   * class are removed unless the configured index is -1, the filter is applied, and the result is
   * checked against the associator's capabilities before the associator is built.
   *
   * @param data the training data
   * @throws Exception if the Associator could not be built successfully
   */
  public void buildAssociations(Instances data) throws Exception {
    if (m_Associator == null) {
      throw new Exception("No base associator has been set!");
    }

    // continue with a copy and set its class index
    // NOTE(review): a configured index of -1 results in setClassIndex(-2) here - confirm that
    // this is the intended way of saying "no class"
    data = new Instances(data);
    final int classIdx =
        (getClassIndex() == 0) ? data.numAttributes() - 1 : getClassIndex() - 1;
    data.setClassIndex(classIdx);

    // instances without a class value are only dropped when a class has been configured
    final boolean classConfigured = getClassIndex() != -1;
    if (classConfigured) {
      data.deleteWithMissingClass();
    }

    // filter capabilities are checked by setInputFormat
    m_Filter.setInputFormat(data);
    data = Filter.useFilter(data, m_Filter);

    // can the associator handle the filtered data?
    getAssociator().getCapabilities().testWithFail(data);

    m_FilteredInstances = data.stringFreeStructure();
    m_Associator.buildAssociations(data);
  }

コード例 #12

ファイルを表示

ファイル: TextDirectoryLoader.java プロジェクト: huangwen87/mdrill

  /**
   * Determines and returns (if possible) the structure (internally the header) of the data set as
   * an empty set of instances.
   *
   * <p>The structure is built once and cached: a string attribute "text", optionally a string
   * attribute "filename", and a nominal class attribute whose labels are the names of the
   * sub-directories of the configured directory. The class attribute is the last attribute.
   *
   * @return the structure of the data set as an empty set of Instances
   * @throws IOException if no directory has been specified or its contents cannot be listed
   */
  public Instances getStructure() throws IOException {
    if (getDirectory() == null) {
      throw new IOException("No directory/source has been specified");
    }

    // determine class labels, i.e., sub-dirs
    if (m_structure == null) {
      String directoryPath = getDirectory().getAbsolutePath();
      File dir = new File(directoryPath);

      // File.list() returns null (not an empty array) when the path is not a directory or an
      // I/O error occurs; report that as IOException instead of failing with a
      // NullPointerException
      String[] subdirs = dir.list();
      if (subdirs == null) {
        throw new IOException("Cannot list contents of directory: " + directoryPath);
      }

      ArrayList<String> classes = new ArrayList<String>();
      for (String name : subdirs) {
        if (new File(dir, name).isDirectory()) {
          classes.add(name);
        }
      }

      ArrayList<Attribute> atts = new ArrayList<Attribute>();
      atts.add(new Attribute("text", (ArrayList<String>) null));
      if (m_OutputFilename) {
        atts.add(new Attribute("filename", (ArrayList<String>) null));
      }
      // make sure that the name of the class attribute is unlikely to
      // clash with any attribute created via the StringToWordVector filter
      atts.add(new Attribute("@@class@@", classes));

      // the relation name is the directory path with '/', '\' and ':' replaced by '_'
      String relName = directoryPath.replaceAll("/", "_");
      relName = relName.replaceAll("\\\\", "_").replaceAll(":", "_");
      m_structure = new Instances(relName, atts, 0);
      m_structure.setClassIndex(m_structure.numAttributes() - 1);
    }

    return m_structure;
  }

コード例 #13

ファイルを表示

ファイル: LibSVMSaver.java プロジェクト: daniyar-artykov/j2ee

  /**
   * Sets the instances that should be stored.
   *
   * <p>Before delegating to the superclass, the upper bound of the class-index setting is set to
   * the index of the last attribute and the resulting index is applied to the given instances as
   * their class index (the passed-in object itself is modified).
   *
   * @param instances the instances
   */
  @Override
  public void setInstances(Instances instances) {
    final int lastAttribute = instances.numAttributes() - 1;
    m_ClassIndex.setUpper(lastAttribute);

    final int resolvedClassIndex = m_ClassIndex.getIndex();
    instances.setClassIndex(resolvedClassIndex);

    super.setInstances(instances);
  }

コード例 #14

ファイルを表示

ファイル: LocalScoreSearchAlgorithm.java プロジェクト: ngphloc/zebra

  /**
   * Calculates the score of a node for its current parent set by counting, over all instances of
   * the network's dataset, how often each node value occurs together with each configuration of
   * parent values, and passing these counts to {@code calcScoreOfCounts}.
   *
   * @param nNode index of the node (attribute) to score
   * @return the score computed from the frequency counts
   */
  private double calcNodeScorePlain(int nNode) {
    final Instances instances = m_BayesNet.m_Instances;
    final ParentSet parents = m_BayesNet.getParentSet(nNode);

    // one counter per (parent configuration, node value) pair; new int arrays are all zero
    final int nCardinality = parents.getCardinalityOfParents();
    final int numValues = instances.attribute(nNode).numValues();
    final int[] nCounts = new int[nCardinality * numValues];

    // estimate distributions
    for (Enumeration all = instances.enumerateInstances(); all.hasMoreElements(); ) {
      final Instance instance = (Instance) all.nextElement();

      // encode the values of all parents as a single mixed-radix number
      double configuration = 0;
      for (int p = 0; p < parents.getNrOfParents(); p++) {
        final int parentAttr = parents.getParent(p);
        final int radix = instances.attribute(parentAttr).numValues();
        configuration = configuration * radix + instance.value(parentAttr);
      }

      final int slot = numValues * ((int) configuration) + (int) instance.value(nNode);
      nCounts[slot]++;
    }

    return calcScoreOfCounts(nCounts, nCardinality, numValues, instances);
  } // CalcNodeScore

コード例 #15

ファイルを表示

ファイル: AbstractFileConverterTest.java プロジェクト: CSLeicester/weka

  /**
   * Tests whether data can be loaded from a URL (via setURL(String)): the test data is saved to
   * the export file, loaded back through the file's URL and compared with the original. Loaders
   * that are not URL-sourced are skipped.
   */
  public void testURLSourcedLoader() {
    final boolean urlSourced = getLoader() instanceof URLSourcedLoader;
    if (!urlSourced) {
      return;
    }

    try {
      // write the reference data to the export file
      m_Saver.setInstances(m_Instances);
      m_Saver.setFile(new File(m_ExportFilename));
      m_Saver.writeBatch();

      // read it back through the file's URL
      String location = new File(m_ExportFilename).toURI().toURL().toString();
      ((URLSourcedLoader) m_Loader).setURL(location);
      Instances loaded = m_Loader.getDataSet();

      // compare, after aligning the class index of the loaded data with the reference
      try {
        int expectedClassIndex = m_Instances.classIndex();
        if (expectedClassIndex != loaded.classIndex()) {
          loaded.setClassIndex(m_Instances.classIndex());
        }
        compareDatasets(m_Instances, loaded);
      } catch (Exception e) {
        fail("URL load failed (datasets differ): " + e.toString());
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail("URL load failed: " + e.toString());
    }
  }

コード例 #16

ファイルを表示

ファイル: Driver.java プロジェクト: illes/multimodal

  /**
   * Writes the class distributions predicted by a classifier as comma-separated text.
   *
   * <p>The header line is {@code id} followed by one double-quoted column per class label
   * (double quotes and backslashes in labels are replaced by underscores). Each following line
   * holds the string value of the id attribute and one probability per class, written as float;
   * probabilities that are not greater than 1e-5 are written as 0.
   *
   * @param c the classifier used for prediction
   * @param data the instances to predict
   * @param idIndex index of the attribute holding the instance id
   * @param out the writer receiving the output
   * @throws Exception if prediction or writing fails
   */
  private static void writePredictedDistributions(
      Classifier c, Instances data, int idIndex, Writer out) throws Exception {
    // header line
    out.write("id");
    for (int cls = 0; cls < data.numClasses(); cls++) {
      out.write(",\"");
      String label = data.classAttribute().value(cls);
      out.write(label.replaceAll("[\"\\\\]", "_"));
      out.write("\"");
    }
    out.write("\n");

    // one line per instance
    for (int row = 0; row < data.numInstances(); row++) {
      final String id = data.instance(row).stringValue(idIndex);
      final double[] distribution = c.distributionForInstance(data.instance(row));

      out.write(id);
      for (int cls = 0; cls < distribution.length; cls++) {
        out.write(",");
        // tiny probabilities are cut off to zero
        float shown = distribution[cls] > 1e-5 ? (float) distribution[cls] : 0f;
        out.write(String.valueOf(shown));
      }
      out.write("\n");
    }
  }

コード例 #17

ファイルを表示

ファイル: AbstractFileConverterTest.java プロジェクト: CSLeicester/weka

  /**
   * Tests whether data can be loaded via setSource() with a file stream: the test data is saved
   * to the export file, loaded back through a {@code FileInputStream} and compared with the
   * original. The stream is closed once the data has been read.
   */
  public void testLoaderWithStream() {
    Instances data;

    try {
      // save
      m_Saver.setInstances(m_Instances);
      m_Saver.setFile(new File(m_ExportFilename));
      m_Saver.writeBatch();

      // load; the stream is opened here, so it is also released here - otherwise the file handle
      // stays open until garbage collection
      FileInputStream stream = new FileInputStream(new File(m_ExportFilename));
      try {
        m_Loader.setSource(stream);
        data = m_Loader.getDataSet();
      } finally {
        stream.close();
      }

      // compare data
      try {
        if (m_Instances.classIndex() != data.classIndex()) {
          data.setClassIndex(m_Instances.classIndex());
        }
        compareDatasets(m_Instances, data);
      } catch (Exception e) {
        fail("File stream loading failed (datasets differ): " + e.toString());
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail("File stream loading failed: " + e.toString());
    }
  }

コード例 #18

ファイルを表示

ファイル: TimeSeriesTranslate.java プロジェクト: SuperWan/weka

  /**
   * Sets the format of the input instances and derives the output format from it.
   *
   * <p>In the output format every selected non-class attribute is renamed by appending the
   * instance range with its sign ({@code '-'} for negative ranges, {@code '+'} otherwise). The
   * class index of the input is carried over.
   *
   * @param instanceInfo an Instances object containing the input instance structure (any instances
   *     contained in the object are ignored - only the structure is required).
   * @return true if the outputFormat may be collected immediately
   * @throws IllegalArgumentException if the class index is greater than 0 and missing values are
   *     not filled in
   * @throws UnsupportedAttributeTypeException if selected attributes are not numeric or nominal.
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {

    // NOTE(review): this tests "> 0", so a class attribute at index 0 passes - confirm intent
    final boolean classSet = instanceInfo.classIndex() > 0;
    if (classSet && !getFillWithMissing()) {
      throw new IllegalArgumentException(
          "TimeSeriesTranslate: Need to fill in missing values "
              + "using appropriate option when class index is set.");
    }
    super.setInputFormat(instanceInfo);

    // the output format starts as an empty copy of the input structure
    final Instances outputFormat = new Instances(instanceInfo, 0);
    final int attCount = instanceInfo.numAttributes();
    for (int att = 0; att < attCount; att++) {
      // the class attribute and unselected attributes are left untouched
      if (att == instanceInfo.classIndex() || !m_SelectedCols.isInRange(att)) {
        continue;
      }

      final boolean supported =
          outputFormat.attribute(att).isNominal() || outputFormat.attribute(att).isNumeric();
      if (!supported) {
        throw new UnsupportedAttributeTypeException(
            "Only numeric and nominal attributes may be " + " manipulated in time series.");
      }

      final char sign = m_InstanceRange < 0 ? '-' : '+';
      final String renamed =
          outputFormat.attribute(att).name() + sign + Math.abs(m_InstanceRange);
      outputFormat.renameAttribute(att, renamed);
    }

    outputFormat.setClassIndex(instanceInfo.classIndex());
    setOutputFormat(outputFormat);
    return true;
  }

コード例 #19

ファイルを表示

ファイル: DecisionAnalyzer.java プロジェクト: CaoAo/BeehiveZ

  /**
   * Runs the classifier on the learning data supplied by the given analyzer and hands the
   * resulting visualizations back to it.
   *
   * <p>When the analyzer provides no learning instances, a message is printed and nothing else
   * happens. When building the classifier or the visualizations fails, the stack trace is printed
   * and an error panel is set as the result visualization.
   *
   * @param cda the analyzer that supplies the learning data and receives the visualizations; it
   *     is also stored in the {@code clusterDecisionAnalyzer} field
   */
  public void analyse(ClusterDecisionAnalyzer cda) {
    clusterDecisionAnalyzer = cda;

    // fetch the learning data from the analyzer
    Instances data = cda.getDataInfo();

    // without a single learning instance (e.g. the decision point is never reached, or the
    // decision classes cannot be specified properly) the algorithm is not called
    if (data.numInstances() == 0) {
      System.out.println("No learning instances available");
      return;
    }

    // actually solve the classification problem
    try {
      myClassifier.buildClassifier(data);
      // build up the result visualizations
      cda.setResultVisualization(createResultVisualization());
      cda.setEvaluationVisualization(createEvaluationVisualization(data));
    } catch (Exception ex) {
      ex.printStackTrace();
      cda.setResultVisualization(
          createMessagePanel("Error while solving the classification problem"));
    }
  }

コード例 #20

ファイルを表示

ファイル: ThresholdCurve.java プロジェクト: FarooqZuberi/autoweka

  /**
   * Calculates the area under the precision-recall curve (AUPRC).
   *
   * <p>The curve is walked from the last row towards the first; each step adds the precision of
   * the current row multiplied by the change in recall since the previous step. The last row only
   * provides the initial recall value.
   *
   * @param tcurve a previously extracted threshold curve Instances.
   * @return the PRC area; Double.NaN if the Instances were not generated by ThresholdCurve or are
   *     empty; {@code Utils.missingValue()} if the computed area is exactly 0.
   */
  public static double getPRCArea(Instances tcurve) {
    final int n = tcurve.numInstances();
    final boolean fromThresholdCurve = RELATION_NAME.equals(tcurve.relationName());
    if (!fromThresholdCurve || n == 0) {
      return Double.NaN;
    }

    final int precisionIdx = tcurve.attribute(PRECISION_NAME).index();
    final int recallIdx = tcurve.attribute(RECALL_NAME).index();
    final double[] precision = tcurve.attributeToDoubleArray(precisionIdx);
    final double[] recall = tcurve.attributeToDoubleArray(recallIdx);

    double area = 0;
    double previousRecall = recall[n - 1];

    // start from the first real p/r pair (not the artificial zero point)
    int i = n - 2;
    while (i >= 0) {
      area += (precision[i] * (recall[i] - previousRecall));
      previousRecall = recall[i];
      i--;
    }

    return (area == 0) ? Utils.missingValue() : area;
  }

コード例 #21

ファイルを表示

ファイル: BinC45Split.java プロジェクト: huangwen87/mdrill

  /**
   * Returns a string containing java source code equivalent to the test made at this node. The
   * instance being tested is called "i".
   *
   * <p>A negative index yields a null test. For a nominal split attribute the expression is an
   * {@code equals} test against the split value, negated unless the index is 0. Otherwise the
   * expression compares the attribute's double value with the split point, using {@code <=} for
   * index 0 and {@code >} for any other index.
   *
   * @param index index of the nominal value tested
   * @param data the data containing instance structure info
   * @return a value of type 'String'
   */
  public final String sourceExpression(int index, Instances data) {
    if (index < 0) {
      return "i[" + m_attIndex + "] == null";
    }

    final StringBuilder expr = new StringBuilder();
    if (data.attribute(m_attIndex).isNominal()) {
      expr.append(index == 0 ? "i[" : "!i[")
          .append(m_attIndex)
          .append("]")
          .append(".equals(\"")
          .append(data.attribute(m_attIndex).value((int) m_splitPoint))
          .append("\")");
    } else {
      expr.append("((Double) i[").append(m_attIndex).append("])");
      expr.append(index == 0 ? ".doubleValue() <= " : ".doubleValue() > ");
      expr.append(m_splitPoint);
    }
    return expr.toString();
  }

コード例 #22

ファイルを表示

ファイル: ThresholdCurve.java プロジェクト: FarooqZuberi/autoweka

  /**
   * Calculates the area under the ROC curve as the Wilcoxon-Mann-Whitney statistic.
   *
   * <p>The totals of positives and negatives are taken from the first row of the curve. The sum
   * is divided by their product without any guard, so the result is not a finite number when
   * that product is zero.
   *
   * @param tcurve a previously extracted threshold curve Instances.
   * @return the ROC area, or Double.NaN if you don't pass in a ThresholdCurve generated Instances
   *     or the Instances are empty.
   */
  public static double getROCArea(Instances tcurve) {

    final int n = tcurve.numInstances();
    final boolean fromThresholdCurve = RELATION_NAME.equals(tcurve.relationName());
    if (!fromThresholdCurve || n == 0) {
      return Double.NaN;
    }

    final int tpInd = tcurve.attribute(TRUE_POS_NAME).index();
    final int fpInd = tcurve.attribute(FALSE_POS_NAME).index();
    final double[] tpVals = tcurve.attributeToDoubleArray(tpInd);
    final double[] fpVals = tcurve.attributeToDoubleArray(fpInd);

    final double totalPos = tpVals[0];
    final double totalNeg = fpVals[0];

    double area = 0.0;
    double cumNeg = 0.0;
    for (int i = 0; i < n; i++) {
      // per-row increments; the last row contributes its remaining counts as they are
      final boolean lastRow = (i == n - 1);
      final double cip = lastRow ? tpVals[i] : tpVals[i] - tpVals[i + 1];
      final double cin = lastRow ? fpVals[i] : fpVals[i] - fpVals[i + 1];

      area += cip * (cumNeg + (0.5 * cin));
      cumNeg += cin;
    }

    return area / (totalNeg * totalPos);
  }

コード例 #23

ファイルを表示

ファイル: RuleStats.java プロジェクト: dachylong/weka

  /**
   * Splits the dataset into the instances covered and not covered by the rule at the given index
   * and accumulates weighted statistics into the supplied arrays.
   *
   * <p>Entries of {@code stats} that are increased by the instance weights: [0] coverage, [1]
   * non-coverage, [2] true positives, [3] true negatives, [4] false positives, [5] false
   * negatives. The arrays are not reset here.
   *
   * @param index the given index, assuming correct
   * @param insts the dataset to be covered by the rule
   * @param stats the given double array to hold stats, side-effected
   * @param dist the given array to hold the class distribution of the covered instances,
   *     side-effected; may be null if the distribution is not needed
   * @return the instances covered ([0]) and not covered ([1]) by the rule
   */
  private Instances[] computeSimpleStats(
      int index, Instances insts, double[] stats, double[] dist) {
    final Rule rule = (Rule) m_Ruleset.elementAt(index);

    final int total = insts.numInstances();
    final Instances covered = new Instances(insts, total);
    final Instances uncovered = new Instances(insts, total);

    for (int i = 0; i < total; i++) {
      final Instance datum = insts.instance(i);
      final double weight = datum.weight();

      if (rule.covers(datum)) {
        covered.add(datum);
        stats[0] += weight; // coverage
        final boolean agrees = (int) datum.classValue() == (int) rule.getConsequent();
        stats[agrees ? 2 : 4] += weight; // true positives : false positives
        if (dist != null) {
          dist[(int) datum.classValue()] += weight;
        }
      } else {
        uncovered.add(datum);
        stats[1] += weight; // non-coverage
        final boolean agrees = (int) datum.classValue() == (int) rule.getConsequent();
        stats[agrees ? 5 : 3] += weight; // false negatives : true negatives
      }
    }

    return new Instances[] {covered, uncovered};
  }

コード例 #24

ファイルを表示

ファイル: ThresholdCurve.java プロジェクト: FarooqZuberi/autoweka

  /**
   * Tests the ThresholdCurve generation from the command line. The classifier is currently
   * hardcoded (Logistic). Pipe in an arff file.
   *
   * <p>The last attribute is used as class. The predictions of two 10-fold cross-validation runs
   * (seeds 0 and 1) are collected, and the threshold curve generated from them is printed to
   * standard output. Any exception is reported by printing its stack trace.
   *
   * @param args currently ignored
   */
  public static void main(String[] args) {

    try {
      Instances inst = new Instances(new java.io.InputStreamReader(System.in));
      inst.setClassIndex(inst.numAttributes() - 1);

      ThresholdCurve tc = new ThresholdCurve();
      EvaluationUtils eu = new EvaluationUtils();
      Classifier classifier = new weka.classifiers.functions.Logistic();
      FastVector predictions = new FastVector();
      for (int i = 0; i < 2; i++) { // Do two runs.
        eu.setSeed(i);
        predictions.appendElements(eu.getCVPredictions(classifier, inst, 10));
      }

      Instances result = tc.getCurve(predictions);
      System.out.println(result);
    } catch (Exception ex) {
      ex.printStackTrace();
    }
  }

コード例 #25

ファイルを表示

ファイル: EvaluatedUFS.java プロジェクト: Eddy-W/XattReduct

  /**
   * Computes a clustering accuracy against the class labels of a dataset.
   *
   * <p>The class label of each instance is read from the last attribute and truncated to an int.
   * A class-by-cluster contingency matrix is built (and printed to standard output, one row per
   * class). Each class is then mapped to its dominant cluster, i.e. the cluster containing most
   * of its instances (the lowest cluster index wins ties), and the returned value is the fraction
   * of instances whose cluster is the dominant cluster of their class.
   *
   * @param odata the dataset; its last attribute holds the class labels
   * @param clusters the cluster index assigned to each instance, in dataset order
   * @return the fraction of instances lying in the dominant cluster of their class; NaN when the
   *     dataset and the cluster array are both empty
   */
  public static double CA(Instances odata, int[] clusters) {
    final int numInstances = odata.numInstances();

    // class labels come from the last attribute and are truncated to int
    final double[] classValues = odata.attributeToDoubleArray(odata.numAttributes() - 1);
    final int[] oclass = new int[numInstances];
    int maxClass = -1;
    for (int i = 0; i < classValues.length; ++i) {
      oclass[i] = (int) classValues[i];
      maxClass = Math.max(maxClass, oclass[i]);
    }
    int maxCluster = -1;
    for (int cluster : clusters) {
      maxCluster = Math.max(maxCluster, cluster);
    }

    // contingency matrix: rows are classes, columns are clusters
    final int[][] M = new int[maxClass + 1][maxCluster + 1];
    for (int i = 0; i < clusters.length; ++i) {
      M[oclass[i]][clusters[i]]++;
    }
    for (int[] row : M) {
      System.out.println(Arrays.toString(row));
    }

    // dominant cluster per class: a real argmax over the row, kept in its own array so that the
    // counts in M are not overwritten
    final int[] dominantCluster = new int[M.length];
    for (int c = 0; c < M.length; ++c) {
      int best = -1;
      for (int j = 0; j < M[c].length; ++j) {
        if (best < 0 || M[c][j] > M[c][best]) {
          best = j;
        }
      }
      dominantCluster[c] = best;
    }

    double result = 0;
    for (int i = 0; i < oclass.length; ++i) {
      if (dominantCluster[oclass[i]] == clusters[i]) {
        result++;
      }
    }

    return result / (double) numInstances;
  }

コード例 #26

ファイルを表示

ファイル: NaiveBayesMultinomial.java プロジェクト: SuperWan/weka

  /**
   * Returns a string representation of the classifier: first the probability of every class, then
   * a tab-separated table with one row per attribute (word) and one column per class holding the
   * exponential of the stored word-given-class value.
   *
   * @return a string representation of the classifier
   */
  public String toString() {
    final StringBuilder text =
        new StringBuilder(
            "The independent probability of a class\n--------------------------------------\n");

    // class priors, one line per class
    for (int cls = 0; cls < m_numClasses; cls++) {
      text.append(m_headerInfo.classAttribute().value(cls));
      text.append("\t");
      text.append(Double.toString(m_probOfClass[cls]));
      text.append("\n");
    }

    text.append(
        "\nThe probability of a word given the class\n-----------------------------------------\n\t");

    // table header: the class labels
    for (int cls = 0; cls < m_numClasses; cls++) {
      text.append(m_headerInfo.classAttribute().value(cls));
      text.append("\t");
    }
    text.append("\n");

    // table body: one row per attribute
    for (int word = 0; word < m_numAttributes; word++) {
      text.append(m_headerInfo.attribute(word).name());
      text.append("\t");
      for (int cls = 0; cls < m_numClasses; cls++) {
        final double probability = Math.exp(m_probOfWordGivenClass[cls][word]);
        text.append(Double.toString(probability));
        text.append("\t");
      }
      text.append("\n");
    }

    return text.toString();
  }

コード例 #27

ファイルを表示

ファイル: MiddleOutConstructor.java プロジェクト: FarooqZuberi/autoweka

  /**
   * Calculates the centroid pivot of a node based on the list of points that it contains (the two
   * lists of its children are provided).
   *
   * <p>The centroid is the per-attribute mean over all points of both lists. The class attribute
   * (as given by the class index of {@code m_Instances}) is skipped and stays 0. Values are
   * accumulated under their attribute index, so sparse instances are handled correctly.
   *
   * @param list1 The point index list of first child.
   * @param list2 The point index list of second child.
   * @param insts The insts object on which the tree is being built (for header information).
   * @return The centroid pivot of the node.
   */
  public Instance calcPivot(MyIdxList list1, MyIdxList list2, Instances insts) {
    int classIdx = m_Instances.classIndex();
    double[] attrVals = new double[insts.numAttributes()];

    accumulateAttributeSums(list1, insts, classIdx, attrVals);
    accumulateAttributeSums(list2, insts, classIdx, attrVals);

    // turn the sums into means
    int numInsts = list1.length() + list2.length();
    for (int j = 0; j < attrVals.length; j++) {
      attrVals[j] /= numInsts;
    }
    return new DenseInstance(1.0, attrVals);
  }

  /** Adds the non-class attribute values of every point in the list to the running sums. */
  private static void accumulateAttributeSums(
      MyIdxList list, Instances insts, int classIdx, double[] sums) {
    for (int i = 0; i < list.length(); i++) {
      Instance point = insts.instance(((ListNode) list.get(i)).idx);
      for (int k = 0; k < point.numValues(); k++) {
        // k is a position in the instance's stored values; index(k) is the attribute it
        // belongs to (the two only coincide for dense instances)
        int attIdx = point.index(k);
        if (attIdx == classIdx) {
          continue;
        }
        sums[attIdx] += point.valueSparse(k);
      }
    }
  }

コード例 #28

ファイルを表示

ファイル: AbstractFileConverterTest.java プロジェクト: CSLeicester/weka

  /**
   * Tests batch saving/loading (via setFile(File)).
   *
   * <p>Writes {@code m_Instances} to {@code m_ExportFilename} with the saver in batch mode, reads
   * the file back with the loader, and compares the loaded dataset against the original. Any
   * exception raised while saving, loading or comparing is reported through {@code fail}.
   */
  public void testBatch() {
    Instances data;

    try {
      // save
      m_Saver.setInstances(m_Instances);
      m_Saver.setFile(new File(m_ExportFilename));
      m_Saver.writeBatch();

      // load
      ((AbstractFileLoader) m_Loader).setFile(new File(m_ExportFilename));
      data = m_Loader.getDataSet();

      // compare data
      try {
        // Align the class index on the loaded data with the original before comparing.
        if (m_Instances.classIndex() != data.classIndex()) {
          data.setClassIndex(m_Instances.classIndex());
        }
        compareDatasets(m_Instances, data);
      } catch (Exception e) {
        // This is the batch test, so report a batch failure (message previously said
        // "Incremental", which pointed at the wrong test).
        fail("Batch load failed (datasets differ): " + e.toString());
      }
    } catch (Exception e) {
      e.printStackTrace();
      fail("Batch save/load failed: " + e.toString());
    }
  }

コード例 #29

ファイルを表示

ファイル: RemoveMisclassified.java プロジェクト: naranil/weka

  /**
   * Signals that the current batch of input to the filter is complete.
   *
   * <p>On the first batch, the input format is passed to {@code cleanseTrain} (when fewer than two
   * cross-validation folds are configured) or to {@code cleanseCross} (otherwise); every instance
   * of the result is pushed to the output, the first-batch flag is set and the input is flushed.
   * On every call the new-batch flag is set before returning.
   *
   * @return true if there are instances pending output
   * @throws IllegalStateException if no input structure has been defined
   * @throws Exception declared by the signature; presumably propagated from the cleansing step
   */
  @Override
  public boolean batchFinished() throws Exception {
    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    if (!m_firstBatchFinished) {
      // Two or more folds selects the cross-validation cleanser, anything less the train one.
      final Instances cleansed =
          (m_numOfCrossValidationFolds >= 2)
              ? cleanseCross(getInputFormat())
              : cleanseTrain(getInputFormat());

      int next = 0;
      while (next < cleansed.numInstances()) {
        push(cleansed.instance(next));
        next++;
      }

      m_firstBatchFinished = true;
      flushInput();
    }

    m_NewBatch = true;

    final boolean outputPending = numPendingOutput() != 0;
    return outputPending;
  }

コード例 #30

ファイルを表示

ファイル: Logistic.java プロジェクト: Faelg5/weka

  /**
   * Folds another Logistic model into this one.
   *
   * <p>After the precondition checks pass, each entry of {@code toAggregate.m_Par} is added to
   * the entry at the same position of this model's {@code m_Par}, and the model counter is
   * incremented.
   *
   * @param toAggregate the object to aggregate
   * @return this object, after aggregation
   * @throws Exception if this model is marked as finalized ({@code m_numModels} equals {@code
   *     Integer.MIN_VALUE}), if this model has no parameters yet, or if the data headers of the
   *     two models differ
   */
  @Override
  public Logistic aggregate(Logistic toAggregate) throws Exception {
    final boolean alreadyFinalized = (m_numModels == Integer.MIN_VALUE);
    if (alreadyFinalized) {
      throw new Exception(
          "Can't aggregate further - model has already been " + "aggregated and finalized");
    }

    if (null == m_Par) {
      throw new Exception("No model built yet, can't aggregate");
    }

    final boolean headersMatch = m_structure.equalHeaders(toAggregate.m_structure);
    if (!headersMatch) {
      final String mismatch = m_structure.equalHeadersMsg(toAggregate.m_structure);
      throw new Exception("Can't aggregate - data headers dont match: " + mismatch);
    }

    // Element-wise accumulation of the other model's parameters into ours.
    for (int row = 0; row < m_Par.length; row++) {
      for (int col = 0; col < m_Par[row].length; col++) {
        m_Par[row][col] += toAggregate.m_Par[row][col];
      }
    }

    m_numModels++;

    return this;
  }