Java Instances.checkForStringAttributes 예제들

프로그래밍 언어: Java

네임스페이스/패키지 이름: weka.core

클래스/타입: Instances

메소드/함수: checkForStringAttributes

hotexamples.com에서의 예제들: 5

Java Instances.checkForStringAttributes - 5개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Java의 weka.core.Instances.checkForStringAttributes에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

add(30)

deleteWithMissingClass(30)

numClasses(30)

numAttributes(30)

setClassIndex(30)

instance(30)

numInstances(30)

classIndex(30)

classAttribute(30)

attribute(30)

relationName(19)

enumerateInstances(17)

setClass(13)

get(12)

insertAttributeAt(11)

delete(11)

meanOrMode(11)

randomize(10)

stringFreeStructure(9)

testCV(8)

attributeToDoubleArray(8)

equalHeaders(8)

sumOfWeights(7)

toString(7)

deleteAttributeAt(7)

enumerateAttributes(7)

trainCV(6)

size(6)

setRelationName(5)

stratify(5)

mergeInstances(5)

variance(5)

checkForStringAttributes(5)

equalHeadersMsg(4)

deleteStringAttributes(3)

getRandomNumberGenerator(3)

firstInstance(3)

numDistinctValues(3)

attributeStats(2)

toSummaryString(2)

clear(2)

readInstance(2)

sort(2)

deleteWithMissing(1)

lastInstance(1)

swap(1)

appendAttribute(1)

resampleWithWeights(1)

renameAttribute(1)

remove(1)

예제 #1

파일 보기

파일: WekaScoringMapTask.java 프로젝트: CSLeicester/weka

  /**
   * Set the model to use
   *
   * @param model the model to use
   * @param modelHeader the header of the training data used to train the model
   * @param dataHeader the header of the incoming data
   * @throws DistributedWekaException if more than 50% of the attributes expected by the model are
   *     missing or have a type mismatch with the incoming data
   */
  public void setModel(Object model, Instances modelHeader, Instances dataHeader)
      throws DistributedWekaException {

    m_missingMismatch.clear();

    if (dataHeader == null || modelHeader == null) {
      throw new DistributedWekaException(
          "Can't continue without a header for the model and incoming data");
    }
    try {
      m_isUsingStringAttributes = modelHeader.checkForStringAttributes();
      m_model = ScoringModel.createScorer(model);

      if (modelHeader != null) {
        m_model.setHeader(modelHeader);
      }

      if (m_model.isBatchPredicor()) {
        m_batchScoringData = new Instances(modelHeader, 0);
        Environment env = Environment.getSystemWide();
        String batchSize = ((BatchPredictor) model).getBatchSize();
        if (!DistributedJobConfig.isEmpty(batchSize)) {
          m_batchSize = Integer.parseInt(env.substitute(batchSize));
        } else {
          m_batchSize = 1000;
        }
      }
    } catch (Exception ex) {
      throw new DistributedWekaException(ex);
    }

    buildAttributeMap(modelHeader, dataHeader);
  }

예제 #2

파일 보기

파일: FarthestFirst.java 프로젝트: bigbigbug/wekax

  /**
   * Generates a clusterer. Has to initialize all fields of the clusterer that are not being set via
   * options.
   *
   * @param data set of instances serving as training data
   * @exception Exception if the clusterer has not been generated successfully
   */
  public void buildClusterer(Instances data) throws Exception {

    // long start = System.currentTimeMillis();
    if (data.checkForStringAttributes()) {
      throw new Exception("Can't handle string attributes!");
    }

    m_ReplaceMissingFilter = new ReplaceMissingValues();
    m_ReplaceMissingFilter.setInputFormat(data);
    m_instances = Filter.useFilter(data, m_ReplaceMissingFilter);

    initMinMax(m_instances);

    m_ClusterCentroids = new Instances(m_instances, m_NumClusters);

    int n = m_instances.numInstances();
    Random r = new Random(m_Seed);
    boolean[] selected = new boolean[n];
    double[] minDistance = new double[n];

    for (int i = 0; i < n; i++) minDistance[i] = Double.MAX_VALUE;

    int firstI = r.nextInt(n);
    m_ClusterCentroids.add(m_instances.instance(firstI));
    selected[firstI] = true;

    updateMinDistance(minDistance, selected, m_instances, m_instances.instance(firstI));

    if (m_NumClusters > n) m_NumClusters = n;

    for (int i = 1; i < m_NumClusters; i++) {
      int nextI = farthestAway(minDistance, selected);
      m_ClusterCentroids.add(m_instances.instance(nextI));
      selected[nextI] = true;
      updateMinDistance(minDistance, selected, m_instances, m_instances.instance(nextI));
    }

    m_instances = new Instances(m_instances, 0);
    // long end = System.currentTimeMillis();
    // System.out.println("Clustering Time = " + (end-start));
  }

예제 #3

파일 보기

파일: Decorate.java 프로젝트: paolopavan/cfr

  /**
   * Build Decorate classifier
   *
   * @param data the training data to be used for generating the classifier
   * @exception Exception if the classifier could not be built successfully
   */
  public void buildClassifier(Instances data) throws Exception {
    if (m_Classifier == null) {
      throw new Exception("A base classifier has not been specified!");
    }
    if (data.checkForStringAttributes()) {
      throw new UnsupportedAttributeTypeException("Cannot handle string attributes!");
    }
    if (data.classAttribute().isNumeric()) {
      throw new UnsupportedClassTypeException("Decorate can't handle a numeric class!");
    }
    if (m_NumIterations < m_DesiredSize)
      throw new Exception("Max number of iterations must be >= desired ensemble size!");

    // initialize random number generator
    if (m_Seed == -1) m_Random = new Random();
    else m_Random = new Random(m_Seed);

    int i = 1; // current committee size
    int numTrials = 1; // number of Decorate iterations
    Instances divData = new Instances(data); // local copy of data - diversity data
    divData.deleteWithMissingClass();
    Instances artData = null; // artificial data

    // compute number of artficial instances to add at each iteration
    int artSize = (int) (Math.abs(m_ArtSize) * divData.numInstances());
    if (artSize == 0) artSize = 1; // atleast add one random example
    computeStats(data); // Compute training data stats for creating artificial examples

    // initialize new committee
    m_Committee = new Vector();
    Classifier newClassifier = m_Classifier;
    newClassifier.buildClassifier(divData);
    m_Committee.add(newClassifier);
    double eComm = computeError(divData); // compute ensemble error
    if (m_Debug)
      System.out.println(
          "Initialize:\tClassifier " + i + " added to ensemble. Ensemble error = " + eComm);

    // repeat till desired committee size is reached OR the max number of iterations is exceeded
    while (i < m_DesiredSize && numTrials < m_NumIterations) {
      // Generate artificial training examples
      artData = generateArtificialData(artSize, data);

      // Label artificial examples
      labelData(artData);
      addInstances(divData, artData); // Add new artificial data

      // Build new classifier
      Classifier tmp[] = Classifier.makeCopies(m_Classifier, 1);
      newClassifier = tmp[0];
      newClassifier.buildClassifier(divData);
      // Remove all the artificial data
      removeInstances(divData, artSize);

      // Test if the new classifier should be added to the ensemble
      m_Committee.add(newClassifier); // add new classifier to current committee
      double currError = computeError(divData);
      if (currError <= eComm) { // adding the new member did not increase the error
        i++;
        eComm = currError;
        if (m_Debug)
          System.out.println(
              "Iteration: "
                  + (1 + numTrials)
                  + "\tClassifier "
                  + i
                  + " added to ensemble. Ensemble error = "
                  + eComm);
      } else { // reject the current classifier because it increased the ensemble error
        m_Committee.removeElementAt(m_Committee.size() - 1); // pop the last member
      }
      numTrials++;
    }
  }

예제 #4

파일 보기

파일: PredictiveApriori.java 프로젝트: paolopavan/cfr

  /**
   * Method that generates all large itemsets with a minimum support, and from these all association
   * rules.
   *
   * @param instances the instances to be used for generating the associations
   * @exception Exception if rules can't be built successfully
   */
  public void buildAssociations(Instances instances) throws Exception {

    int temp = m_premiseCount, exactNumber = m_numRules - 5;

    if (instances.checkForStringAttributes()) {
      throw new Exception("Can't handle string attributes!");
    }
    m_instances = instances;
    m_instances.setClassIndex(m_instances.numAttributes() - 1);

    // prior estimation
    m_priorEstimator = new PriorEstimation(m_instances, m_numRandRules, m_numIntervals, false);
    m_priors = m_priorEstimator.estimatePrior();
    m_midPoints = m_priorEstimator.getMidPoints();

    m_Ls = new FastVector();
    m_hashtables = new FastVector();

    for (int i = 1; i < m_instances.numAttributes(); i++) {
      m_bestChanged = false;

      // find large item sets
      findLargeItemSets(i);

      // find association rules (rule generation procedure)
      findRulesQuickly();

      if (m_bestChanged) {
        temp = m_premiseCount;
        while (RuleGeneration.expectation(m_premiseCount, m_premiseCount, m_midPoints, m_priors)
            <= m_expectation) {
          m_premiseCount++;
          if (m_premiseCount > m_instances.numInstances()) break;
        }
      }
      if (m_premiseCount > m_instances.numInstances()) {

        // Reserve space for variables
        m_allTheRules = new FastVector[3];
        m_allTheRules[0] = new FastVector();
        m_allTheRules[1] = new FastVector();
        m_allTheRules[2] = new FastVector();

        int k = 0;
        while (m_best.size() > 0 && exactNumber > 0) {
          m_allTheRules[0].insertElementAt((ItemSet) ((RuleItem) m_best.last()).premise(), k);
          m_allTheRules[1].insertElementAt((ItemSet) ((RuleItem) m_best.last()).consequence(), k);
          m_allTheRules[2].insertElementAt(new Double(((RuleItem) m_best.last()).accuracy()), k);
          boolean remove = m_best.remove(m_best.last());
          k++;
          exactNumber--;
        }
        return;
      }

      if (temp != m_premiseCount && m_Ls.size() > 0) {
        FastVector kSets = (FastVector) m_Ls.lastElement();
        m_Ls.removeElementAt(m_Ls.size() - 1);
        kSets = ItemSet.deleteItemSets(kSets, m_premiseCount, Integer.MAX_VALUE);
        m_Ls.addElement(kSets);
      }
    }

    // Reserve space for variables
    m_allTheRules = new FastVector[3];
    m_allTheRules[0] = new FastVector();
    m_allTheRules[1] = new FastVector();
    m_allTheRules[2] = new FastVector();

    int k = 0;
    while (m_best.size() > 0 && exactNumber > 0) {
      m_allTheRules[0].insertElementAt((ItemSet) ((RuleItem) m_best.last()).premise(), k);
      m_allTheRules[1].insertElementAt((ItemSet) ((RuleItem) m_best.last()).consequence(), k);
      m_allTheRules[2].insertElementAt(new Double(((RuleItem) m_best.last()).accuracy()), k);
      boolean remove = m_best.remove(m_best.last());
      k++;
      exactNumber--;
    }
  }

예제 #5

파일 보기

파일: BVDecompose.java 프로젝트: CSLeicester/weka

  /**
   * Carry out the bias-variance decomposition
   *
   * @throws Exception if the decomposition couldn't be carried out
   */
  public void decompose() throws Exception {

    Reader dataReader = new BufferedReader(new FileReader(m_DataFileName));
    Instances data = new Instances(dataReader);

    if (m_ClassIndex < 0) {
      data.setClassIndex(data.numAttributes() - 1);
    } else {
      data.setClassIndex(m_ClassIndex);
    }
    if (data.classAttribute().type() != Attribute.NOMINAL) {
      throw new Exception("Class attribute must be nominal");
    }
    int numClasses = data.numClasses();

    data.deleteWithMissingClass();
    if (data.checkForStringAttributes()) {
      throw new Exception("Can't handle string attributes!");
    }

    if (data.numInstances() < 2 * m_TrainPoolSize) {
      throw new Exception(
          "The dataset must contain at least " + (2 * m_TrainPoolSize) + " instances");
    }
    Random random = new Random(m_Seed);
    data.randomize(random);
    Instances trainPool = new Instances(data, 0, m_TrainPoolSize);
    Instances test = new Instances(data, m_TrainPoolSize, data.numInstances() - m_TrainPoolSize);
    int numTest = test.numInstances();
    double[][] instanceProbs = new double[numTest][numClasses];

    m_Error = 0;
    for (int i = 0; i < m_TrainIterations; i++) {
      if (m_Debug) {
        System.err.println("Iteration " + (i + 1));
      }
      trainPool.randomize(random);
      Instances train = new Instances(trainPool, 0, m_TrainPoolSize / 2);

      Classifier current = AbstractClassifier.makeCopy(m_Classifier);
      current.buildClassifier(train);

      //// Evaluate the classifier on test, updating BVD stats
      for (int j = 0; j < numTest; j++) {
        int pred = (int) current.classifyInstance(test.instance(j));
        if (pred != test.instance(j).classValue()) {
          m_Error++;
        }
        instanceProbs[j][pred]++;
      }
    }
    m_Error /= (m_TrainIterations * numTest);

    // Average the BV over each instance in test.
    m_Bias = 0;
    m_Variance = 0;
    m_Sigma = 0;
    for (int i = 0; i < numTest; i++) {
      Instance current = test.instance(i);
      double[] predProbs = instanceProbs[i];
      double pActual, pPred;
      double bsum = 0, vsum = 0, ssum = 0;
      for (int j = 0; j < numClasses; j++) {
        pActual = (current.classValue() == j) ? 1 : 0; // Or via 1NN from test data?
        pPred = predProbs[j] / m_TrainIterations;
        bsum +=
            (pActual - pPred) * (pActual - pPred) - pPred * (1 - pPred) / (m_TrainIterations - 1);
        vsum += pPred * pPred;
        ssum += pActual * pActual;
      }
      m_Bias += bsum;
      m_Variance += (1 - vsum);
      m_Sigma += (1 - ssum);
    }
    m_Bias /= (2 * numTest);
    m_Variance /= (2 * numTest);
    m_Sigma /= (2 * numTest);

    if (m_Debug) {
      System.err.println("Decomposition finished");
    }
  }