Java Instances.stringFreeStructure 예제들

프로그래밍 언어: Java

네임스페이스/패키지 이름: weka.core

클래스/타입: Instances

메소드/함수: stringFreeStructure

hotexamples.com에서의 예제들: 9

Java Instances.stringFreeStructure - 9개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Java의 weka.core.Instances.stringFreeStructure에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

add(30)

deleteWithMissingClass(30)

numClasses(30)

numAttributes(30)

setClassIndex(30)

instance(30)

numInstances(30)

classIndex(30)

classAttribute(30)

attribute(30)

relationName(19)

enumerateInstances(17)

setClass(13)

get(12)

insertAttributeAt(11)

delete(11)

meanOrMode(11)

randomize(10)

stringFreeStructure(9)

testCV(8)

attributeToDoubleArray(8)

equalHeaders(8)

sumOfWeights(7)

toString(7)

deleteAttributeAt(7)

enumerateAttributes(7)

trainCV(6)

size(6)

setRelationName(5)

stratify(5)

mergeInstances(5)

variance(5)

checkForStringAttributes(5)

equalHeadersMsg(4)

deleteStringAttributes(3)

getRandomNumberGenerator(3)

firstInstance(3)

numDistinctValues(3)

attributeStats(2)

toSummaryString(2)

clear(2)

readInstance(2)

sort(2)

deleteWithMissing(1)

lastInstance(1)

swap(1)

appendAttribute(1)

resampleWithWeights(1)

renameAttribute(1)

remove(1)

예제 #1

파일 보기

파일: FilteredAssociator.java 프로젝트: huangwen87/mdrill

  /**
   * Build the associator on the filtered data.
   *
   * @param data the training data
   * @throws Exception if the Associator could not be built successfully
   */
  public void buildAssociations(Instances data) throws Exception {
    if (m_Associator == null) throw new Exception("No base associator has been set!");

    // create copy and set class-index
    data = new Instances(data);
    if (getClassIndex() == 0) {
      data.setClassIndex(data.numAttributes() - 1);
    } else {
      data.setClassIndex(getClassIndex() - 1);
    }

    if (getClassIndex() != -1) {
      // remove instances with missing class
      data.deleteWithMissingClass();
    }

    m_Filter.setInputFormat(data); // filter capabilities are checked here
    data = Filter.useFilter(data, m_Filter);

    // can associator handle the data?
    getAssociator().getCapabilities().testWithFail(data);

    m_FilteredInstances = data.stringFreeStructure();
    m_Associator.buildAssociations(data);
  }

예제 #2

파일 보기

파일: KEAServer.java 프로젝트: ajeygsi/keyword

  private List<Instance> myExtractKeyphrases(String document, int numOfPhrases) throws Exception {

    // Check whether there is actually any data
    //
    if (document.length() == 0 || document.equals("")) {
      throw new Exception("Couldn't find any data!");
    }

    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("doc", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    List<Instance> myInstances = new ArrayList<Instance>();

    double[] newInst = new double[2];
    newInst[0] = (double) data.attribute(0).addStringValue(document);
    newInst[1] = Instance.missingValue();

    data.add(new Instance(1.0, newInst));

    m_KEAFilter.input(data.instance(0));

    data = data.stringFreeStructure();

    ke.setNumPhrases(numOfPhrases);

    int numPhrases = numOfPhrases; // ke.getNumPhrases();

    Instance[] topRankedInstances = new Instance[numPhrases];
    Instance inst;

    // Iterating over all extracted keyphrases (inst)
    while ((inst = m_KEAFilter.output()) != null) {
      int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1;

      if (index < numPhrases) {
        topRankedInstances[index] = inst;
      }
    }

    double numExtracted = 0, numCorrect = 0;

    for (int i = 0; i < numPhrases; i++) {
      if (topRankedInstances[i] != null) {
        if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) {
          numExtracted += 1.0;
        }
        if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) {
          numCorrect += 1.0;
        }
        myInstances.add(topRankedInstances[i]);
      }
    }

    return myInstances;
  }

예제 #3

파일 보기

파일: MessageClassify.java 프로젝트: yuyefeiwu20160615/NERDisambiguationBySVM

  /** 分类过程 */
  public double classifyMessage(String message) throws Exception {

    filter.input(makeInstance(message, instances.stringFreeStructure()));
    Instance filteredInstance = filter.output(); // 必须使用原来的filter

    double predicted = classifier.classifyInstance(filteredInstance); // (int)predicted是类标索引
    //        System.out.println("Message classified as : "
    //                + instances.classAttribute().value((int) predicted));
    return predicted;
  }

예제 #4

파일 보기

파일: Filter.java 프로젝트: hhyifeng/algorithm

  /**
   * Sets the format of the input instances. If the filter is able to determine the output format
   * before seeing any input instances, it does so here. This default implementation clears the
   * output format and output queue, and the new batch flag is set. Overriders should call <code>
   * super.setInputFormat(Instances)</code>
   *
   * @param instanceInfo an Instances object containing the input instance structure (any instances
   *     contained in the object are ignored - only the structure is required).
   * @return true if the outputFormat may be collected immediately
   * @throws Exception if the inputFormat can't be set successfully
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {

    testInputFormat(instanceInfo);

    m_InputFormat = instanceInfo.stringFreeStructure();
    m_OutputFormat = null;
    m_OutputQueue = new Queue();
    m_NewBatch = true;
    m_FirstBatchDone = false;
    initInputLocators(m_InputFormat, null);
    return false;
  }

예제 #5

파일 보기

파일: Filter.java 프로젝트: hhyifeng/algorithm

  /**
   * This will remove all buffered instances from the inputformat dataset. Use this method rather
   * than getInputFormat().delete();
   */
  protected void flushInput() {

    if ((m_InputStringAtts.getAttributeIndices().length > 0)
        || (m_InputRelAtts.getAttributeIndices().length > 0)) {
      m_InputFormat = m_InputFormat.stringFreeStructure();
      m_InputStringAtts = new StringLocator(m_InputFormat, m_InputStringAtts.getAllowedIndices());
      m_InputRelAtts = new RelationalLocator(m_InputFormat, m_InputRelAtts.getAllowedIndices());
    } else {
      // This more efficient than new Instances(m_InputFormat, 0);
      m_InputFormat.delete();
    }
  }

예제 #6

파일 보기

파일: Filter.java 프로젝트: hhyifeng/algorithm

  /**
   * Sets the format of output instances. The derived class should use this method once it has
   * determined the outputformat. The output queue is cleared.
   *
   * @param outputFormat the new output format
   */
  protected void setOutputFormat(Instances outputFormat) {

    if (outputFormat != null) {
      m_OutputFormat = outputFormat.stringFreeStructure();
      initOutputLocators(m_OutputFormat, null);

      // Rename the relation
      String relationName = outputFormat.relationName() + "-" + this.getClass().getName();
      if (this instanceof OptionHandler) {
        String[] options = ((OptionHandler) this).getOptions();
        for (int i = 0; i < options.length; i++) {
          relationName += options[i].trim();
        }
      }
      m_OutputFormat.setRelationName(relationName);
    } else {
      m_OutputFormat = null;
    }
    m_OutputQueue = new Queue();
  }

예제 #7

파일 보기

파일: Filter.java 프로젝트: hhyifeng/algorithm

  /**
   * Signify that this batch of input to the filter is finished. If the filter requires all
   * instances prior to filtering, output() may now be called to retrieve the filtered instances.
   * Any subsequent instances filtered should be filtered based on setting obtained from the first
   * batch (unless the inputFormat has been re-assigned or new options have been set). This default
   * implementation assumes all instance processing occurs during inputFormat() and input().
   *
   * @return true if there are instances pending output
   * @throws NullPointerException if no input structure has been defined,
   * @throws Exception if there was a problem finishing the batch.
   */
  public boolean batchFinished() throws Exception {

    if (m_InputFormat == null) {
      throw new NullPointerException("No input instance format defined");
    }
    flushInput();
    m_NewBatch = true;
    m_FirstBatchDone = true;

    if (m_OutputQueue.empty()) {
      // Clear out references to old strings/relationals occasionally
      if ((m_OutputStringAtts.getAttributeIndices().length > 0)
          || (m_OutputRelAtts.getAttributeIndices().length > 0)) {
        m_OutputFormat = m_OutputFormat.stringFreeStructure();
        m_OutputStringAtts =
            new StringLocator(m_OutputFormat, m_OutputStringAtts.getAllowedIndices());
      }
    }

    return (numPendingOutput() != 0);
  }

예제 #8

파일 보기

파일: KEAKeyphraseExtractor.java 프로젝트: rafaelhss/analisador-dou

  /** Builds the model from the files */
  public void extractKeyphrases(Hashtable stems) throws Exception {

    Vector stats = new Vector();

    // Check whether there is actually any data
    if (stems.size() == 0) {
      throw new Exception("Couldn't find any data!");
    }

    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("doc", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    // Extract keyphrases
    Enumeration elem = stems.keys();
    while (elem.hasMoreElements()) {
      String str = (String) elem.nextElement();
      double[] newInst = new double[2];
      try {
        File txt = new File(m_dirName + "/" + str + ".txt");
        Reader is;
        if (!m_encoding.equals("default")) {
          is = new BomStrippingInputStreamReader(new FileInputStream(txt), m_encoding);
        } else {
          is = new BomStrippingInputStreamReader(new FileInputStream(txt));
        }
        StringBuffer txtStr = new StringBuffer();
        int c;
        while ((c = is.read()) != -1) {
          txtStr.append((char) c);
        }
        newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
      } catch (Exception e) {
        if (m_debug) {
          System.err.println("Can't read document " + str + ".txt");
        }
        newInst[0] = Instance.missingValue();
      }
      try {
        File key = new File(m_dirName + "/" + str + ".key");
        Reader is;
        if (!m_encoding.equals("default")) {
          is = new BomStrippingInputStreamReader(new FileInputStream(key), m_encoding);
        } else {
          is = new BomStrippingInputStreamReader(new FileInputStream(key));
        }
        StringBuffer keyStr = new StringBuffer();
        int c;
        while ((c = is.read()) != -1) {
          keyStr.append((char) c);
        }
        newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString());
      } catch (Exception e) {
        if (m_debug) {
          System.err.println("No keyphrases for stem " + str + ".");
        }
        newInst[1] = Instance.missingValue();
      }
      data.add(new Instance(1.0, newInst));
      m_KEAFilter.input(data.instance(0));
      data = data.stringFreeStructure();
      if (m_debug) {
        System.err.println("-- Document: " + str);
      }
      Instance[] topRankedInstances = new Instance[m_numPhrases];
      Instance inst;
      while ((inst = m_KEAFilter.output()) != null) {
        int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1;
        if (index < m_numPhrases) {
          topRankedInstances[index] = inst;
        }
      }
      if (m_debug) {
        System.err.println("-- Keyphrases and feature values:");
      }
      FileOutputStream out = null;
      PrintWriter printer = null;
      File key = new File(m_dirName + "/" + str + ".key");
      if (!key.exists()) {
        out = new FileOutputStream(m_dirName + "/" + str + ".key");
        if (!m_encoding.equals("default")) {
          printer = new PrintWriter(new OutputStreamWriter(out, m_encoding));
        } else {
          printer = new PrintWriter(out);
        }
      }
      double numExtracted = 0, numCorrect = 0;
      for (int i = 0; i < m_numPhrases; i++) {
        if (topRankedInstances[i] != null) {
          if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) {
            numExtracted += 1.0;
          }
          if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1)
              == topRankedInstances[i]
                  .attribute(topRankedInstances[i].numAttributes() - 1)
                  .indexOfValue("True")) {
            numCorrect += 1.0;
          }
          if (printer != null) {
            printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getUnstemmedPhraseIndex()));
            if (m_AdditionalInfo) {
              printer.print("\t");
              printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getStemmedPhraseIndex()));
              printer.print("\t");
              printer.print(
                  Utils.doubleToString(
                      topRankedInstances[i].value(m_KEAFilter.getProbabilityIndex()), 4));
            }
            printer.println();
          }
          if (m_debug) {
            System.err.println(topRankedInstances[i]);
          }
        }
      }
      if (numExtracted > 0) {
        if (m_debug) {
          System.err.println("-- " + numCorrect + " correct");
        }
        stats.addElement(new Double(numCorrect));
      }
      if (printer != null) {
        printer.flush();
        printer.close();
        out.close();
      }
    }
    double[] st = new double[stats.size()];
    for (int i = 0; i < stats.size(); i++) {
      st[i] = ((Double) stats.elementAt(i)).doubleValue();
    }
    double avg = Utils.mean(st);
    double stdDev = Math.sqrt(Utils.variance(st));
    System.err.println(
        "Avg. number of correct keyphrases: "
            + Utils.doubleToString(avg, 2)
            + " +/- "
            + Utils.doubleToString(stdDev, 2));
    System.err.println("Based on " + stats.size() + " documents");
    m_KEAFilter.batchFinished();
  }

예제 #9

파일 보기

파일: MauiModelBuilder.java 프로젝트: pycal/topic-diviner

  /** Builds the model from the training data */
  public void buildModel(HashSet<String> fileNames) throws Exception {

    // Check whether there is actually any data
    if (fileNames.size() == 0) {
      throw new Exception("Couldn't find any data in " + inputDirectoryName);
    }

    System.err.println("-- Building the model... ");

    FastVector atts = new FastVector(3);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("document", (FastVector) null));
    atts.addElement(new Attribute("keyphrases", (FastVector) null));
    Instances data = new Instances("keyphrase_training_data", atts, 0);

    // Build model
    mauiFilter = new MauiFilter();

    mauiFilter.setDebug(getDebug());
    mauiFilter.setMaxPhraseLength(getMaxPhraseLength());
    mauiFilter.setMinPhraseLength(getMinPhraseLength());
    mauiFilter.setMinNumOccur(getMinNumOccur());
    mauiFilter.setStemmer(getStemmer());
    mauiFilter.setDocumentLanguage(getDocumentLanguage());
    mauiFilter.setVocabularyName(getVocabularyName());
    mauiFilter.setVocabularyFormat(getVocabularyFormat());
    mauiFilter.setStopwords(getStopwords());

    if (wikipedia != null) {
      mauiFilter.setWikipedia(wikipedia);
    } else if (wikipediaServer.equals("localhost") && wikipediaDatabase.equals("database")) {
      mauiFilter.setWikipedia(wikipedia);
    } else {
      mauiFilter.setWikipedia(
          wikipediaServer, wikipediaDatabase, cacheWikipediaData, wikipediaDataDirectory);
    }

    if (classifier != null) {
      mauiFilter.setClassifier(classifier);
    }

    mauiFilter.setInputFormat(data);

    // set features configurations
    mauiFilter.setBasicFeatures(useBasicFeatures);
    mauiFilter.setKeyphrasenessFeature(useKeyphrasenessFeature);
    mauiFilter.setFrequencyFeatures(useFrequencyFeatures);
    mauiFilter.setPositionsFeatures(usePositionsFeatures);
    mauiFilter.setLengthFeature(useLengthFeature);
    mauiFilter.setThesaurusFeatures(useNodeDegreeFeature);
    mauiFilter.setBasicWikipediaFeatures(useBasicWikipediaFeatures);
    mauiFilter.setAllWikipediaFeatures(useAllWikipediaFeatures);
    mauiFilter.setThesaurusFeatures(useNodeDegreeFeature);

    mauiFilter.setClassifier(classifier);

    mauiFilter.setContextSize(contextSize);
    mauiFilter.setMinKeyphraseness(minKeyphraseness);
    mauiFilter.setMinSenseProbability(minSenseProbability);

    if (!vocabularyName.equals("none") && !vocabularyName.equals("wikipedia")) {
      mauiFilter.loadThesaurus(getStemmer(), getStopwords());
    }

    System.err.println("-- Reading the input documents... ");

    for (String fileName : fileNames) {

      double[] newInst = new double[3];

      newInst[0] = (double) data.attribute(0).addStringValue(fileName);
      ;

      File documentTextFile = new File(inputDirectoryName + "/" + fileName + ".txt");
      File documentTopicsFile = new File(inputDirectoryName + "/" + fileName + ".key");

      try {

        InputStreamReader is;
        if (!documentEncoding.equals("default")) {
          is = new InputStreamReader(new FileInputStream(documentTextFile), documentEncoding);
        } else {
          is = new InputStreamReader(new FileInputStream(documentTextFile));
        }

        // Reading the file content
        StringBuffer txtStr = new StringBuffer();
        int c;
        while ((c = is.read()) != -1) {
          txtStr.append((char) c);
        }
        is.close();

        // Adding the text of the document to the instance
        newInst[1] = (double) data.attribute(1).addStringValue(txtStr.toString());

      } catch (Exception e) {

        System.err.println("Problem with reading " + documentTextFile);
        e.printStackTrace();
        newInst[1] = Instance.missingValue();
      }

      try {

        InputStreamReader is;
        if (!documentEncoding.equals("default")) {
          is = new InputStreamReader(new FileInputStream(documentTopicsFile), documentEncoding);
        } else {
          is = new InputStreamReader(new FileInputStream(documentTopicsFile));
        }

        // Reading the content of the keyphrase file
        StringBuffer keyStr = new StringBuffer();
        int c;
        while ((c = is.read()) != -1) {
          keyStr.append((char) c);
        }

        // Adding the topics to the file
        newInst[2] = (double) data.attribute(2).addStringValue(keyStr.toString());

      } catch (Exception e) {

        System.err.println("Problem with reading " + documentTopicsFile);
        e.printStackTrace();
        newInst[2] = Instance.missingValue();
      }

      data.add(new Instance(1.0, newInst));

      mauiFilter.input(data.instance(0));
      data = data.stringFreeStructure();
    }
    mauiFilter.batchFinished();

    while ((mauiFilter.output()) != null) {}
    ;
  }