Пример #1
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   * -C &lt;col&gt;
   *  Sets the attribute index (default last).
   * </pre>
   *
   * <pre>
   * -F &lt;value index&gt;
   *  Sets the first value's index (default first).
   * </pre>
   *
   * <pre>
   * -S &lt;value index&gt;
   *  Sets the second value's index (default last).
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    String attIndex = Utils.getOption('C', options);
    if (attIndex.length() != 0) {
      setAttributeIndex(attIndex);
    } else {
      setAttributeIndex("last");
    }

    String firstValIndex = Utils.getOption('F', options);
    if (firstValIndex.length() != 0) {
      setFirstValueIndex(firstValIndex);
    } else {
      setFirstValueIndex("first");
    }

    String secondValIndex = Utils.getOption('S', options);
    if (secondValIndex.length() != 0) {
      setSecondValueIndex(secondValIndex);
    } else {
      setSecondValueIndex("last");
    }

    if (getInputFormat() != null) {
      setInputFormat(getInputFormat());
    }

    Utils.checkForRemainingOptions(options);
  }
Пример #2
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -R &lt;col1,col2-col4,...&gt;
   *  Specifies list of columns to Discretize. First and last are valid indexes.
   *  (default none)</pre>
   *
   * <pre> -V
   *  Invert matching sense of column indexes.</pre>
   *
   * <pre> -D
   *  Output binary attributes for discretized attributes.</pre>
   *
   * <pre> -Y
   *  Use bin numbers rather than ranges for discretized attributes.</pre>
   *
   * <pre> -E
   *  Use better encoding of split point for MDL.</pre>
   *
   * <pre> -K
   *  Use Kononenko's MDL criterion.</pre>
   *
   * <pre> -precision &lt;integer&gt;
   *  Precision for bin boundary labels.
   *  (default = 6 decimal places).</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    setMakeBinary(Utils.getFlag('D', options));
    setUseBinNumbers(Utils.getFlag('Y', options));
    setUseBetterEncoding(Utils.getFlag('E', options));
    setUseKononenko(Utils.getFlag('K', options));
    setInvertSelection(Utils.getFlag('V', options));

    String convertList = Utils.getOption('R', options);
    if (convertList.length() != 0) {
      setAttributeIndices(convertList);
    } else {
      setAttributeIndices("first-last");
    }

    String precisionS = Utils.getOption("precision", options);
    if (precisionS.length() > 0) {
      setBinRangePrecision(Integer.parseInt(precisionS));
    }

    if (getInputFormat() != null) {
      setInputFormat(getInputFormat());
    }

    Utils.checkForRemainingOptions(options);
  }
  /**
   * Parses a given list of options controlling the behaviour of this object. Valid options are:
   *
   * <p>-l "directory name"<br>
   * Specifies name of directory.
   *
   * <p>-m "model name"<br>
   * Specifies name of model.
   *
   * <p>-e "encoding"<br>
   * Specifies encoding.
   *
   * <p>-n<br>
   * Specifies number of phrases to be output (default: 5).
   *
   * <p>-d<br>
   * Turns debugging mode on.
   *
   * <p>-a<br>
   * Also write stemmed phrase and score into ".key" file.
   *
   * <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    String dirName = Utils.getOption('l', options);
    if (dirName.length() > 0) {
      setDirName(dirName);
    } else {
      setDirName(null);
      throw new Exception("Name of directory required argument.");
    }
    String modelName = Utils.getOption('m', options);
    if (modelName.length() > 0) {
      setModelName(modelName);
    } else {
      setModelName(null);
      throw new Exception("Name of model required argument.");
    }
    String encoding = Utils.getOption('e', options);
    if (encoding.length() > 0) {
      setEncoding(encoding);
    } else {
      setEncoding("default");
    }
    String numPhrases = Utils.getOption('n', options);
    if (numPhrases.length() > 0) {
      setNumPhrases(Integer.parseInt(numPhrases));
    } else {
      setNumPhrases(5);
    }
    setDebug(Utils.getFlag('d', options));
    setAdditionalInfo(Utils.getFlag('a', options));
    Utils.checkForRemainingOptions(options);
  }
Пример #4
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -B &lt;num&gt;
   *  Manual blend setting (default 20%)
   * </pre>
   *
   * <pre> -E
   *  Enable entropic auto-blend setting (symbolic class only)
   * </pre>
   *
   * <pre> -M &lt;char&gt;
   *  Specify the missing value treatment mode (default a)
   *  Valid options are: a(verage), d(elete), m(axdiff), n(ormal)
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String debug = "(KStar.setOptions)";
    String blendStr = Utils.getOption('B', options);
    if (blendStr.length() != 0) {
      setGlobalBlend(Integer.parseInt(blendStr));
    }

    setEntropicAutoBlend(Utils.getFlag('E', options));

    String missingModeStr = Utils.getOption('M', options);
    if (missingModeStr.length() != 0) {
      switch (missingModeStr.charAt(0)) {
        case 'a':
          setMissingMode(new SelectedTag(M_AVERAGE, TAGS_MISSING));
          break;
        case 'd':
          setMissingMode(new SelectedTag(M_DELETE, TAGS_MISSING));
          break;
        case 'm':
          setMissingMode(new SelectedTag(M_MAXDIFF, TAGS_MISSING));
          break;
        case 'n':
          setMissingMode(new SelectedTag(M_NORMAL, TAGS_MISSING));
          break;
        default:
          setMissingMode(new SelectedTag(M_AVERAGE, TAGS_MISSING));
      }
    }
    Utils.checkForRemainingOptions(options);
  }
Пример #5
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -B
   *  Binary splits (convert nominal attributes to binary ones) </pre>
   *
   * <pre> -P
   *  Use error on probabilities instead of misclassification error for stopping criterion of LogitBoost.
   * </pre>
   *
   * <pre> -I &lt;numIterations&gt;
   *  Set fixed number of iterations for LogitBoost (instead of using cross-validation)</pre>
   *
   * <pre> -F &lt;modelType&gt;
   *  Set Funtional Tree type to be generate:  0 for FT, 1 for FTLeaves and 2 for FTInner</pre>
   *
   * <pre> -M &lt;numInstances&gt;
   *  Set minimum number of instances at which a node can be split (default 15)</pre>
   *
   * <pre> -W &lt;beta&gt;
   *  Set beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.</pre>
   *
   * <pre> -A
   *  The AIC is used to choose the best iteration.</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    setBinSplit(Utils.getFlag('B', options));
    setErrorOnProbabilities(Utils.getFlag('P', options));

    String optionString = Utils.getOption('I', options);
    if (optionString.length() != 0) {
      setNumBoostingIterations((new Integer(optionString)).intValue());
    }

    optionString = Utils.getOption('F', options);
    if (optionString.length() != 0) {
      setModelType(new SelectedTag(Integer.parseInt(optionString), TAGS_MODEL));
      // setModelType((new Integer(optionString)).intValue());
    }

    optionString = Utils.getOption('M', options);
    if (optionString.length() != 0) {
      setMinNumInstances((new Integer(optionString)).intValue());
    }

    optionString = Utils.getOption('W', options);
    if (optionString.length() != 0) {
      setWeightTrimBeta((new Double(optionString)).doubleValue());
    }

    setUseAIC(Utils.getFlag('A', options));

    Utils.checkForRemainingOptions(options);
  }
Пример #6
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   * -N
   *  Sets if binary attributes are to be coded as nominal ones.
   * </pre>
   *
   * <pre>
   * -A
   *  For each nominal value a new attribute is created,
   *  not only if there are more than 2 values.
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    setBinaryAttributesNominal(Utils.getFlag('N', options));

    setTransformAllValues(Utils.getFlag('A', options));

    if (getInputFormat() != null) {
      setInputFormat(getInputFormat());
    }

    Utils.checkForRemainingOptions(options);
  }
Пример #7
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -C
   *  Don't weight voting intervals by confidence</pre>
   *
   * <pre> -B &lt;bias&gt;
   *  Set exponential bias towards confident intervals
   *  (default = 1.0)</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String optionString;

    setWeightByConfidence(!Utils.getFlag('C', options));

    optionString = Utils.getOption('B', options);
    if (optionString.length() != 0) {
      Double temp = new Double(optionString);
      setBias(temp.doubleValue());
    }

    Utils.checkForRemainingOptions(options);
  }
Пример #8
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   * -W &lt;classifier specification&gt;
   *  Full class name of classifier to use, followed
   *  by scheme options. eg:
   *   "weka.classifiers.bayes.NaiveBayes -D"
   *  (default: weka.classifiers.rules.ZeroR)
   * </pre>
   *
   * <pre>
   * -C &lt;class index&gt;
   *  Attribute on which misclassifications are based.
   *  If &lt; 0 will use any current set class or default to the last attribute.
   * </pre>
   *
   * <pre>
   * -F &lt;number of folds&gt;
   *  The number of folds to use for cross-validation cleansing.
   *  (&lt;2 = no cross-validation - default).
   * </pre>
   *
   * <pre>
   * -T &lt;threshold&gt;
   *  Threshold for the max error when predicting numeric class.
   *  (Value should be &gt;= 0, default = 0.1).
   * </pre>
   *
   * <pre>
   * -I
   *  The maximum number of cleansing iterations to perform.
   *  (&lt;1 = until fully cleansed - default)
   * </pre>
   *
   * <pre>
   * -V
   *  Invert the match so that correctly classified instances are discarded.
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    String classifierString = Utils.getOption('W', options);
    if (classifierString.length() == 0) {
      classifierString = weka.classifiers.rules.ZeroR.class.getName();
    }
    String[] classifierSpec = Utils.splitOptions(classifierString);
    if (classifierSpec.length == 0) {
      throw new Exception("Invalid classifier specification string");
    }
    String classifierName = classifierSpec[0];
    classifierSpec[0] = "";
    setClassifier(AbstractClassifier.forName(classifierName, classifierSpec));

    String cString = Utils.getOption('C', options);
    if (cString.length() != 0) {
      setClassIndex((new Double(cString)).intValue());
    } else {
      setClassIndex(-1);
    }

    String fString = Utils.getOption('F', options);
    if (fString.length() != 0) {
      setNumFolds((new Double(fString)).intValue());
    } else {
      setNumFolds(0);
    }

    String tString = Utils.getOption('T', options);
    if (tString.length() != 0) {
      setThreshold((new Double(tString)).doubleValue());
    } else {
      setThreshold(0.1);
    }

    String iString = Utils.getOption('I', options);
    if (iString.length() != 0) {
      setMaxIterations((new Double(iString)).intValue());
    } else {
      setMaxIterations(0);
    }

    if (Utils.getFlag('V', options)) {
      setInvert(true);
    } else {
      setInvert(false);
    }

    Utils.checkForRemainingOptions(options);
  }
Пример #9
0
  /**
   * Parses the options for this object.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   * -i &lt;the input file&gt;
   *  The input file
   * </pre>
   *
   * <pre>
   * -o &lt;the output file&gt;
   *  The output file
   * </pre>
   *
   * <pre>
   * -c &lt;class index&gt;
   *  The class index
   *  (default: last)
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the options to use
   * @throws Exception if setting of options fails
   */
  @Override
  public void setOptions(String[] options) throws Exception {
    String tmpStr;

    tmpStr = Utils.getOption('c', options);
    if (tmpStr.length() != 0) {
      setClassIndex(tmpStr);
    } else {
      setClassIndex("last");
    }

    super.setOptions(options);

    Utils.checkForRemainingOptions(options);
  }
Пример #10
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   * -R
   *  Attributes to act on. Can be either a range
   *  string (e.g. 1,2,6-10) OR a comma-separated list of named attributes
   *  (default none)
   * </pre>
   *
   * <pre>
   * -V
   *  Invert matching sense (i.e. act on all attributes other than those specified)
   * </pre>
   *
   * <pre>
   * -N
   *  Nominal labels and their replacement values.
   *  E.g. red:blue, black:white, fred:bob
   * </pre>
   *
   * <pre>
   * -I
   *  Ignore case when matching nominal values
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {
    String atts = Utils.getOption('R', options);
    if (atts.length() > 0) {
      setSelectedAttributes(atts);
    }

    String replacements = Utils.getOption('N', options);
    if (replacements.length() > 0) {
      setValueReplacements(replacements);
    }

    setInvertSelection(Utils.getFlag('V', options));
    setIgnoreCase(Utils.getFlag('I', options));

    Utils.checkForRemainingOptions(options);
  }
Пример #11
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   * -P &lt;start set&gt;
   *  Specify a starting set of attributes.
   *  Eg. 1,3,5-7.If supplied, the starting set becomes
   *  one member of the initial random
   *  population.
   * </pre>
   *
   * <pre>
   * -Z &lt;population size&gt;
   *  Set the size of the population (even number).
   *  (default = 20).
   * </pre>
   *
   * <pre>
   * -G &lt;number of generations&gt;
   *  Set the number of generations.
   *  (default = 20)
   * </pre>
   *
   * <pre>
   * -C &lt;probability of crossover&gt;
   *  Set the probability of crossover.
   *  (default = 0.6)
   * </pre>
   *
   * <pre>
   * -M &lt;probability of mutation&gt;
   *  Set the probability of mutation.
   *  (default = 0.033)
   * </pre>
   *
   * <pre>
   * -R &lt;report frequency&gt;
   *  Set frequency of generation reports.
   *  e.g, setting the value to 5 will
   *  report every 5th generation
   *  (default = number of generations)
   * </pre>
   *
   * <pre>
   * -S &lt;seed&gt;
   *  Set the random number seed.
   *  (default = 1)
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {
    String optionString;
    resetOptions();

    optionString = Utils.getOption('P', options);
    if (optionString.length() != 0) {
      setStartSet(optionString);
    }

    optionString = Utils.getOption('Z', options);
    if (optionString.length() != 0) {
      setPopulationSize(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption('G', options);
    if (optionString.length() != 0) {
      setMaxGenerations(Integer.parseInt(optionString));
      setReportFrequency(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption('C', options);
    if (optionString.length() != 0) {
      setCrossoverProb((new Double(optionString)).doubleValue());
    }

    optionString = Utils.getOption('M', options);
    if (optionString.length() != 0) {
      setMutationProb((new Double(optionString)).doubleValue());
    }

    optionString = Utils.getOption('R', options);
    if (optionString.length() != 0) {
      setReportFrequency(Integer.parseInt(optionString));
    }

    optionString = Utils.getOption('S', options);
    if (optionString.length() != 0) {
      setSeed(Integer.parseInt(optionString));
    }

    Utils.checkForRemainingOptions(options);
  }
Пример #12
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   * -S &lt;num&gt;
   *  Specify the random number seed (default 1)
   * </pre>
   *
   * <pre>
   * -Z &lt;num&gt;
   *  The size of the output dataset - number of instances
   *  (default 100)
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    String tmpStr = Utils.getOption('S', options);
    if (tmpStr.length() != 0) {
      setRandomSeed(Integer.parseInt(tmpStr));
    } else {
      setRandomSeed(1);
    }

    tmpStr = Utils.getOption('Z', options);
    if (tmpStr.length() != 0) {
      setSampleSize(Integer.parseInt(tmpStr));
    } else {
      setSampleSize(100);
    }

    Utils.checkForRemainingOptions(options);
  }
Пример #13
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -C &lt;double&gt;
   *  The complexity constant C.
   *  (default 1)</pre>
   *
   * <pre> -N
   *  Whether to 0=normalize/1=standardize/2=neither.
   *  (default 0=normalize)</pre>
   *
   * <pre> -I &lt;classname and parameters&gt;
   *  Optimizer class used for solving quadratic optimization problem
   *  (default weka.classifiers.functions.supportVector.RegSMOImproved)</pre>
   *
   * <pre> -K &lt;classname and parameters&gt;
   *  The Kernel to use.
   *  (default: weka.classifiers.functions.supportVector.PolyKernel)</pre>
   *
   * <pre>
   * Options specific to optimizer ('-I') weka.classifiers.functions.supportVector.RegSMOImproved:
   * </pre>
   *
   * <pre> -T &lt;double&gt;
   *  The tolerance parameter for checking the stopping criterion.
   *  (default 0.001)</pre>
   *
   * <pre> -V
   *  Use variant 1 of the algorithm when true, otherwise use variant 2.
   *  (default true)</pre>
   *
   * <pre> -P &lt;double&gt;
   *  The epsilon for round-off error.
   *  (default 1.0e-12)</pre>
   *
   * <pre> -L &lt;double&gt;
   *  The epsilon parameter in epsilon-insensitive loss function.
   *  (default 1.0e-3)</pre>
   *
   * <pre> -W &lt;double&gt;
   *  The random number seed.
   *  (default 1)</pre>
   *
   * <pre>
   * Options specific to kernel ('-K') weka.classifiers.functions.supportVector.PolyKernel:
   * </pre>
   *
   * <pre> -D
   *  Enables debugging output (if available) to be printed.
   *  (default: off)</pre>
   *
   * <pre> -no-checks
   *  Turns off all checks - use with caution!
   *  (default: checks on)</pre>
   *
   * <pre> -C &lt;num&gt;
   *  The size of the cache (a prime number), 0 for full cache and
   *  -1 to turn it off.
   *  (default: 250007)</pre>
   *
   * <pre> -E &lt;num&gt;
   *  The Exponent to use.
   *  (default: 1.0)</pre>
   *
   * <pre> -L
   *  Use lower-order terms.
   *  (default: no)</pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String tmpStr;
    String[] tmpOptions;

    tmpStr = Utils.getOption('C', options);
    if (tmpStr.length() != 0) {
      setC(Double.parseDouble(tmpStr));
    } else {
      setC(1.0);
    }

    String nString = Utils.getOption('N', options);
    if (nString.length() != 0) {
      setFilterType(new SelectedTag(Integer.parseInt(nString), TAGS_FILTER));
    } else {
      setFilterType(new SelectedTag(FILTER_NORMALIZE, TAGS_FILTER));
    }

    tmpStr = Utils.getOption('I', options);
    tmpOptions = Utils.splitOptions(tmpStr);
    if (tmpOptions.length != 0) {
      tmpStr = tmpOptions[0];
      tmpOptions[0] = "";
      setRegOptimizer((RegOptimizer) Utils.forName(RegOptimizer.class, tmpStr, tmpOptions));
    } else {
      setRegOptimizer(new RegSMOImproved());
    }

    tmpStr = Utils.getOption('K', options);
    tmpOptions = Utils.splitOptions(tmpStr);
    if (tmpOptions.length != 0) {
      tmpStr = tmpOptions[0];
      tmpOptions[0] = "";
      setKernel(Kernel.forName(tmpStr, tmpOptions));
    } else {
      setKernel(new PolyKernel());
    }

    super.setOptions(options);

    Utils.checkForRemainingOptions(options);
  }
Пример #14
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   *  -I
   *  Weight neighbours by the inverse of their distance
   *  (use when k &gt; 1)
   * </pre>
   *
   * <pre>
   *  -F
   *  Weight neighbours by 1 - their distance
   *  (use when k &gt; 1)
   * </pre>
   *
   * <pre>
   *  -K &lt;number of neighbors&gt;
   *  Number of nearest neighbours (k) used in classification.
   *  (Default = 1)
   * </pre>
   *
   * <pre>
   *  -E
   *  Minimise mean squared error rather than mean absolute
   *  error when using -X option with numeric prediction.
   * </pre>
   *
   * <pre>
   *  -W &lt;window size&gt;
   *  Maximum number of training instances maintained.
   *  Training instances are dropped FIFO. (Default = no window)
   * </pre>
   *
   * <pre>
   *  -X
   *  Select the number of nearest neighbours between 1
   *  and the k value specified using hold-one-out evaluation
   *  on the training data (use when k &gt; 1)
   * </pre>
   *
   * <pre>
   *  -A
   *  The nearest neighbour search algorithm to use (default: weka.core.neighboursearch.LinearNNSearch).
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    String knnString = Utils.getOption('K', options);
    if (knnString.length() != 0) {
      setKNN(Integer.parseInt(knnString));
    } else {
      setKNN(1);
    }
    String windowString = Utils.getOption('W', options);
    if (windowString.length() != 0) {
      setWindowSize(Integer.parseInt(windowString));
    } else {
      setWindowSize(0);
    }
    if (Utils.getFlag('I', options)) {
      setDistanceWeighting(new SelectedTag(WEIGHT_INVERSE, TAGS_WEIGHTING));
    } else if (Utils.getFlag('F', options)) {
      setDistanceWeighting(new SelectedTag(WEIGHT_SIMILARITY, TAGS_WEIGHTING));
    } else {
      setDistanceWeighting(new SelectedTag(WEIGHT_NONE, TAGS_WEIGHTING));
    }
    setCrossValidate(Utils.getFlag('X', options));
    setMeanSquared(Utils.getFlag('E', options));

    String nnSearchClass = Utils.getOption('A', options);
    if (nnSearchClass.length() != 0) {
      String nnSearchClassSpec[] = Utils.splitOptions(nnSearchClass);
      if (nnSearchClassSpec.length == 0) {
        throw new Exception("Invalid NearestNeighbourSearch algorithm " + "specification string.");
      }
      String className = nnSearchClassSpec[0];
      nnSearchClassSpec[0] = "";

      setNearestNeighbourSearchAlgorithm(
          (NearestNeighbourSearch)
              Utils.forName(NearestNeighbourSearch.class, className, nnSearchClassSpec));
    } else this.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());

    Utils.checkForRemainingOptions(options);
  }
Пример #15
0
  /**
   * Test the class from the command line. The instance query should be specified with -Q sql_query
   *
   * @param args contains options for the instance query
   */
  public static void main(String args[]) {

    try {
      InstanceQuery iq = new InstanceQuery();
      String query = Utils.getOption('Q', args);
      if (query.length() == 0) {
        iq.setQuery("select * from Experiment_index");
      } else {
        iq.setQuery(query);
      }
      iq.setOptions(args);
      try {
        Utils.checkForRemainingOptions(args);
      } catch (Exception e) {
        System.err.println("Options for weka.experiment.InstanceQuery:\n");
        Enumeration en = iq.listOptions();
        while (en.hasMoreElements()) {
          Option o = (Option) en.nextElement();
          System.err.println(o.synopsis() + "\n" + o.description());
        }
        System.exit(1);
      }

      Instances aha = iq.retrieveInstances();
      iq.disconnectFromDatabase();
      // query returned no result -> exit
      if (aha == null) return;
      // The dataset may be large, so to make things easier we'll
      // output an instance at a time (rather than having to convert
      // the entire dataset to one large string)
      System.out.println(new Instances(aha, 0));
      for (int i = 0; i < aha.numInstances(); i++) {
        System.out.println(aha.instance(i));
      }
    } catch (Exception e) {
      e.printStackTrace();
      System.err.println(e.getMessage());
    }
  }
Пример #16
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   * -C &lt;col&gt;
   *  Sets the attribute index (default last).
   * </pre>
   *
   * <pre>
   * -F &lt;value index&gt;
   *  Sets the output date format string (default corresponds to ISO-8601).
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    String attIndex = Utils.getOption('C', options);
    if (attIndex.length() != 0) {
      setAttributeIndex(attIndex);
    } else {
      setAttributeIndex("last");
    }

    String formatString = Utils.getOption('F', options);
    if (formatString.length() != 0) {
      setDateFormat(formatString);
    } else {
      setDateFormat(DEFAULT_FORMAT);
    }

    if (getInputFormat() != null) {
      setInputFormat(getInputFormat());
    }

    Utils.checkForRemainingOptions(options);
  }
Пример #17
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   * -D
   *  Turn on debugging output.
   * </pre>
   *
   * <pre>
   * -R &lt;ridge&gt;
   *  Set the ridge in the log-likelihood.
   * </pre>
   *
   * <pre>
   * -M &lt;number&gt;
   *  Set the maximum number of iterations (default -1, until convergence).
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    setUseConjugateGradientDescent(Utils.getFlag('C', options));

    String ridgeString = Utils.getOption('R', options);
    if (ridgeString.length() != 0) {
      m_Ridge = Double.parseDouble(ridgeString);
    } else {
      m_Ridge = 1.0e-8;
    }

    String maxItsString = Utils.getOption('M', options);
    if (maxItsString.length() != 0) {
      m_MaxIts = Integer.parseInt(maxItsString);
    } else {
      m_MaxIts = -1;
    }

    super.setOptions(options);

    Utils.checkForRemainingOptions(options);
  }
Пример #18
0
  /**
   * Test method for this class
   *
   * @param args the command line arguments
   */
  public static void main(String[] args) {

    try {
      BVDecompose bvd = new BVDecompose();

      try {
        bvd.setOptions(args);
        Utils.checkForRemainingOptions(args);
      } catch (Exception ex) {
        String result = ex.getMessage() + "\nBVDecompose Options:\n\n";
        Enumeration<Option> enu = bvd.listOptions();
        while (enu.hasMoreElements()) {
          Option option = (Option) enu.nextElement();
          result += option.synopsis() + "\n" + option.description() + "\n";
        }
        throw new Exception(result);
      }

      bvd.decompose();
      System.out.println(bvd.toString());
    } catch (Exception ex) {
      System.err.println(ex.getMessage());
    }
  }
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   * -W
   *  Use word frequencies instead of binary bag of words.
   * </pre>
   *
   * <pre>
   * -P &lt;# instances&gt;
   *  How often to prune the dictionary of low frequency words (default = 0, i.e. don't prune)
   * </pre>
   *
   * <pre>
   * -M &lt;double&gt;
   *  Minimum word frequency. Words with less than this frequence are ignored.
   *  If periodic pruning is turned on then this is also used to determine which
   *  words to remove from the dictionary (default = 3).
   * </pre>
   *
   * <pre>
   * -normalize
   *  Normalize document length (use in conjunction with -norm and -lnorm)
   * </pre>
   *
   * <pre>
   * -norm &lt;num&gt;
   *  Specify the norm that each instance must have (default 1.0)
   * </pre>
   *
   * <pre>
   * -lnorm &lt;num&gt;
   *  Specify L-norm to use (default 2.0)
   * </pre>
   *
   * <pre>
   * -lowercase
   *  Convert all tokens to lowercase before adding to the dictionary.
   * </pre>
   *
   * <pre>
   * -stoplist
   *  Ignore words that are in the stoplist.
   * </pre>
   *
   * <pre>
   * -stopwords &lt;file&gt;
   *  A file containing stopwords to override the default ones.
   *  Using this option automatically sets the flag ('-stoplist') to use the
   *  stoplist if the file exists.
   *  Format: one stopword per line, lines starting with '#'
   *  are interpreted as comments and ignored.
   * </pre>
   *
   * <pre>
   * -tokenizer &lt;spec&gt;
   *  The tokenizing algorihtm (classname plus parameters) to use.
   *  (default: weka.core.tokenizers.WordTokenizer)
   * </pre>
   *
   * <pre>
   * -stemmer &lt;spec&gt;
   *  The stemmering algorihtm (classname plus parameters) to use.
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {
    reset();

    super.setOptions(options);

    setUseWordFrequencies(Utils.getFlag("W", options));

    String pruneFreqS = Utils.getOption("P", options);
    if (pruneFreqS.length() > 0) {
      setPeriodicPruning(Integer.parseInt(pruneFreqS));
    }
    String minFreq = Utils.getOption("M", options);
    if (minFreq.length() > 0) {
      setMinWordFrequency(Double.parseDouble(minFreq));
    }

    setNormalizeDocLength(Utils.getFlag("normalize", options));

    String normFreqS = Utils.getOption("norm", options);
    if (normFreqS.length() > 0) {
      setNorm(Double.parseDouble(normFreqS));
    }
    String lnormFreqS = Utils.getOption("lnorm", options);
    if (lnormFreqS.length() > 0) {
      setLNorm(Double.parseDouble(lnormFreqS));
    }

    setLowercaseTokens(Utils.getFlag("lowercase", options));
    setUseStopList(Utils.getFlag("stoplist", options));

    String stopwordsS = Utils.getOption("stopwords", options);
    if (stopwordsS.length() > 0) {
      setStopwords(new File(stopwordsS));
    } else {
      setStopwords(null);
    }

    String tokenizerString = Utils.getOption("tokenizer", options);
    if (tokenizerString.length() == 0) {
      setTokenizer(new WordTokenizer());
    } else {
      String[] tokenizerSpec = Utils.splitOptions(tokenizerString);
      if (tokenizerSpec.length == 0) {
        throw new Exception("Invalid tokenizer specification string");
      }
      String tokenizerName = tokenizerSpec[0];
      tokenizerSpec[0] = "";
      Tokenizer tokenizer = (Tokenizer) Class.forName(tokenizerName).newInstance();
      if (tokenizer instanceof OptionHandler) {
        ((OptionHandler) tokenizer).setOptions(tokenizerSpec);
      }
      setTokenizer(tokenizer);
    }

    String stemmerString = Utils.getOption("stemmer", options);
    if (stemmerString.length() == 0) {
      setStemmer(null);
    } else {
      String[] stemmerSpec = Utils.splitOptions(stemmerString);
      if (stemmerSpec.length == 0) {
        throw new Exception("Invalid stemmer specification string");
      }
      String stemmerName = stemmerSpec[0];
      stemmerSpec[0] = "";
      Stemmer stemmer = (Stemmer) Class.forName(stemmerName).newInstance();
      if (stemmer instanceof OptionHandler) {
        ((OptionHandler) stemmer).setOptions(stemmerSpec);
      }
      setStemmer(stemmer);
    }

    Utils.checkForRemainingOptions(options);
  }
Пример #20
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre>
   * -D
   *  Turns on output of debugging information.
   * </pre>
   *
   * <pre>
   * -min &lt;double&gt;
   *  The minimum threshold. (default -Double.MAX_VALUE)
   * </pre>
   *
   * <pre>
   * -min-default &lt;double&gt;
   *  The replacement for values smaller than the minimum threshold.
   *  (default -Double.MAX_VALUE)
   * </pre>
   *
   * <pre>
   * -max &lt;double&gt;
   *  The maximum threshold. (default Double.MAX_VALUE)
   * </pre>
   *
   * <pre>
   * -max-default &lt;double&gt;
   *  The replacement for values larger than the maximum threshold.
   *  (default Double.MAX_VALUE)
   * </pre>
   *
   * <pre>
   * -closeto &lt;double&gt;
   *  The number values are checked for closeness. (default 0)
   * </pre>
   *
   * <pre>
   * -closeto-default &lt;double&gt;
   *  The replacement for values that are close to '-closeto'.
   *  (default 0)
   * </pre>
   *
   * <pre>
   * -closeto-tolerance &lt;double&gt;
   *  The tolerance below which numbers are considered being close to
   *  to each other. (default 1E-6)
   * </pre>
   *
   * <pre>
   * -decimals &lt;int&gt;
   *  The number of decimals to round to, -1 means no rounding at all.
   *  (default -1)
   * </pre>
   *
   * <pre>
   * -R &lt;col1,col2,...&gt;
   *  The list of columns to cleanse, e.g., first-last or first-3,5-last.
   *  (default first-last)
   * </pre>
   *
   * <pre>
   * -V
   *  Inverts the matching sense.
   * </pre>
   *
   * <pre>
   * -include-class
   *  Whether to include the class in the cleansing.
   *  The class column will always be skipped, if this flag is not
   *  present. (default no)
   * </pre>
   *
   * <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    String tmpStr = Utils.getOption("min", options);
    if (tmpStr.length() != 0) {
      setMinThreshold(Double.parseDouble(tmpStr));
    } else {
      setMinThreshold(-Double.MAX_VALUE);
    }

    tmpStr = Utils.getOption("min-default", options);
    if (tmpStr.length() != 0) {
      setMinDefault(Double.parseDouble(tmpStr));
    } else {
      setMinDefault(-Double.MAX_VALUE);
    }

    tmpStr = Utils.getOption("max", options);
    if (tmpStr.length() != 0) {
      setMaxThreshold(Double.parseDouble(tmpStr));
    } else {
      setMaxThreshold(Double.MAX_VALUE);
    }

    tmpStr = Utils.getOption("max-default", options);
    if (tmpStr.length() != 0) {
      setMaxDefault(Double.parseDouble(tmpStr));
    } else {
      setMaxDefault(Double.MAX_VALUE);
    }

    tmpStr = Utils.getOption("closeto", options);
    if (tmpStr.length() != 0) {
      setCloseTo(Double.parseDouble(tmpStr));
    } else {
      setCloseTo(0);
    }

    tmpStr = Utils.getOption("closeto-default", options);
    if (tmpStr.length() != 0) {
      setCloseToDefault(Double.parseDouble(tmpStr));
    } else {
      setCloseToDefault(0);
    }

    tmpStr = Utils.getOption("closeto-tolerance", options);
    if (tmpStr.length() != 0) {
      setCloseToTolerance(Double.parseDouble(tmpStr));
    } else {
      setCloseToTolerance(1E-6);
    }

    tmpStr = Utils.getOption("R", options);
    if (tmpStr.length() != 0) {
      setAttributeIndices(tmpStr);
    } else {
      setAttributeIndices("first-last");
    }

    setInvertSelection(Utils.getFlag("V", options));

    setIncludeClass(Utils.getFlag("include-class", options));

    tmpStr = Utils.getOption("decimals", options);
    if (tmpStr.length() != 0) {
      setDecimals(Integer.parseInt(tmpStr));
    } else {
      setDecimals(-1);
    }

    super.setOptions(options);

    Utils.checkForRemainingOptions(options);
  }
Пример #21
0
  /**
   * Method for testing filters.
   *
   * @param filter the filter to use
   * @param options should contain the following arguments: <br>
   *     -i input_file <br>
   *     -o output_file <br>
   *     -c class_index <br>
   *     -z classname (for filters implementing weka.filters.Sourcable) <br>
   *     or -h for help on options
   * @throws Exception if something goes wrong or the user requests help on command options
   */
  public static void filterFile(Filter filter, String[] options) throws Exception {

    boolean debug = false;
    Instances data = null;
    DataSource input = null;
    PrintWriter output = null;
    boolean helpRequest;
    String sourceCode = "";

    try {
      helpRequest = Utils.getFlag('h', options);

      if (Utils.getFlag('d', options)) {
        debug = true;
      }
      String infileName = Utils.getOption('i', options);
      String outfileName = Utils.getOption('o', options);
      String classIndex = Utils.getOption('c', options);
      if (filter instanceof Sourcable) sourceCode = Utils.getOption('z', options);

      if (filter instanceof OptionHandler) {
        ((OptionHandler) filter).setOptions(options);
      }

      Utils.checkForRemainingOptions(options);
      if (helpRequest) {
        throw new Exception("Help requested.\n");
      }
      if (infileName.length() != 0) {
        input = new DataSource(infileName);
      } else {
        input = new DataSource(System.in);
      }
      if (outfileName.length() != 0) {
        output = new PrintWriter(new FileOutputStream(outfileName));
      } else {
        output = new PrintWriter(System.out);
      }

      data = input.getStructure();
      if (classIndex.length() != 0) {
        if (classIndex.equals("first")) {
          data.setClassIndex(0);
        } else if (classIndex.equals("last")) {
          data.setClassIndex(data.numAttributes() - 1);
        } else {
          data.setClassIndex(Integer.parseInt(classIndex) - 1);
        }
      }
    } catch (Exception ex) {
      String filterOptions = "";
      // Output the error and also the valid options
      if (filter instanceof OptionHandler) {
        filterOptions += "\nFilter options:\n\n";
        Enumeration enu = ((OptionHandler) filter).listOptions();
        while (enu.hasMoreElements()) {
          Option option = (Option) enu.nextElement();
          filterOptions += option.synopsis() + '\n' + option.description() + "\n";
        }
      }

      String genericOptions =
          "\nGeneral options:\n\n"
              + "-h\n"
              + "\tGet help on available options.\n"
              + "\t(use -b -h for help on batch mode.)\n"
              + "-i <file>\n"
              + "\tThe name of the file containing input instances.\n"
              + "\tIf not supplied then instances will be read from stdin.\n"
              + "-o <file>\n"
              + "\tThe name of the file output instances will be written to.\n"
              + "\tIf not supplied then instances will be written to stdout.\n"
              + "-c <class index>\n"
              + "\tThe number of the attribute to use as the class.\n"
              + "\t\"first\" and \"last\" are also valid entries.\n"
              + "\tIf not supplied then no class is assigned.\n";

      if (filter instanceof Sourcable) {
        genericOptions +=
            "-z <class name>\n" + "\tOutputs the source code representing the trained filter.\n";
      }

      throw new Exception('\n' + ex.getMessage() + filterOptions + genericOptions);
    }

    if (debug) {
      System.err.println("Setting input format");
    }
    boolean printedHeader = false;
    if (filter.setInputFormat(data)) {
      if (debug) {
        System.err.println("Getting output format");
      }
      output.println(filter.getOutputFormat().toString());
      printedHeader = true;
    }

    // Pass all the instances to the filter
    Instance inst;
    while (input.hasMoreElements(data)) {
      inst = input.nextElement(data);
      if (debug) {
        System.err.println("Input instance to filter");
      }
      if (filter.input(inst)) {
        if (debug) {
          System.err.println("Filter said collect immediately");
        }
        if (!printedHeader) {
          throw new Error("Filter didn't return true from setInputFormat() " + "earlier!");
        }
        if (debug) {
          System.err.println("Getting output instance");
        }
        output.println(filter.output().toString());
      }
    }

    // Say that input has finished, and print any pending output instances
    if (debug) {
      System.err.println("Setting end of batch");
    }
    if (filter.batchFinished()) {
      if (debug) {
        System.err.println("Filter said collect output");
      }
      if (!printedHeader) {
        if (debug) {
          System.err.println("Getting output format");
        }
        output.println(filter.getOutputFormat().toString());
      }
      if (debug) {
        System.err.println("Getting output instance");
      }
      while (filter.numPendingOutput() > 0) {
        output.println(filter.output().toString());
        if (debug) {
          System.err.println("Getting output instance");
        }
      }
    }
    if (debug) {
      System.err.println("Done");
    }

    if (output != null) {
      output.close();
    }

    if (sourceCode.length() != 0)
      System.out.println(
          wekaStaticWrapper((Sourcable) filter, sourceCode, data, filter.getOutputFormat()));
  }
Пример #22
0
  /**
   * Configures/Runs the Experiment from the command line.
   *
   * @param args command line arguments to the Experiment.
   */
  public static void main(String[] args) {

    try {
      weka.core.WekaPackageManager.loadPackages(false, true, false);
      RemoteExperiment exp = null;

      // get options from XML?
      String xmlOption = Utils.getOption("xml", args);
      if (!xmlOption.equals("")) {
        args = new XMLOptions(xmlOption).toArray();
      }

      Experiment base = null;
      String expFile = Utils.getOption('l', args);
      String saveFile = Utils.getOption('s', args);
      boolean runExp = Utils.getFlag('r', args);
      ArrayList<String> remoteHosts = new ArrayList<String>();
      String runHost = " ";
      while (runHost.length() != 0) {
        runHost = Utils.getOption('h', args);
        if (runHost.length() != 0) {
          remoteHosts.add(runHost);
        }
      }
      if (expFile.length() == 0) {
        base = new Experiment();
        try {
          base.setOptions(args);
          Utils.checkForRemainingOptions(args);
        } catch (Exception ex) {
          ex.printStackTrace();
          String result =
              "Usage:\n\n"
                  + "-l <exp file>\n"
                  + "\tLoad experiment from file (default use cli options)\n"
                  + "-s <exp file>\n"
                  + "\tSave experiment to file after setting other options\n"
                  + "\t(default don't save)\n"
                  + "-h <remote host name>\n"
                  + "\tHost to run experiment on (may be specified more than once\n"
                  + "\tfor multiple remote hosts)\n"
                  + "-r \n"
                  + "\tRun experiment on (default don't run)\n"
                  + "-xml <filename | xml-string>\n"
                  + "\tget options from XML-Data instead from parameters\n"
                  + "\n";
          Enumeration<Option> enm = ((OptionHandler) base).listOptions();
          while (enm.hasMoreElements()) {
            Option option = enm.nextElement();
            result += option.synopsis() + "\n";
            result += option.description() + "\n";
          }
          throw new Exception(result + "\n" + ex.getMessage());
        }
      } else {
        Object tmp;

        // KOML?
        if ((KOML.isPresent()) && (expFile.toLowerCase().endsWith(KOML.FILE_EXTENSION))) {
          tmp = KOML.read(expFile);
        } else
        // XML?
        if (expFile.toLowerCase().endsWith(".xml")) {
          XMLExperiment xml = new XMLExperiment();
          tmp = xml.read(expFile);
        }
        // binary
        else {
          FileInputStream fi = new FileInputStream(expFile);
          ObjectInputStream oi = new ObjectInputStream(new BufferedInputStream(fi));
          tmp = oi.readObject();
          oi.close();
        }
        if (tmp instanceof RemoteExperiment) {
          exp = (RemoteExperiment) tmp;
        } else {
          base = (Experiment) tmp;
        }
      }
      if (base != null) {
        exp = new RemoteExperiment(base);
      }
      for (int i = 0; i < remoteHosts.size(); i++) {
        exp.addRemoteHost(remoteHosts.get(i));
      }
      System.err.println("Experiment:\n" + exp.toString());

      if (saveFile.length() != 0) {
        // KOML?
        if ((KOML.isPresent()) && (saveFile.toLowerCase().endsWith(KOML.FILE_EXTENSION))) {
          KOML.write(saveFile, exp);
        } else
        // XML?
        if (saveFile.toLowerCase().endsWith(".xml")) {
          XMLExperiment xml = new XMLExperiment();
          xml.write(saveFile, exp);
        }
        // binary
        else {
          FileOutputStream fo = new FileOutputStream(saveFile);
          ObjectOutputStream oo = new ObjectOutputStream(new BufferedOutputStream(fo));
          oo.writeObject(exp);
          oo.close();
        }
      }

      if (runExp) {
        System.err.println("Initializing...");
        exp.initialize();
        System.err.println("Iterating...");
        exp.runExperiment();
        System.err.println("Postprocessing...");
        exp.postProcess();
      }
    } catch (Exception ex) {
      ex.printStackTrace();
      System.err.println(ex.getMessage());
    }
  }
Пример #23
0
  /**
   * Parses a given list of options controlling the behaviour of this object. Valid options are:
   *
   * <p>-l "directory name" <br>
   * Specifies name of directory.
   *
   * <p>-m "model name" <br>
   * Specifies name of model.
   *
   * <p>-v "vocabulary name" <br>
   * Specifies vocabulary name.
   *
   * <p>-f "vocabulary format" <br>
   * Specifies vocabulary format.
   *
   * <p>-i "document language" <br>
   * Specifies document language.
   *
   * <p>-e "encoding" <br>
   * Specifies encoding.
   *
   * <p>-w "WikipediaDatabase@WikipediaServer" <br>
   * Specifies wikipedia data.
   *
   * <p>-d<br>
   * Turns debugging mode on.
   *
   * <p>-x "length"<br>
   * Sets maximum phrase length (default: 3).
   *
   * <p>-y "length"<br>
   * Sets minimum phrase length (default: 3).
   *
   * <p>-o "number"<br>
   * The minimum number of times a phrase needs to occur (default: 2).
   *
   * <p>-s "name of class implementing list of stop words"<br>
   * Sets list of stop words to used (default: StopwordsEnglish).
   *
   * <p>-t "name of class implementing stemmer"<br>
   * Sets stemmer to use (default: IteratedLovinsStemmer).
   *
   * <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    String dirName = Utils.getOption('l', options);
    if (dirName.length() > 0) {
      setDirName(dirName);
    } else {
      setDirName(null);
      throw new Exception("Name of directory required argument.");
    }

    String modelName = Utils.getOption('m', options);
    if (modelName.length() > 0) {
      setModelName(modelName);
    } else {
      setModelName(null);
      throw new Exception("Name of model required argument.");
    }

    String vocabularyName = Utils.getOption('v', options);
    if (vocabularyName.length() > 0) {
      setVocabularyName(vocabularyName);
    }

    String vocabularyFormat = Utils.getOption('f', options);

    if (!getVocabularyName().equals("none") && !getVocabularyName().equals("wikipedia")) {
      if (vocabularyFormat.length() > 0) {
        if (vocabularyFormat.equals("skos") || vocabularyFormat.equals("text")) {
          setVocabularyFormat(vocabularyFormat);
        } else {
          throw new Exception(
              "Unsupported format of vocabulary. It should be either \"skos\" or \"text\".");
        }
      } else {
        setVocabularyFormat(null);
        throw new Exception(
            "If a controlled vocabulary is used, format of vocabulary required argument (skos or text).");
      }
    } else {
      setVocabularyFormat(null);
    }

    String encoding = Utils.getOption('e', options);
    if (encoding.length() > 0) {
      setEncoding(encoding);
    } else {
      setEncoding("default");
    }

    String wikipediaConnection = Utils.getOption('w', options);
    if (wikipediaConnection.length() > 0) {
      setWikipediaConnection(wikipediaConnection);
    }

    String documentLanguage = Utils.getOption('i', options);
    if (documentLanguage.length() > 0) {
      setDocumentLanguage(documentLanguage);
    } else {
      setDocumentLanguage("en");
    }

    String maxPhraseLengthString = Utils.getOption('x', options);
    if (maxPhraseLengthString.length() > 0) {
      setMaxPhraseLength(Integer.parseInt(maxPhraseLengthString));
    } else {
      setMaxPhraseLength(5);
    }
    String minPhraseLengthString = Utils.getOption('y', options);
    if (minPhraseLengthString.length() > 0) {
      setMinPhraseLength(Integer.parseInt(minPhraseLengthString));
    } else {
      setMinPhraseLength(1);
    }
    String minNumOccurString = Utils.getOption('o', options);
    if (minNumOccurString.length() > 0) {
      setMinNumOccur(Integer.parseInt(minNumOccurString));
    } else {
      setMinNumOccur(2);
    }

    String stopwordsString = Utils.getOption('s', options);
    if (stopwordsString.length() > 0) {
      stopwordsString = "kea.stopwords.".concat(stopwordsString);
      setStopwords((Stopwords) Class.forName(stopwordsString).newInstance());
    }

    String stemmerString = Utils.getOption('t', options);
    if (stemmerString.length() > 0) {
      stemmerString = "kea.stemmers.".concat(stemmerString);
      setStemmer((Stemmer) Class.forName(stemmerString).newInstance());
    }
    setDebug(Utils.getFlag('d', options));
    Utils.checkForRemainingOptions(options);
  }