/**
 * Configures this object from a command-line style option array.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -C &lt;col&gt;
 *  Sets the attribute index (default last).
 * </pre>
 *
 * <pre>
 * -F &lt;value index&gt;
 *  Sets the first value's index (default first).
 * </pre>
 *
 * <pre>
 * -S &lt;value index&gt;
 *  Sets the second value's index (default last).
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * An option that is absent is reset to its default. When an input format is
 * already present it is handed to {@code setInputFormat} again after the
 * options have been applied.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  String attributeArg = Utils.getOption('C', options);
  setAttributeIndex(attributeArg.length() == 0 ? "last" : attributeArg);

  String firstArg = Utils.getOption('F', options);
  setFirstValueIndex(firstArg.length() == 0 ? "first" : firstArg);

  String secondArg = Utils.getOption('S', options);
  setSecondValueIndex(secondArg.length() == 0 ? "last" : secondArg);

  // Re-apply the current input format so the new settings take effect.
  if (getInputFormat() != null) {
    setInputFormat(getInputFormat());
  }

  Utils.checkForRemainingOptions(options);
}
/**
 * Configures this object from a command-line style option array.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -R &lt;col1,col2-col4,...&gt;
 *  Specifies list of columns to Discretize. First and last are valid indexes.
 *  (when omitted, "first-last" is used)
 * </pre>
 *
 * <pre>
 * -V
 *  Invert matching sense of column indexes.
 * </pre>
 *
 * <pre>
 * -D
 *  Output binary attributes for discretized attributes.
 * </pre>
 *
 * <pre>
 * -Y
 *  Use bin numbers rather than ranges for discretized attributes.
 * </pre>
 *
 * <pre>
 * -E
 *  Use better encoding of split point for MDL.
 * </pre>
 *
 * <pre>
 * -K
 *  Use Kononenko's MDL criterion.
 * </pre>
 *
 * <pre>
 * -precision &lt;integer&gt;
 *  Precision for bin boundary labels.
 *  (default = 6 decimal places; left untouched when the option is omitted)
 * </pre>
 *
 * <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  // Boolean switches first; each flag is consumed from the array as it is read.
  setMakeBinary(Utils.getFlag('D', options));
  setUseBinNumbers(Utils.getFlag('Y', options));
  setUseBetterEncoding(Utils.getFlag('E', options));
  setUseKononenko(Utils.getFlag('K', options));
  setInvertSelection(Utils.getFlag('V', options));

  String columnRange = Utils.getOption('R', options);
  setAttributeIndices(columnRange.length() == 0 ? "first-last" : columnRange);

  String precisionArg = Utils.getOption("precision", options);
  if (!precisionArg.isEmpty()) {
    setBinRangePrecision(Integer.parseInt(precisionArg));
  }

  // Re-apply the current input format so the new settings take effect.
  if (getInputFormat() != null) {
    setInputFormat(getInputFormat());
  }

  Utils.checkForRemainingOptions(options);
}
/**
 * Configures this object from a command-line style option array. Valid
 * options are:
 *
 * <p>-l "directory name"<br>
 * Specifies name of directory (required).
 *
 * <p>-m "model name"<br>
 * Specifies name of model (required).
 *
 * <p>-e "encoding"<br>
 * Specifies encoding (default: "default").
 *
 * <p>-n<br>
 * Specifies number of phrases to be output (default: 5).
 *
 * <p>-d<br>
 * Turns debugging mode on.
 *
 * <p>-a<br>
 * Also write stemmed phrase and score into ".key" file.
 *
 * <p>When a required argument is missing, the corresponding property is set
 * to {@code null} before the exception is thrown.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if a required argument is missing or an option is not
 *           supported
 */
public void setOptions(String[] options) throws Exception {
  String directory = Utils.getOption('l', options);
  if (directory.length() == 0) {
    setDirName(null);
    throw new Exception("Name of directory required argument.");
  }
  setDirName(directory);

  String model = Utils.getOption('m', options);
  if (model.length() == 0) {
    setModelName(null);
    throw new Exception("Name of model required argument.");
  }
  setModelName(model);

  String charset = Utils.getOption('e', options);
  setEncoding(charset.length() > 0 ? charset : "default");

  String phraseCount = Utils.getOption('n', options);
  setNumPhrases(phraseCount.length() > 0 ? Integer.parseInt(phraseCount) : 5);

  setDebug(Utils.getFlag('d', options));
  setAdditionalInfo(Utils.getFlag('a', options));

  Utils.checkForRemainingOptions(options);
}
/**
 * Parses a given list of options.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -B &lt;num&gt;
 *  Manual blend setting (default 20%)
 * </pre>
 *
 * <pre>
 * -E
 *  Enable entropic auto-blend setting (symbolic class only)
 * </pre>
 *
 * <pre>
 * -M &lt;char&gt;
 *  Specify the missing value treatment mode (default a)
 *  Valid options are: a(verage), d(elete), m(axdiff), n(ormal)
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * Only the first character of the -M argument is inspected; any character
 * other than d, m or n selects averaging. When -B or -M is omitted the
 * corresponding setting is left unchanged by this method.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String blendStr = Utils.getOption('B', options);
  if (blendStr.length() != 0) {
    setGlobalBlend(Integer.parseInt(blendStr));
  }

  setEntropicAutoBlend(Utils.getFlag('E', options));

  String missingModeStr = Utils.getOption('M', options);
  if (missingModeStr.length() != 0) {
    switch (missingModeStr.charAt(0)) {
      case 'd':
        setMissingMode(new SelectedTag(M_DELETE, TAGS_MISSING));
        break;
      case 'm':
        setMissingMode(new SelectedTag(M_MAXDIFF, TAGS_MISSING));
        break;
      case 'n':
        setMissingMode(new SelectedTag(M_NORMAL, TAGS_MISSING));
        break;
      case 'a':
      default:
        // 'a' and every unrecognised character select averaging.
        setMissingMode(new SelectedTag(M_AVERAGE, TAGS_MISSING));
        break;
    }
  }

  Utils.checkForRemainingOptions(options);
}
/** * Parses a given list of options. * * <p> * <!-- options-start --> * Valid options are: * * <p> * * <pre> -B * Binary splits (convert nominal attributes to binary ones) </pre> * * <pre> -P * Use error on probabilities instead of misclassification error for stopping criterion of LogitBoost. * </pre> * * <pre> -I <numIterations> * Set fixed number of iterations for LogitBoost (instead of using cross-validation)</pre> * * <pre> -F <modelType> * Set Funtional Tree type to be generate: 0 for FT, 1 for FTLeaves and 2 for FTInner</pre> * * <pre> -M <numInstances> * Set minimum number of instances at which a node can be split (default 15)</pre> * * <pre> -W <beta> * Set beta for weight trimming for LogitBoost. Set to 0 (default) for no weight trimming.</pre> * * <pre> -A * The AIC is used to choose the best iteration.</pre> * * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { setBinSplit(Utils.getFlag('B', options)); setErrorOnProbabilities(Utils.getFlag('P', options)); String optionString = Utils.getOption('I', options); if (optionString.length() != 0) { setNumBoostingIterations((new Integer(optionString)).intValue()); } optionString = Utils.getOption('F', options); if (optionString.length() != 0) { setModelType(new SelectedTag(Integer.parseInt(optionString), TAGS_MODEL)); // setModelType((new Integer(optionString)).intValue()); } optionString = Utils.getOption('M', options); if (optionString.length() != 0) { setMinNumInstances((new Integer(optionString)).intValue()); } optionString = Utils.getOption('W', options); if (optionString.length() != 0) { setWeightTrimBeta((new Double(optionString)).doubleValue()); } setUseAIC(Utils.getFlag('A', options)); Utils.checkForRemainingOptions(options); }
/**
 * Configures this object from a command-line style option array.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -N
 *  Sets if binary attributes are to be coded as nominal ones.
 * </pre>
 *
 * <pre>
 * -A
 *  For each nominal value a new attribute is created,
 *  not only if there are more than 2 values.
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * When an input format is already present it is handed to
 * {@code setInputFormat} again after the flags have been applied.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  boolean binaryAsNominal = Utils.getFlag('N', options);
  setBinaryAttributesNominal(binaryAsNominal);

  boolean transformAll = Utils.getFlag('A', options);
  setTransformAllValues(transformAll);

  if (getInputFormat() != null) {
    setInputFormat(getInputFormat());
  }

  Utils.checkForRemainingOptions(options);
}
/**
 * Parses a given list of options.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -C
 *  Don't weight voting intervals by confidence
 * </pre>
 *
 * <pre>
 * -B &lt;bias&gt;
 *  Set exponential bias towards confident intervals
 *  (default = 1.0)
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * When -B is omitted the current bias is left unchanged by this method.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  // -C switches confidence weighting OFF, hence the negation.
  setWeightByConfidence(!Utils.getFlag('C', options));

  String optionString = Utils.getOption('B', options);
  if (optionString.length() != 0) {
    setBias(Double.parseDouble(optionString));
  }

  Utils.checkForRemainingOptions(options);
}
/**
 * Parses a given list of options.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -W &lt;classifier specification&gt;
 *  Full class name of classifier to use, followed
 *  by scheme options. eg:
 *   "weka.classifiers.bayes.NaiveBayes -D"
 *  (default: weka.classifiers.rules.ZeroR)
 * </pre>
 *
 * <pre>
 * -C &lt;class index&gt;
 *  Attribute on which misclassifications are based.
 *  If &lt; 0 will use any current set class or default to the last attribute.
 * </pre>
 *
 * <pre>
 * -F &lt;number of folds&gt;
 *  The number of folds to use for cross-validation cleansing.
 *  (&lt;2 = no cross-validation - default).
 * </pre>
 *
 * <pre>
 * -T &lt;threshold&gt;
 *  Threshold for the max error when predicting numeric class.
 *  (Value should be &gt;= 0, default = 0.1).
 * </pre>
 *
 * <pre>
 * -I
 *  The maximum number of cleansing iterations to perform.
 *  (&lt;1 = until fully cleansed - default)
 * </pre>
 *
 * <pre>
 * -V
 *  Invert the match so that correctly classified instances are discarded.
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * The integer-valued options (-C, -F, -I) are read as doubles and truncated,
 * so an argument such as "2.0" is accepted.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  String classifierString = Utils.getOption('W', options);
  if (classifierString.length() == 0) {
    classifierString = weka.classifiers.rules.ZeroR.class.getName();
  }
  String[] classifierSpec = Utils.splitOptions(classifierString);
  if (classifierSpec.length == 0) {
    throw new Exception("Invalid classifier specification string");
  }
  // First token is the class name; blank it so the rest are scheme options.
  String classifierName = classifierSpec[0];
  classifierSpec[0] = "";
  setClassifier(AbstractClassifier.forName(classifierName, classifierSpec));

  String cString = Utils.getOption('C', options);
  if (cString.length() != 0) {
    setClassIndex((int) Double.parseDouble(cString));
  } else {
    setClassIndex(-1);
  }

  String fString = Utils.getOption('F', options);
  if (fString.length() != 0) {
    setNumFolds((int) Double.parseDouble(fString));
  } else {
    setNumFolds(0);
  }

  String tString = Utils.getOption('T', options);
  if (tString.length() != 0) {
    setThreshold(Double.parseDouble(tString));
  } else {
    setThreshold(0.1);
  }

  String iString = Utils.getOption('I', options);
  if (iString.length() != 0) {
    setMaxIterations((int) Double.parseDouble(iString));
  } else {
    setMaxIterations(0);
  }

  setInvert(Utils.getFlag('V', options));

  Utils.checkForRemainingOptions(options);
}
/**
 * Configures this object from a command-line style option array.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -i &lt;the input file&gt;
 *  The input file
 * </pre>
 *
 * <pre>
 * -o &lt;the output file&gt;
 *  The output file
 * </pre>
 *
 * <pre>
 * -c &lt;class index&gt;
 *  The class index
 *  (default: last)
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * Only -c is read directly by this method; the remaining options are passed
 * on to {@code super.setOptions} (NOTE(review): -i and -o are presumably
 * consumed there - confirm against the superclass).
 *
 * @param options the options to use
 * @throws Exception if setting of options fails
 */
@Override
public void setOptions(String[] options) throws Exception {
  String classArg = Utils.getOption('c', options);
  setClassIndex(classArg.length() == 0 ? "last" : classArg);

  super.setOptions(options);

  Utils.checkForRemainingOptions(options);
}
/**
 * Configures this object from a command-line style option array.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -R
 *  Attributes to act on. Can be either a range
 *  string (e.g. 1,2,6-10) OR a comma-separated list of named attributes
 *  (default none)
 * </pre>
 *
 * <pre>
 * -V
 *  Invert matching sense (i.e. act on all attributes other than those specified)
 * </pre>
 *
 * <pre>
 * -N
 *  Nominal labels and their replacement values.
 *  E.g. red:blue, black:white, fred:bob
 * </pre>
 *
 * <pre>
 * -I
 *  Ignore case when matching nominal values
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * When -R or -N is omitted the corresponding setting is left unchanged by
 * this method.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  String selected = Utils.getOption('R', options);
  if (!selected.isEmpty()) {
    setSelectedAttributes(selected);
  }

  String labelMap = Utils.getOption('N', options);
  if (!labelMap.isEmpty()) {
    setValueReplacements(labelMap);
  }

  boolean invert = Utils.getFlag('V', options);
  setInvertSelection(invert);

  boolean caseInsensitive = Utils.getFlag('I', options);
  setIgnoreCase(caseInsensitive);

  Utils.checkForRemainingOptions(options);
}
/**
 * Parses a given list of options.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -P &lt;start set&gt;
 *  Specify a starting set of attributes.
 *  Eg. 1,3,5-7. If supplied, the starting set becomes
 *  one member of the initial random
 *  population.
 * </pre>
 *
 * <pre>
 * -Z &lt;population size&gt;
 *  Set the size of the population (even number).
 *  (default = 20).
 * </pre>
 *
 * <pre>
 * -G &lt;number of generations&gt;
 *  Set the number of generations.
 *  (default = 20)
 * </pre>
 *
 * <pre>
 * -C &lt;probability of crossover&gt;
 *  Set the probability of crossover.
 *  (default = 0.6)
 * </pre>
 *
 * <pre>
 * -M &lt;probability of mutation&gt;
 *  Set the probability of mutation.
 *  (default = 0.033)
 * </pre>
 *
 * <pre>
 * -R &lt;report frequency&gt;
 *  Set frequency of generation reports.
 *  e.g, setting the value to 5 will
 *  report every 5th generation
 *  (default = number of generations)
 * </pre>
 *
 * <pre>
 * -S &lt;seed&gt;
 *  Set the random number seed.
 *  (default = 1)
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * {@code resetOptions()} is called before any option is read, so omitted
 * options keep whatever that method establishes.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  String optionString;
  resetOptions();

  optionString = Utils.getOption('P', options);
  if (optionString.length() != 0) {
    setStartSet(optionString);
  }

  optionString = Utils.getOption('Z', options);
  if (optionString.length() != 0) {
    setPopulationSize(Integer.parseInt(optionString));
  }

  optionString = Utils.getOption('G', options);
  if (optionString.length() != 0) {
    int generations = Integer.parseInt(optionString);
    setMaxGenerations(generations);
    // Report frequency follows the generation count unless -R overrides it below.
    setReportFrequency(generations);
  }

  optionString = Utils.getOption('C', options);
  if (optionString.length() != 0) {
    setCrossoverProb(Double.parseDouble(optionString));
  }

  optionString = Utils.getOption('M', options);
  if (optionString.length() != 0) {
    setMutationProb(Double.parseDouble(optionString));
  }

  optionString = Utils.getOption('R', options);
  if (optionString.length() != 0) {
    setReportFrequency(Integer.parseInt(optionString));
  }

  optionString = Utils.getOption('S', options);
  if (optionString.length() != 0) {
    setSeed(Integer.parseInt(optionString));
  }

  Utils.checkForRemainingOptions(options);
}
/**
 * Configures this object from a command-line style option array.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -S &lt;num&gt;
 *  Specify the random number seed (default 1)
 * </pre>
 *
 * <pre>
 * -Z &lt;num&gt;
 *  The size of the output dataset - number of instances
 *  (default 100)
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * An option that is absent is reset to its default.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  String seedArg = Utils.getOption('S', options);
  setRandomSeed(seedArg.length() == 0 ? 1 : Integer.parseInt(seedArg));

  String sizeArg = Utils.getOption('Z', options);
  setSampleSize(sizeArg.length() == 0 ? 100 : Integer.parseInt(sizeArg));

  Utils.checkForRemainingOptions(options);
}
/**
 * Configures this object from a command-line style option array.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -C &lt;double&gt;
 *  The complexity constant C.
 *  (default 1)
 * </pre>
 *
 * <pre>
 * -N
 *  Whether to 0=normalize/1=standardize/2=neither.
 *  (default 0=normalize)
 * </pre>
 *
 * <pre>
 * -I &lt;classname and parameters&gt;
 *  Optimizer class used for solving quadratic optimization problem
 *  (default weka.classifiers.functions.supportVector.RegSMOImproved)
 * </pre>
 *
 * <pre>
 * -K &lt;classname and parameters&gt;
 *  The Kernel to use.
 *  (default: weka.classifiers.functions.supportVector.PolyKernel)
 * </pre>
 *
 * <pre>
 * Options specific to optimizer ('-I')
 * weka.classifiers.functions.supportVector.RegSMOImproved:
 * </pre>
 *
 * <pre>
 * -T &lt;double&gt;
 *  The tolerance parameter for checking the stopping criterion.
 *  (default 0.001)
 * </pre>
 *
 * <pre>
 * -V
 *  Use variant 1 of the algorithm when true, otherwise use variant 2.
 *  (default true)
 * </pre>
 *
 * <pre>
 * -P &lt;double&gt;
 *  The epsilon for round-off error.
 *  (default 1.0e-12)
 * </pre>
 *
 * <pre>
 * -L &lt;double&gt;
 *  The epsilon parameter in epsilon-insensitive loss function.
 *  (default 1.0e-3)
 * </pre>
 *
 * <pre>
 * -W &lt;double&gt;
 *  The random number seed.
 *  (default 1)
 * </pre>
 *
 * <pre>
 * Options specific to kernel ('-K')
 * weka.classifiers.functions.supportVector.PolyKernel:
 * </pre>
 *
 * <pre>
 * -D
 *  Enables debugging output (if available) to be printed.
 *  (default: off)
 * </pre>
 *
 * <pre>
 * -no-checks
 *  Turns off all checks - use with caution!
 *  (default: checks on)
 * </pre>
 *
 * <pre>
 * -C &lt;num&gt;
 *  The size of the cache (a prime number), 0 for full cache and
 *  -1 to turn it off.
 *  (default: 250007)
 * </pre>
 *
 * <pre>
 * -E &lt;num&gt;
 *  The Exponent to use.
 *  (default: 1.0)
 * </pre>
 *
 * <pre>
 * -L
 *  Use lower-order terms.
 *  (default: no)
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * For -I and -K the first token of the argument is taken as the class name
 * and the remaining tokens are passed to that class as its own options. The
 * options left over afterwards are forwarded to {@code super.setOptions}.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String complexityArg = Utils.getOption('C', options);
  setC(complexityArg.length() == 0 ? 1.0 : Double.parseDouble(complexityArg));

  String filterArg = Utils.getOption('N', options);
  if (filterArg.length() == 0) {
    setFilterType(new SelectedTag(FILTER_NORMALIZE, TAGS_FILTER));
  } else {
    setFilterType(new SelectedTag(Integer.parseInt(filterArg), TAGS_FILTER));
  }

  String[] optimizerSpec = Utils.splitOptions(Utils.getOption('I', options));
  if (optimizerSpec.length == 0) {
    setRegOptimizer(new RegSMOImproved());
  } else {
    // First token is the class name; blank it so the rest are its options.
    String optimizerClass = optimizerSpec[0];
    optimizerSpec[0] = "";
    setRegOptimizer((RegOptimizer) Utils.forName(RegOptimizer.class,
      optimizerClass, optimizerSpec));
  }

  String[] kernelSpec = Utils.splitOptions(Utils.getOption('K', options));
  if (kernelSpec.length == 0) {
    setKernel(new PolyKernel());
  } else {
    String kernelClass = kernelSpec[0];
    kernelSpec[0] = "";
    setKernel(Kernel.forName(kernelClass, kernelSpec));
  }

  super.setOptions(options);

  Utils.checkForRemainingOptions(options);
}
/**
 * Configures this object from a command-line style option array.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -I
 *  Weight neighbours by the inverse of their distance
 *  (use when k &gt; 1)
 * </pre>
 *
 * <pre>
 * -F
 *  Weight neighbours by 1 - their distance
 *  (use when k &gt; 1)
 * </pre>
 *
 * <pre>
 * -K &lt;number of neighbors&gt;
 *  Number of nearest neighbours (k) used in classification.
 *  (Default = 1)
 * </pre>
 *
 * <pre>
 * -E
 *  Minimise mean squared error rather than mean absolute
 *  error when using -X option with numeric prediction.
 * </pre>
 *
 * <pre>
 * -W &lt;window size&gt;
 *  Maximum number of training instances maintained.
 *  Training instances are dropped FIFO. (Default = no window)
 * </pre>
 *
 * <pre>
 * -X
 *  Select the number of nearest neighbours between 1
 *  and the k value specified using hold-one-out evaluation
 *  on the training data (use when k &gt; 1)
 * </pre>
 *
 * <pre>
 * -A
 *  The nearest neighbour search algorithm to use
 *  (default: weka.core.neighboursearch.LinearNNSearch).
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * -I takes precedence over -F: the -F flag is only looked up (and consumed)
 * when -I is absent.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String neighboursArg = Utils.getOption('K', options);
  setKNN(neighboursArg.length() == 0 ? 1 : Integer.parseInt(neighboursArg));

  String windowArg = Utils.getOption('W', options);
  setWindowSize(windowArg.length() == 0 ? 0 : Integer.parseInt(windowArg));

  // Order matters: -F is not read from the array when -I is present.
  if (Utils.getFlag('I', options)) {
    setDistanceWeighting(new SelectedTag(WEIGHT_INVERSE, TAGS_WEIGHTING));
  } else if (Utils.getFlag('F', options)) {
    setDistanceWeighting(new SelectedTag(WEIGHT_SIMILARITY, TAGS_WEIGHTING));
  } else {
    setDistanceWeighting(new SelectedTag(WEIGHT_NONE, TAGS_WEIGHTING));
  }

  setCrossValidate(Utils.getFlag('X', options));
  setMeanSquared(Utils.getFlag('E', options));

  String searchArg = Utils.getOption('A', options);
  if (searchArg.length() == 0) {
    this.setNearestNeighbourSearchAlgorithm(new LinearNNSearch());
  } else {
    String[] searchSpec = Utils.splitOptions(searchArg);
    if (searchSpec.length == 0) {
      throw new Exception(
        "Invalid NearestNeighbourSearch algorithm specification string.");
    }
    // First token is the class name; blank it so the rest are its options.
    String searchClass = searchSpec[0];
    searchSpec[0] = "";
    setNearestNeighbourSearchAlgorithm((NearestNeighbourSearch) Utils.forName(
      NearestNeighbourSearch.class, searchClass, searchSpec));
  }

  Utils.checkForRemainingOptions(options);
}
/** * Test the class from the command line. The instance query should be specified with -Q sql_query * * @param args contains options for the instance query */ public static void main(String args[]) { try { InstanceQuery iq = new InstanceQuery(); String query = Utils.getOption('Q', args); if (query.length() == 0) { iq.setQuery("select * from Experiment_index"); } else { iq.setQuery(query); } iq.setOptions(args); try { Utils.checkForRemainingOptions(args); } catch (Exception e) { System.err.println("Options for weka.experiment.InstanceQuery:\n"); Enumeration en = iq.listOptions(); while (en.hasMoreElements()) { Option o = (Option) en.nextElement(); System.err.println(o.synopsis() + "\n" + o.description()); } System.exit(1); } Instances aha = iq.retrieveInstances(); iq.disconnectFromDatabase(); // query returned no result -> exit if (aha == null) return; // The dataset may be large, so to make things easier we'll // output an instance at a time (rather than having to convert // the entire dataset to one large string) System.out.println(new Instances(aha, 0)); for (int i = 0; i < aha.numInstances(); i++) { System.out.println(aha.instance(i)); } } catch (Exception e) { e.printStackTrace(); System.err.println(e.getMessage()); } }
/**
 * Configures this object from a command-line style option array.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -C &lt;col&gt;
 *  Sets the attribute index (default last).
 * </pre>
 *
 * <pre>
 * -F &lt;value index&gt;
 *  Sets the output date format string (default corresponds to ISO-8601).
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * An option that is absent is reset to its default ({@code "last"} and
 * {@code DEFAULT_FORMAT} respectively). When an input format is already
 * present it is handed to {@code setInputFormat} again afterwards.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  String attributeArg = Utils.getOption('C', options);
  setAttributeIndex(attributeArg.length() == 0 ? "last" : attributeArg);

  String formatArg = Utils.getOption('F', options);
  setDateFormat(formatArg.length() == 0 ? DEFAULT_FORMAT : formatArg);

  if (getInputFormat() != null) {
    setInputFormat(getInputFormat());
  }

  Utils.checkForRemainingOptions(options);
}
/**
 * Configures this object from a command-line style option array.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -D
 *  Turn on debugging output.
 * </pre>
 *
 * <pre>
 * -R &lt;ridge&gt;
 *  Set the ridge in the log-likelihood.
 * </pre>
 *
 * <pre>
 * -M &lt;number&gt;
 *  Set the maximum number of iterations (default -1, until convergence).
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * In addition a {@code -C} flag is read and passed to
 * {@code setUseConjugateGradientDescent}. When -R is omitted the ridge is
 * reset to 1.0e-8. -D is not read here (NOTE(review): presumably handled by
 * {@code super.setOptions} - confirm against the superclass).
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  setUseConjugateGradientDescent(Utils.getFlag('C', options));

  String ridgeArg = Utils.getOption('R', options);
  m_Ridge = ridgeArg.length() == 0 ? 1.0e-8 : Double.parseDouble(ridgeArg);

  String iterationsArg = Utils.getOption('M', options);
  m_MaxIts = iterationsArg.length() == 0 ? -1 : Integer.parseInt(iterationsArg);

  super.setOptions(options);

  Utils.checkForRemainingOptions(options);
}
/**
 * Test method for this class. Parses the command-line options, runs the
 * decomposition and prints the result to standard output.
 *
 * <p>
 * If option parsing fails, the failure message followed by the list of
 * supported options is written to standard error.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
  try {
    BVDecompose bvd = new BVDecompose();

    try {
      bvd.setOptions(args);
      Utils.checkForRemainingOptions(args);
    } catch (Exception ex) {
      // append() renders a null message as "null", matching string concatenation.
      StringBuilder result = new StringBuilder();
      result.append(ex.getMessage()).append("\nBVDecompose Options:\n\n");
      Enumeration<Option> enu = bvd.listOptions();
      while (enu.hasMoreElements()) {
        Option option = enu.nextElement();
        result.append(option.synopsis()).append("\n")
          .append(option.description()).append("\n");
      }
      // Keep the original failure attached as the cause.
      throw new Exception(result.toString(), ex);
    }

    bvd.decompose();
    System.out.println(bvd.toString());
  } catch (Exception ex) {
    System.err.println(ex.getMessage());
  }
}
/**
 * Configures this object from a command-line style option array.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -W
 *  Use word frequencies instead of binary bag of words.
 * </pre>
 *
 * <pre>
 * -P &lt;# instances&gt;
 *  How often to prune the dictionary of low frequency words
 *  (default = 0, i.e. don't prune)
 * </pre>
 *
 * <pre>
 * -M &lt;double&gt;
 *  Minimum word frequency. Words with less than this frequency are ignored.
 *  If periodic pruning is turned on then this is also used to determine which
 *  words to remove from the dictionary (default = 3).
 * </pre>
 *
 * <pre>
 * -normalize
 *  Normalize document length (use in conjunction with -norm and -lnorm)
 * </pre>
 *
 * <pre>
 * -norm &lt;num&gt;
 *  Specify the norm that each instance must have (default 1.0)
 * </pre>
 *
 * <pre>
 * -lnorm &lt;num&gt;
 *  Specify L-norm to use (default 2.0)
 * </pre>
 *
 * <pre>
 * -lowercase
 *  Convert all tokens to lowercase before adding to the dictionary.
 * </pre>
 *
 * <pre>
 * -stoplist
 *  Ignore words that are in the stoplist.
 * </pre>
 *
 * <pre>
 * -stopwords &lt;file&gt;
 *  A file containing stopwords to override the default ones.
 *  Using this option automatically sets the flag ('-stoplist') to use the
 *  stoplist if the file exists.
 *  Format: one stopword per line, lines starting with '#'
 *  are interpreted as comments and ignored.
 * </pre>
 *
 * <pre>
 * -tokenizer &lt;spec&gt;
 *  The tokenizing algorithm (classname plus parameters) to use.
 *  (default: weka.core.tokenizers.WordTokenizer)
 * </pre>
 *
 * <pre>
 * -stemmer &lt;spec&gt;
 *  The stemming algorithm (classname plus parameters) to use.
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * {@code reset()} and {@code super.setOptions} run before any of the options
 * above are read. For -tokenizer and -stemmer the first token of the
 * specification is the class name; the class is instantiated reflectively
 * and, if it is an {@code OptionHandler}, given the remaining tokens.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  reset();
  super.setOptions(options);

  setUseWordFrequencies(Utils.getFlag("W", options));

  String pruneArg = Utils.getOption("P", options);
  if (!pruneArg.isEmpty()) {
    setPeriodicPruning(Integer.parseInt(pruneArg));
  }

  String minFrequencyArg = Utils.getOption("M", options);
  if (!minFrequencyArg.isEmpty()) {
    setMinWordFrequency(Double.parseDouble(minFrequencyArg));
  }

  setNormalizeDocLength(Utils.getFlag("normalize", options));

  String normArg = Utils.getOption("norm", options);
  if (!normArg.isEmpty()) {
    setNorm(Double.parseDouble(normArg));
  }

  String lnormArg = Utils.getOption("lnorm", options);
  if (!lnormArg.isEmpty()) {
    setLNorm(Double.parseDouble(lnormArg));
  }

  setLowercaseTokens(Utils.getFlag("lowercase", options));
  setUseStopList(Utils.getFlag("stoplist", options));

  String stopwordsArg = Utils.getOption("stopwords", options);
  setStopwords(stopwordsArg.isEmpty() ? null : new File(stopwordsArg));

  String tokenizerArg = Utils.getOption("tokenizer", options);
  if (tokenizerArg.length() != 0) {
    String[] tokenizerParts = Utils.splitOptions(tokenizerArg);
    if (tokenizerParts.length == 0) {
      throw new Exception("Invalid tokenizer specification string");
    }
    // First token is the class name; blank it so the rest are its options.
    String tokenizerClass = tokenizerParts[0];
    tokenizerParts[0] = "";
    Tokenizer custom = (Tokenizer) Class.forName(tokenizerClass).newInstance();
    if (custom instanceof OptionHandler) {
      ((OptionHandler) custom).setOptions(tokenizerParts);
    }
    setTokenizer(custom);
  } else {
    setTokenizer(new WordTokenizer());
  }

  String stemmerArg = Utils.getOption("stemmer", options);
  if (stemmerArg.length() != 0) {
    String[] stemmerParts = Utils.splitOptions(stemmerArg);
    if (stemmerParts.length == 0) {
      throw new Exception("Invalid stemmer specification string");
    }
    String stemmerClass = stemmerParts[0];
    stemmerParts[0] = "";
    Stemmer custom = (Stemmer) Class.forName(stemmerClass).newInstance();
    if (custom instanceof OptionHandler) {
      ((OptionHandler) custom).setOptions(stemmerParts);
    }
    setStemmer(custom);
  } else {
    setStemmer(null);
  }

  Utils.checkForRemainingOptions(options);
}
/**
 * Configures this object from a command-line style option array.
 *
 * <p>
 * <!-- options-start -->
 * Valid options are:
 *
 * <pre>
 * -D
 *  Turns on output of debugging information.
 * </pre>
 *
 * <pre>
 * -min &lt;double&gt;
 *  The minimum threshold. (default -Double.MAX_VALUE)
 * </pre>
 *
 * <pre>
 * -min-default &lt;double&gt;
 *  The replacement for values smaller than the minimum threshold.
 *  (default -Double.MAX_VALUE)
 * </pre>
 *
 * <pre>
 * -max &lt;double&gt;
 *  The maximum threshold. (default Double.MAX_VALUE)
 * </pre>
 *
 * <pre>
 * -max-default &lt;double&gt;
 *  The replacement for values larger than the maximum threshold.
 *  (default Double.MAX_VALUE)
 * </pre>
 *
 * <pre>
 * -closeto &lt;double&gt;
 *  The number values are checked for closeness. (default 0)
 * </pre>
 *
 * <pre>
 * -closeto-default &lt;double&gt;
 *  The replacement for values that are close to '-closeto'.
 *  (default 0)
 * </pre>
 *
 * <pre>
 * -closeto-tolerance &lt;double&gt;
 *  The tolerance below which numbers are considered being close
 *  to each other. (default 1E-6)
 * </pre>
 *
 * <pre>
 * -decimals &lt;int&gt;
 *  The number of decimals to round to, -1 means no rounding at all.
 *  (default -1)
 * </pre>
 *
 * <pre>
 * -R &lt;col1,col2,...&gt;
 *  The list of columns to cleanse, e.g., first-last or first-3,5-last.
 *  (default first-last)
 * </pre>
 *
 * <pre>
 * -V
 *  Inverts the matching sense.
 * </pre>
 *
 * <pre>
 * -include-class
 *  Whether to include the class in the cleansing.
 *  The class column will always be skipped, if this flag is not
 *  present. (default no)
 * </pre>
 *
 * <!-- options-end -->
 *
 * <p>
 * An option that is absent is reset to its default. -D is not read here; the
 * leftover options are forwarded to {@code super.setOptions}
 * (NOTE(review): presumably -D is consumed there - confirm).
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  String arg;

  arg = Utils.getOption("min", options);
  setMinThreshold(arg.isEmpty() ? -Double.MAX_VALUE : Double.parseDouble(arg));

  arg = Utils.getOption("min-default", options);
  setMinDefault(arg.isEmpty() ? -Double.MAX_VALUE : Double.parseDouble(arg));

  arg = Utils.getOption("max", options);
  setMaxThreshold(arg.isEmpty() ? Double.MAX_VALUE : Double.parseDouble(arg));

  arg = Utils.getOption("max-default", options);
  setMaxDefault(arg.isEmpty() ? Double.MAX_VALUE : Double.parseDouble(arg));

  arg = Utils.getOption("closeto", options);
  setCloseTo(arg.isEmpty() ? 0 : Double.parseDouble(arg));

  arg = Utils.getOption("closeto-default", options);
  setCloseToDefault(arg.isEmpty() ? 0 : Double.parseDouble(arg));

  arg = Utils.getOption("closeto-tolerance", options);
  setCloseToTolerance(arg.isEmpty() ? 1E-6 : Double.parseDouble(arg));

  arg = Utils.getOption("R", options);
  setAttributeIndices(arg.isEmpty() ? "first-last" : arg);

  setInvertSelection(Utils.getFlag("V", options));
  setIncludeClass(Utils.getFlag("include-class", options));

  arg = Utils.getOption("decimals", options);
  setDecimals(arg.isEmpty() ? -1 : Integer.parseInt(arg));

  super.setOptions(options);

  Utils.checkForRemainingOptions(options);
}
/** * Method for testing filters. * * @param filter the filter to use * @param options should contain the following arguments: <br> * -i input_file <br> * -o output_file <br> * -c class_index <br> * -z classname (for filters implementing weka.filters.Sourcable) <br> * or -h for help on options * @throws Exception if something goes wrong or the user requests help on command options */ public static void filterFile(Filter filter, String[] options) throws Exception { boolean debug = false; Instances data = null; DataSource input = null; PrintWriter output = null; boolean helpRequest; String sourceCode = ""; try { helpRequest = Utils.getFlag('h', options); if (Utils.getFlag('d', options)) { debug = true; } String infileName = Utils.getOption('i', options); String outfileName = Utils.getOption('o', options); String classIndex = Utils.getOption('c', options); if (filter instanceof Sourcable) sourceCode = Utils.getOption('z', options); if (filter instanceof OptionHandler) { ((OptionHandler) filter).setOptions(options); } Utils.checkForRemainingOptions(options); if (helpRequest) { throw new Exception("Help requested.\n"); } if (infileName.length() != 0) { input = new DataSource(infileName); } else { input = new DataSource(System.in); } if (outfileName.length() != 0) { output = new PrintWriter(new FileOutputStream(outfileName)); } else { output = new PrintWriter(System.out); } data = input.getStructure(); if (classIndex.length() != 0) { if (classIndex.equals("first")) { data.setClassIndex(0); } else if (classIndex.equals("last")) { data.setClassIndex(data.numAttributes() - 1); } else { data.setClassIndex(Integer.parseInt(classIndex) - 1); } } } catch (Exception ex) { String filterOptions = ""; // Output the error and also the valid options if (filter instanceof OptionHandler) { filterOptions += "\nFilter options:\n\n"; Enumeration enu = ((OptionHandler) filter).listOptions(); while (enu.hasMoreElements()) { Option option = (Option) enu.nextElement(); filterOptions += 
option.synopsis() + '\n' + option.description() + "\n"; } } String genericOptions = "\nGeneral options:\n\n" + "-h\n" + "\tGet help on available options.\n" + "\t(use -b -h for help on batch mode.)\n" + "-i <file>\n" + "\tThe name of the file containing input instances.\n" + "\tIf not supplied then instances will be read from stdin.\n" + "-o <file>\n" + "\tThe name of the file output instances will be written to.\n" + "\tIf not supplied then instances will be written to stdout.\n" + "-c <class index>\n" + "\tThe number of the attribute to use as the class.\n" + "\t\"first\" and \"last\" are also valid entries.\n" + "\tIf not supplied then no class is assigned.\n"; if (filter instanceof Sourcable) { genericOptions += "-z <class name>\n" + "\tOutputs the source code representing the trained filter.\n"; } throw new Exception('\n' + ex.getMessage() + filterOptions + genericOptions); } if (debug) { System.err.println("Setting input format"); } boolean printedHeader = false; if (filter.setInputFormat(data)) { if (debug) { System.err.println("Getting output format"); } output.println(filter.getOutputFormat().toString()); printedHeader = true; } // Pass all the instances to the filter Instance inst; while (input.hasMoreElements(data)) { inst = input.nextElement(data); if (debug) { System.err.println("Input instance to filter"); } if (filter.input(inst)) { if (debug) { System.err.println("Filter said collect immediately"); } if (!printedHeader) { throw new Error("Filter didn't return true from setInputFormat() " + "earlier!"); } if (debug) { System.err.println("Getting output instance"); } output.println(filter.output().toString()); } } // Say that input has finished, and print any pending output instances if (debug) { System.err.println("Setting end of batch"); } if (filter.batchFinished()) { if (debug) { System.err.println("Filter said collect output"); } if (!printedHeader) { if (debug) { System.err.println("Getting output format"); } 
output.println(filter.getOutputFormat().toString()); } if (debug) { System.err.println("Getting output instance"); } while (filter.numPendingOutput() > 0) { output.println(filter.output().toString()); if (debug) { System.err.println("Getting output instance"); } } } if (debug) { System.err.println("Done"); } if (output != null) { output.close(); } if (sourceCode.length() != 0) System.out.println( wekaStaticWrapper((Sourcable) filter, sourceCode, data, filter.getOutputFormat())); }
/** * Configures/Runs the Experiment from the command line. * * @param args command line arguments to the Experiment. */ public static void main(String[] args) { try { weka.core.WekaPackageManager.loadPackages(false, true, false); RemoteExperiment exp = null; // get options from XML? String xmlOption = Utils.getOption("xml", args); if (!xmlOption.equals("")) { args = new XMLOptions(xmlOption).toArray(); } Experiment base = null; String expFile = Utils.getOption('l', args); String saveFile = Utils.getOption('s', args); boolean runExp = Utils.getFlag('r', args); ArrayList<String> remoteHosts = new ArrayList<String>(); String runHost = " "; while (runHost.length() != 0) { runHost = Utils.getOption('h', args); if (runHost.length() != 0) { remoteHosts.add(runHost); } } if (expFile.length() == 0) { base = new Experiment(); try { base.setOptions(args); Utils.checkForRemainingOptions(args); } catch (Exception ex) { ex.printStackTrace(); String result = "Usage:\n\n" + "-l <exp file>\n" + "\tLoad experiment from file (default use cli options)\n" + "-s <exp file>\n" + "\tSave experiment to file after setting other options\n" + "\t(default don't save)\n" + "-h <remote host name>\n" + "\tHost to run experiment on (may be specified more than once\n" + "\tfor multiple remote hosts)\n" + "-r \n" + "\tRun experiment on (default don't run)\n" + "-xml <filename | xml-string>\n" + "\tget options from XML-Data instead from parameters\n" + "\n"; Enumeration<Option> enm = ((OptionHandler) base).listOptions(); while (enm.hasMoreElements()) { Option option = enm.nextElement(); result += option.synopsis() + "\n"; result += option.description() + "\n"; } throw new Exception(result + "\n" + ex.getMessage()); } } else { Object tmp; // KOML? if ((KOML.isPresent()) && (expFile.toLowerCase().endsWith(KOML.FILE_EXTENSION))) { tmp = KOML.read(expFile); } else // XML? 
if (expFile.toLowerCase().endsWith(".xml")) { XMLExperiment xml = new XMLExperiment(); tmp = xml.read(expFile); } // binary else { FileInputStream fi = new FileInputStream(expFile); ObjectInputStream oi = new ObjectInputStream(new BufferedInputStream(fi)); tmp = oi.readObject(); oi.close(); } if (tmp instanceof RemoteExperiment) { exp = (RemoteExperiment) tmp; } else { base = (Experiment) tmp; } } if (base != null) { exp = new RemoteExperiment(base); } for (int i = 0; i < remoteHosts.size(); i++) { exp.addRemoteHost(remoteHosts.get(i)); } System.err.println("Experiment:\n" + exp.toString()); if (saveFile.length() != 0) { // KOML? if ((KOML.isPresent()) && (saveFile.toLowerCase().endsWith(KOML.FILE_EXTENSION))) { KOML.write(saveFile, exp); } else // XML? if (saveFile.toLowerCase().endsWith(".xml")) { XMLExperiment xml = new XMLExperiment(); xml.write(saveFile, exp); } // binary else { FileOutputStream fo = new FileOutputStream(saveFile); ObjectOutputStream oo = new ObjectOutputStream(new BufferedOutputStream(fo)); oo.writeObject(exp); oo.close(); } } if (runExp) { System.err.println("Initializing..."); exp.initialize(); System.err.println("Iterating..."); exp.runExperiment(); System.err.println("Postprocessing..."); exp.postProcess(); } } catch (Exception ex) { ex.printStackTrace(); System.err.println(ex.getMessage()); } }
/** * Parses a given list of options controlling the behaviour of this object. Valid options are: * * <p>-l "directory name" <br> * Specifies name of directory. * * <p>-m "model name" <br> * Specifies name of model. * * <p>-v "vocabulary name" <br> * Specifies vocabulary name. * * <p>-f "vocabulary format" <br> * Specifies vocabulary format. * * <p>-i "document language" <br> * Specifies document language. * * <p>-e "encoding" <br> * Specifies encoding. * * <p>-w "WikipediaDatabase@WikipediaServer" <br> * Specifies wikipedia data. * * <p>-d<br> * Turns debugging mode on. * * <p>-x "length"<br> * Sets maximum phrase length (default: 3). * * <p>-y "length"<br> * Sets minimum phrase length (default: 3). * * <p>-o "number"<br> * The minimum number of times a phrase needs to occur (default: 2). * * <p>-s "name of class implementing list of stop words"<br> * Sets list of stop words to used (default: StopwordsEnglish). * * <p>-t "name of class implementing stemmer"<br> * Sets stemmer to use (default: IteratedLovinsStemmer). 
* * <p> * * @param options the list of options as an array of strings * @exception Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { String dirName = Utils.getOption('l', options); if (dirName.length() > 0) { setDirName(dirName); } else { setDirName(null); throw new Exception("Name of directory required argument."); } String modelName = Utils.getOption('m', options); if (modelName.length() > 0) { setModelName(modelName); } else { setModelName(null); throw new Exception("Name of model required argument."); } String vocabularyName = Utils.getOption('v', options); if (vocabularyName.length() > 0) { setVocabularyName(vocabularyName); } String vocabularyFormat = Utils.getOption('f', options); if (!getVocabularyName().equals("none") && !getVocabularyName().equals("wikipedia")) { if (vocabularyFormat.length() > 0) { if (vocabularyFormat.equals("skos") || vocabularyFormat.equals("text")) { setVocabularyFormat(vocabularyFormat); } else { throw new Exception( "Unsupported format of vocabulary. 
It should be either \"skos\" or \"text\"."); } } else { setVocabularyFormat(null); throw new Exception( "If a controlled vocabulary is used, format of vocabulary required argument (skos or text)."); } } else { setVocabularyFormat(null); } String encoding = Utils.getOption('e', options); if (encoding.length() > 0) { setEncoding(encoding); } else { setEncoding("default"); } String wikipediaConnection = Utils.getOption('w', options); if (wikipediaConnection.length() > 0) { setWikipediaConnection(wikipediaConnection); } String documentLanguage = Utils.getOption('i', options); if (documentLanguage.length() > 0) { setDocumentLanguage(documentLanguage); } else { setDocumentLanguage("en"); } String maxPhraseLengthString = Utils.getOption('x', options); if (maxPhraseLengthString.length() > 0) { setMaxPhraseLength(Integer.parseInt(maxPhraseLengthString)); } else { setMaxPhraseLength(5); } String minPhraseLengthString = Utils.getOption('y', options); if (minPhraseLengthString.length() > 0) { setMinPhraseLength(Integer.parseInt(minPhraseLengthString)); } else { setMinPhraseLength(1); } String minNumOccurString = Utils.getOption('o', options); if (minNumOccurString.length() > 0) { setMinNumOccur(Integer.parseInt(minNumOccurString)); } else { setMinNumOccur(2); } String stopwordsString = Utils.getOption('s', options); if (stopwordsString.length() > 0) { stopwordsString = "kea.stopwords.".concat(stopwordsString); setStopwords((Stopwords) Class.forName(stopwordsString).newInstance()); } String stemmerString = Utils.getOption('t', options); if (stemmerString.length() > 0) { stemmerString = "kea.stemmers.".concat(stemmerString); setStemmer((Stemmer) Class.forName(stemmerString).newInstance()); } setDebug(Utils.getFlag('d', options)); Utils.checkForRemainingOptions(options); }