/**
   * Loads a training and a test corpus from two directories, converts both to word-vector form
   * using n-gram tokens, and passes the results to the model-training routine.
   *
   * <p>A single {@code TextDirectoryLoader} reads the training directory first and the test
   * directory second. One {@code StringToWordVector} filter, equipped with an
   * {@code NGramTokenizer} whose minimum and maximum n-gram size are both {@code gram}, receives
   * the raw training data as its input format and is then applied to the training data followed by
   * the test data.
   *
   * <p>Any exception raised during loading, filtering or training is caught here; its stack trace
   * is printed and nothing is rethrown to the caller.
   *
   * @param filePathTrain path of the directory the training data is loaded from
   * @param filePathTest path of the directory the test data is loaded from
   * @param gram n-gram size, used as both the lower and the upper bound of the tokenizer
   */
  public static void makeDataSet(String filePathTrain, String filePathTest, int gram) {
    TextDirectoryLoader directoryLoader = new TextDirectoryLoader();

    try {
      // Read both corpora with the same loader, training directory first.
      directoryLoader.setDirectory(new File(filePathTrain));
      Instances rawTrain = directoryLoader.getDataSet();

      directoryLoader.setDirectory(new File(filePathTest));
      Instances rawTest = directoryLoader.getDataSet();

      // Fixed-size n-grams: lower and upper bound are the same value.
      NGramTokenizer ngramTokenizer = new NGramTokenizer();
      ngramTokenizer.setNGramMinSize(gram);
      ngramTokenizer.setNGramMaxSize(gram);

      StringToWordVector wordVectorFilter = new StringToWordVector();
      wordVectorFilter.setTokenizer(ngramTokenizer);
      wordVectorFilter.setInputFormat(rawTrain);

      Instances train = Filter.useFilter(rawTrain, wordVectorFilter);

      // The filter keeps the input format taken from the training data; it is not
      // reset with the test data before being applied to it.
      Instances test = Filter.useFilter(rawTest, wordVectorFilter);

      // Swap this call for another training routine to try a different model.
      trainModelNaiveBayes(train, test);
    } catch (Exception e) {
      // Covers I/O failures as well as anything else thrown above; only reported, not rethrown.
      e.printStackTrace();
    }
  }
Ejemplo n.º 2
0
  /**
   * Parses a given list of options.
   *
   * <p>
   * <!-- options-start -->
   * Valid options are:
   *
   * <p>
   *
   * <pre> -D
   *  Enables debug output.
   *  (default: off)</pre>
   *
   * <pre> -F
   *  Stores the filename in an additional attribute.
   *  (default: off)</pre>
   *
   * <pre> -dir &lt;directory&gt;
   *  The directory to work on.
   *  (default: current directory)</pre>
   *
   * <pre> -charset &lt;charset name&gt;
   *  The character set to use, e.g UTF-8.
   *  (default: use the default character set)</pre>
   *
   * <pre> -R
   *  Retain all string attribute values when reading incrementally.</pre>
   *
   * <!-- options-end -->
   *
   * <p>When {@code -dir} is missing or empty, the directory is set to the JVM's current working
   * directory (system property {@code user.dir}), matching the default documented above.
   *
   * @param options the options
   * @throws Exception if options cannot be set
   */
  public void setOptions(String[] options) throws Exception {
    setDebug(Utils.getFlag("D", options));

    setOutputFilename(Utils.getFlag("F", options));

    // A missing option comes back as an empty string (the charset handling below relies on
    // the same convention). new File("") is the empty abstract pathname, not the working
    // directory, so apply the documented default explicitly.
    String dir = Utils.getOption("dir", options);
    if (dir.length() > 0) {
      setDirectory(new File(dir));
    } else {
      setDirectory(new File(System.getProperty("user.dir")));
    }

    // An absent -charset leaves the field as the empty string.
    String charSet = Utils.getOption("charset", options);
    m_charSet = charSet.length() > 0 ? charSet : "";

    // Same String-based flag lookup as for -D and -F.
    setRetainStringValues(Utils.getFlag("R", options));
  }