/**
   * SVM trainer
   *
   * @param dataTrain
   * @param dataTest
   */
  public static void trainModelLibSVM(Instances dataTrain, Instances dataTest) {
    try {
      LibSVM classifier = new LibSVM();

      CVParameterSelection ps = new CVParameterSelection();
      ps.setClassifier(classifier);
      ps.setNumFolds(5); // using 5-fold CV
      // ps.addCVParameter("C 0.1 0.5 5");

      // build and output best options
      ps.buildClassifier(dataTrain);

      Evaluation eval = new Evaluation(dataTrain);
      eval.evaluateModel(ps, dataTest);
      System.out.println("Results of the set :::::::::::::::::::::: ");
      System.out.println(
          "Percentage of correctly classified instances : "
              + eval.pctCorrect()
              + "\n"
              + "Percentage of incorrectly classified instances : "
              + eval.pctIncorrect());
      System.out.println("No of correct predictions : " + eval.correct());
      System.out.println("TRUTHFUL");
      System.out.println(
          "Precision : "
              + eval.precision(0)
              + "\n"
              + "Recall : "
              + eval.recall(0)
              + "\n"
              + "F measure/score  : "
              + eval.fMeasure(0));
      System.out.println("DECEPTIVE");
      System.out.println(
          "Precision : "
              + eval.precision(0)
              + "\n"
              + "Recall : "
              + eval.recall(1)
              + "\n"
              + "F measure/score  : "
              + eval.fMeasure(1));

    } catch (Exception e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }
  public static void execSVM(String expName) {
    try {
      FileWriter outFile = null;
      PrintWriter out = null;
      outFile = new FileWriter(expName + "-SVM.results");
      out = new PrintWriter(outFile);
      DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
      ProcessTweets tweetsProcessor = null;
      System.out.println("***************************************");
      System.out.println("***\tEXECUTING TEST\t" + expName + "***");
      System.out.println("+++++++++++++++++++++++++++++++++++++++");
      out.println("***************************************");
      out.println("***\tEXECUTING TEST\t" + expName + "***");
      out.println("+++++++++++++++++++++++++++++++++++++++");
      out.println("4-Generate classifier " + dateFormat.format(new Date()));

      Classifier cls = null;
      DataSource sourceTrain = new DataSource(expName + "-train.arff");
      Instances dataTrain = sourceTrain.getDataSet();
      if (dataTrain.classIndex() == -1) dataTrain.setClassIndex(dataTrain.numAttributes() - 1);
      // Entreno el clasificador
      // cls = new weka.classifiers.functions.LibSVM();
      int clase = dataTrain.numAttributes() - 1;
      cls = new weka.classifiers.bayes.ComplementNaiveBayes();
      dataTrain.setClassIndex(clase);
      cls.buildClassifier(dataTrain);
      ObjectOutputStream oos =
          new ObjectOutputStream(new FileOutputStream(expName + "-SVM.classifier"));
      oos.writeObject(cls);
      oos.flush();
      oos.close();
      DataSource sourceTest = new DataSource(expName + "-test.arff");
      Instances dataTest = sourceTest.getDataSet();
      dataTest.setClassIndex(clase);
      Evaluation eval = new Evaluation(dataTest);
      eval.evaluateModel(cls, dataTest);
      // Ahora calculo los valores precision, recall y fmeasure. Además saco las matrices de
      // confusion

      float precision = 0;
      float recall = 0;
      float fmeasure = 0;
      int topeTopics = 8;
      for (int ind = 0; ind < topeTopics; ind++) {
        precision += eval.precision(ind);
        recall += eval.recall(ind);
        fmeasure += eval.fMeasure(ind);
      }
      precision = precision / topeTopics;
      recall = recall / topeTopics;
      fmeasure = fmeasure / topeTopics;
      System.out.println("++++++++++++++ CNB ++++++++++++++++++++");
      System.out.println(eval.toMatrixString());
      System.out.println("+++++++++++++++++++++++++++++++++++++++");
      System.out.printf("Precision: %.3f\n", precision);
      System.out.printf("Recall: %.3f\n", recall);
      System.out.printf("F-measure: %.3f\n", fmeasure);
      System.out.println("***************************************");
      out.println("++++++++++++++ CNB ++++++++++++++++++++");
      out.println(eval.toMatrixString());
      out.println("+++++++++++++++++++++++++++++++++++++++");
      out.printf("Precision: %.3f\n", precision);
      out.printf("Recall: %.3f\n", recall);
      out.printf("F-measure: %.3f\n", fmeasure);
      out.println("***************************************");
      // OTRO CLASIFICADOR ZeroR
      cls = new weka.classifiers.rules.ZeroR();
      dataTrain.setClassIndex(clase);
      cls.buildClassifier(dataTrain);
      eval = new Evaluation(dataTest);
      eval.evaluateModel(cls, dataTest);
      precision = 0;
      recall = 0;
      fmeasure = 0;
      for (int ind = 0; ind < topeTopics; ind++) {
        precision += eval.precision(ind);
        recall += eval.recall(ind);
        fmeasure += eval.fMeasure(ind);
      }
      precision = precision / topeTopics;
      recall = recall / topeTopics;
      fmeasure = fmeasure / topeTopics;
      System.out.println("++++++++++++++ ZEROR ++++++++++++++++++++");
      System.out.println(eval.toMatrixString());
      System.out.println("+++++++++++++++++++++++++++++++++++++++");
      System.out.printf("Precision: %.3f\n", precision);
      System.out.printf("Recall: %.3f\n", recall);
      System.out.printf("F-measure: %.3f\n", fmeasure);
      System.out.println("***************************************");
      out.println("++++++++++++++ ZEROR ++++++++++++++++++++");
      out.println(eval.toMatrixString());
      out.println("+++++++++++++++++++++++++++++++++++++++");
      out.printf("Precision: %.3f\n", precision);
      out.printf("Recall: %.3f\n", recall);
      out.printf("F-measure: %.3f\n", fmeasure);
      out.println("***************************************");
      // OTRO CLASIFICADOR J48
      /*
      			cls = new weka.classifiers.trees.J48();
      			dataTrain.setClassIndex(clase);
      			cls.buildClassifier(dataTrain);
      			eval = new Evaluation(dataTest);
      			eval.evaluateModel(cls, dataTest);
      			precision=0;
      			recall=0;
      			fmeasure=0;
      			for(int ind=0; ind<topeTopics; ind++)
      			{
      				precision += eval.precision(ind);
      				recall += eval.recall(ind);
      				fmeasure += eval.fMeasure(ind);
      			}
      			precision = precision / topeTopics;
      			recall = recall / topeTopics;
      			fmeasure = fmeasure / topeTopics;
      			System.out.println("++++++++++++++ J48 ++++++++++++++++++++");
      			System.out.println(eval.toMatrixString());
      			System.out.println("+++++++++++++++++++++++++++++++++++++++");
      			System.out.printf("Precision: %.3f\n", precision);
      			System.out.printf("Recall: %.3f\n", recall);
      			System.out.printf("F-measure: %.3f\n", fmeasure);
      			System.out.println("***************************************");
      			out.println("++++++++++++++ J48 ++++++++++++++++++++");
      			out.println(eval.toMatrixString());
      			out.println("+++++++++++++++++++++++++++++++++++++++");
      			out.printf("Precision: %.3f\n", precision);
      			out.printf("Recall: %.3f\n", recall);
      			out.printf("F-measure: %.3f\n", fmeasure);
      			out.println("***************************************");

      //OTRO SMO
      			cls = new weka.classifiers.functions.SMO();
      			dataTrain.setClassIndex(clase);
      			cls.buildClassifier(dataTrain);
      			eval = new Evaluation(dataTest);
      			eval.evaluateModel(cls, dataTest);
      			precision=0;
      			recall=0;
      			fmeasure=0;
      			for(int ind=0; ind<topeTopics; ind++)
      			{
      				precision += eval.precision(ind);
      				recall += eval.recall(ind);
      				fmeasure += eval.fMeasure(ind);
      			}
      			precision = precision / topeTopics;
      			recall = recall / topeTopics;
      			fmeasure = fmeasure / topeTopics;
      			System.out.println("++++++++++++++ SMO ++++++++++++++++++++");
      			System.out.println(eval.toMatrixString());
      			System.out.println("+++++++++++++++++++++++++++++++++++++++");
      			System.out.printf("Precision: %.3f\n", precision);
      			System.out.printf("Recall: %.3f\n", recall);
      			System.out.printf("F-measure: %.3f\n", fmeasure);
      			System.out.println("***************************************");
      			out.println("++++++++++++++ SMO ++++++++++++++++++++");
      			out.println(eval.toMatrixString());
      			out.println("+++++++++++++++++++++++++++++++++++++++");
      			out.printf("Precision: %.3f\n", precision);
      			out.printf("Recall: %.3f\n", recall);
      			out.printf("F-measure: %.3f\n", fmeasure);
      			out.println("***************************************");
      */
      out.flush();
      out.close();
      dataTest.delete();
      dataTrain.delete();
    } catch (FileNotFoundException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (Exception e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }
  public void exec(PrintWriter printer) {
    try {
      FileWriter outFile = null;
      PrintWriter out = null;
      if (printer == null) {
        outFile = new FileWriter(id + ".results");
        out = new PrintWriter(outFile);
      } else out = printer;

      DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
      ProcessTweets tweetsProcessor = null;
      System.out.println("***************************************");
      System.out.println("***\tEXECUTING TEST\t" + id + "***");
      System.out.println("+++++++++++++++++++++++++++++++++++++++");
      System.out.println("Train size:" + traincorpus.size());
      System.out.println("Test size:" + testcorpus.size());
      out.println("***************************************");
      out.println("***\tEXECUTING TEST\t***");
      out.println("+++++++++++++++++++++++++++++++++++++++");
      out.println("Train size:" + traincorpus.size());
      out.println("Test size:" + testcorpus.size());
      String cloneID = "";
      boolean clonar = false;
      if (baseline) {
        System.out.println("***************************************");
        System.out.println("***\tEXECUTING TEST BASELINE\t***");
        System.out.println("+++++++++++++++++++++++++++++++++++++++");
        System.out.println("Train size:" + traincorpus.size());
        System.out.println("Test size:" + testcorpus.size());
        out.println("***************************************");
        out.println("***\tEXECUTING TEST\t***");
        out.println("+++++++++++++++++++++++++++++++++++++++");
        out.println("Train size:" + traincorpus.size());
        out.println("Test size:" + testcorpus.size());

        BaselineClassifier base = new BaselineClassifier(testcorpus, 8);
        precision = base.getPrecision();
        recall = base.getRecall();
        fmeasure = base.getFmeasure();
        System.out.println("+++++++++++++++++++++++++++++++++++++++");
        System.out.printf("Precision: %.3f\n", precision);
        System.out.printf("Recall: %.3f\n", recall);
        System.out.printf("F-measure: %.3f\n", fmeasure);
        System.out.println("***************************************");
        out.println("+++++++++++++++++++++++++++++++++++++++");
        out.printf("Precision: %.3f\n", precision);
        out.printf("Recall: %.3f\n", recall);
        out.printf("F-measure: %.3f\n", fmeasure);
        out.println("***************************************");
        out.flush();
        out.close();
        return;
      } else {
        System.out.println("Stemming: " + stemming);
        System.out.println("Lematization:" + lematization);
        System.out.println("URLs:" + urls);
        System.out.println("Hashtags:" + hashtags);
        System.out.println("Mentions:" + mentions);
        System.out.println("Unigrams:" + unigrams);
        System.out.println("Bigrams:" + bigrams);
        System.out.println("TF:" + tf);
        System.out.println("TF-IDF:" + tfidf);
        out.println("Stemming: " + stemming);
        out.println("Lematization:" + lematization);
        out.println("URLs:" + urls);
        out.println("Hashtags:" + hashtags);
        out.println("Mentions:" + mentions);
        out.println("Unigrams:" + unigrams);
        out.println("Bigrams:" + bigrams);
        out.println("TF:" + tf);
        out.println("TF-IDF:" + tfidf);
      }
      // Si tengo los tweets procesados, me evito un nuevo proceso
      System.out.println("1-Process tweets " + dateFormat.format(new Date()));
      out.println("1-Process tweets " + dateFormat.format(new Date()));

      List<ProcessedTweet> train = null;
      String[] ids = id.split("-");
      cloneID = ids[0] + "-" + (Integer.valueOf(ids[1]) + 6);
      if (((Integer.valueOf(ids[1]) / 6) % 2) == 0) clonar = true;

      if (new File(id + "-train.ptweets").exists()) {
        train = ProcessedTweetSerialization.fromFile(id + "-train.ptweets");
        tweetsProcessor =
            new ProcessTweets(stemming, lematization, urls, hashtags, mentions, unigrams, bigrams);
        if (lematization) {
          tweetsProcessor.doLematization(train);
        }
        if (stemming) {
          tweetsProcessor.doStemming(train);
        }
      } else {
        tweetsProcessor =
            new ProcessTweets(stemming, lematization, urls, hashtags, mentions, unigrams, bigrams);
        // Esto del set training es un añadido para poder diferenciar los idiomas de las url en el
        // corpus paralelo
        //				tweetsProcessor.setTraining(true);
        train = tweetsProcessor.processTweets(traincorpus);
        //				tweetsProcessor.setTraining(false);
        ProcessedTweetSerialization.toFile(id + "-train.ptweets", train);
        /*
        				if (clonar)
        				{
        					File f = new File (id+"-train.ptweets");
        					Path p = f.toPath();
        					CopyOption[] options = new CopyOption[]{
        						      StandardCopyOption.REPLACE_EXISTING,
        						      StandardCopyOption.COPY_ATTRIBUTES
        						     };
        					Files.copy(p, new File (cloneID+"-train.ptweets").toPath(), options);
        					Files.copy(p, new File (ids[0]+"-"+(Integer.valueOf(ids[1])+12)+"-train.ptweets").toPath(), options);
        					Files.copy(p, new File (ids[0]+"-"+(Integer.valueOf(ids[1])+18)+"-train.ptweets").toPath(), options);
        					Files.copy(p, new File (ids[0]+"-"+(Integer.valueOf(ids[1])+24)+"-train.ptweets").toPath(), options);
        					Files.copy(p, new File (ids[0]+"-"+(Integer.valueOf(ids[1])+30)+"-train.ptweets").toPath(), options);
        				}
        */
      }

      // Generamos las BOW. Igual que antes, si existen no las creo.
      System.out.println("2-Fill topics " + dateFormat.format(new Date()));
      out.println("2-Fill topics " + dateFormat.format(new Date()));
      TopicsList topics = null;
      if (new File(id + ".topics").exists()) {
        topics = TopicsSerialization.fromFile(id + ".topics");
        if (tf) topics.setSelectionFeature(TopicDesc.TERM_TF);
        else topics.setSelectionFeature(TopicDesc.TERM_TF_IDF);
        topics.prepareTopics();
      } else {

        topics = new TopicsList();
        if (tf) topics.setSelectionFeature(TopicDesc.TERM_TF);
        else topics.setSelectionFeature(TopicDesc.TERM_TF_IDF);
        System.out.println("Filling topics " + dateFormat.format(new Date()));
        topics.fillTopics(train);
        System.out.println("Preparing topics topics " + dateFormat.format(new Date()));
        // Aquí tengo que serializar antes de preparar, porque si no no puedo calcular los tf y
        // tfidf
        System.out.println("Serializing topics topics " + dateFormat.format(new Date()));
        /*
        				if (clonar)
        				{
        					TopicsSerialization.toFile(cloneID+".topics", topics);
        				}
        */
        topics.prepareTopics();
        TopicsSerialization.toFile(id + ".topics", topics);
      }
      System.out.println("3-Generate arff train file " + dateFormat.format(new Date()));
      out.println("3-Generate arff train file " + dateFormat.format(new Date()));

      // Si el fichero arff no existe, lo creo. en caso contrario vengo haciendo lo que hasta ahora,
      // aprovechar trabajo previo
      if (!new File(id + "-train.arff").exists()) {

        BufferedWriter bw = topics.generateArffHeader(id + "-train.arff");
        int tope = traincorpus.size();
        if (tweetsProcessor == null)
          tweetsProcessor =
              new ProcessTweets(
                  stemming, lematization, urls, hashtags, mentions, unigrams, bigrams);
        for (int indTweet = 0; indTweet < tope; indTweet++) {
          topics.generateArffVector(bw, train.get(indTweet));
        }
        bw.flush();
        bw.close();
      }

      // Ahora proceso los datos de test
      System.out.println("5-build test dataset " + dateFormat.format(new Date()));
      out.println("5-build test dataset " + dateFormat.format(new Date()));

      List<ProcessedTweet> test = null;
      if (new File(id + "-test.ptweets").exists())
        test = ProcessedTweetSerialization.fromFile(id + "-test.ptweets");
      else {
        if (tweetsProcessor == null)
          tweetsProcessor =
              new ProcessTweets(
                  stemming, lematization, urls, hashtags, mentions, unigrams, bigrams);
        test = tweetsProcessor.processTweets(testcorpus);
        ProcessedTweetSerialization.toFile(id + "-test.ptweets", test);
        /*
        				if (clonar)
        				{
        					File f = new File (id+"-test.ptweets");
        					Path p = f.toPath();
        					CopyOption[] options = new CopyOption[]{
        						      StandardCopyOption.REPLACE_EXISTING,
        						      StandardCopyOption.COPY_ATTRIBUTES
        						     };
        					Files.copy(p, new File (cloneID+"-test.ptweets").toPath(), options);
        				}
        */

      }

      // Si el fichero arff no existe, lo creo. en caso contrario vengo haciendo lo que hasta ahora,
      // aprovechar trabajo previo
      if (!new File(id + "-test.arff").exists()) {
        BufferedWriter bw = topics.generateArffHeader(id + "-test.arff");
        int tope = testcorpus.size();
        if (tweetsProcessor == null)
          tweetsProcessor =
              new ProcessTweets(
                  stemming, lematization, urls, hashtags, mentions, unigrams, bigrams);
        for (int indTweet = 0; indTweet < tope; indTweet++) {
          topics.generateArffVector(bw, test.get(indTweet));
        }
        bw.flush();
        bw.close();
      }
      int topeTopics = topics.getTopicsList().size();
      topics.getTopicsList().clear();
      // Genero el clasificador
      // FJRM 25-08-2013 Lo cambio de orden para intentar liberar la memoria de los topics y tener
      // más libre
      System.out.println("4-Generate classifier " + dateFormat.format(new Date()));
      out.println("4-Generate classifier " + dateFormat.format(new Date()));

      Classifier cls = null;
      DataSource sourceTrain = null;
      Instances dataTrain = null;
      if (new File(id + "-MNB.classifier").exists()) {
        ObjectInputStream ois = new ObjectInputStream(new FileInputStream(id + "-MNB.classifier"));
        cls = (Classifier) ois.readObject();
        ois.close();
      } else {
        sourceTrain = new DataSource(id + "-train.arff");
        dataTrain = sourceTrain.getDataSet();
        if (dataTrain.classIndex() == -1) dataTrain.setClassIndex(dataTrain.numAttributes() - 1);
        // Entreno el clasificador
        cls = new weka.classifiers.bayes.NaiveBayesMultinomial();
        int clase = dataTrain.numAttributes() - 1;
        dataTrain.setClassIndex(clase);
        cls.buildClassifier(dataTrain);
        ObjectOutputStream oos =
            new ObjectOutputStream(new FileOutputStream(id + "-MNB.classifier"));
        oos.writeObject(cls);
        oos.flush();
        oos.close();
        // data.delete();//no borro para el svm
      }
      // Ahora evaluo el clasificador con los datos de test
      System.out.println("6-Evaluate classifier MNB " + dateFormat.format(new Date()));
      out.println("6-Evaluate classifier MNB" + dateFormat.format(new Date()));
      DataSource sourceTest = new DataSource(id + "-test.arff");
      Instances dataTest = sourceTest.getDataSet();
      int clase = dataTest.numAttributes() - 1;
      dataTest.setClassIndex(clase);
      Evaluation eval = new Evaluation(dataTest);
      eval.evaluateModel(cls, dataTest);
      // Ahora calculo los valores precision, recall y fmeasure. Además saco las matrices de
      // confusion

      precision = 0;
      recall = 0;
      fmeasure = 0;
      for (int ind = 0; ind < topeTopics; ind++) {
        precision += eval.precision(ind);
        recall += eval.recall(ind);
        fmeasure += eval.fMeasure(ind);
      }
      precision = precision / topeTopics;
      recall = recall / topeTopics;
      fmeasure = fmeasure / topeTopics;
      System.out.println("+++++++++++++++++++++++++++++++++++++++");
      System.out.println(eval.toMatrixString());
      System.out.println("+++++++++++++++++++++++++++++++++++++++");
      System.out.printf("Precision: %.3f\n", precision);
      System.out.printf("Recall: %.3f\n", recall);
      System.out.printf("F-measure: %.3f\n", fmeasure);
      System.out.println("***************************************");
      out.println("+++++++++++++++++++++++++++++++++++++++");
      out.println(eval.toMatrixString());
      out.println("+++++++++++++++++++++++++++++++++++++++");
      out.printf("Precision: %.3f\n", precision);
      out.printf("Recall: %.3f\n", recall);
      out.printf("F-measure: %.3f\n", fmeasure);
      out.println("***************************************");
      /*			NO BORRAR
      			System.out.println("7-Evaluate classifier SVM"+dateFormat.format(new Date()));
      			out.println("7-Evaluate classifier SVM"+dateFormat.format(new Date()));
      			if (new File(id+"-SVM.classifier").exists())
      			{
      				ObjectInputStream ois = new ObjectInputStream(new FileInputStream(id+"-SVM.classifier"));
      				cls = (Classifier) ois.readObject();
      				ois.close();
      			}
      			else
      			{
      				if (dataTrain==null)
      				{
      					sourceTrain = new DataSource(id+"-train.arff");
      					dataTrain = sourceTrain.getDataSet();
      					if (dataTrain.classIndex() == -1)
      						dataTrain.setClassIndex(dataTrain.numAttributes() - 1);
      				}
      	//Entreno el clasificador
      				cls = new weka.classifiers.functions.LibSVM();
      				clase = dataTrain.numAttributes()-1;
      				dataTrain.setClassIndex(clase);
      				cls.buildClassifier(dataTrain);
      				ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(id+"-SVM.classifier"));
      				oos.writeObject(cls);
      				oos.flush();
      				oos.close();
      				dataTrain.delete();
      			}
      			eval.evaluateModel(cls, dataTest);
      			precision=0;
      			recall=0;
      			fmeasure=0;
      			for(int ind=0; ind<topeTopics; ind++)
      			{
      				precision += eval.precision(ind);
      				recall += eval.recall(ind);
      				fmeasure += eval.fMeasure(ind);
      			}
      			precision = precision / topeTopics;
      			recall = recall / topeTopics;
      			fmeasure = fmeasure / topeTopics;
      			System.out.println("+++++++++++++++++++++++++++++++++++++++");
      			System.out.println(eval.toMatrixString());
      			System.out.println("+++++++++++++++++++++++++++++++++++++++");
      			System.out.printf("Precision: %.3f\n", precision);
      			System.out.printf("Recall: %.3f\n", recall);
      			System.out.printf("F-measure: %.3f\n", fmeasure);
      			System.out.println("***************************************");
      			out.println("+++++++++++++++++++++++++++++++++++++++");
      			out.println(eval.toMatrixString());
      			out.println("+++++++++++++++++++++++++++++++++++++++");
      			out.printf("Precision: %.3f\n", precision);
      			out.printf("Recall: %.3f\n", recall);
      			out.printf("F-measure: %.3f\n", fmeasure);
      			out.println("***************************************");
      */
      System.out.println("Done " + dateFormat.format(new Date()));
      out.println("Done " + dateFormat.format(new Date()));
      if (printer == null) {
        out.flush();
        out.close();
      }
      // Intento de liberar memoria
      if (dataTrain != null) dataTrain.delete();
      if (dataTest != null) dataTest.delete();
      if (train != null) train.clear();
      if (test != null) test.clear();
      if (topics != null) {
        topics.getTopicsList().clear();
        topics = null;
      }
      if (dataTest != null) dataTest.delete();
      if (cls != null) cls = null;
      if (tweetsProcessor != null) tweetsProcessor = null;
      System.gc();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
예제 #4
0
  public static void main(String[] args) throws Exception {

    BufferedReader reader = new BufferedReader(new FileReader("spambase.arff"));
    Instances data = new Instances(reader);
    reader.close();
    // setting class attribute
    data.setClassIndex(data.numAttributes() - 1);

    int i = data.numInstances();
    int j = data.numAttributes() - 1;

    File file = new File("tablelog.csv");
    Writer output = null;
    output = new BufferedWriter(new FileWriter(file));
    output.write(
        "%missing,auc1,correct1,fmeasure1,auc2,correct2,fmeasure2,auc3,correct3,fmeasure3\n");

    Random randomGenerator = new Random();
    data.randomize(randomGenerator);
    int numBlock = data.numInstances() / 2;
    double num0 = 0, num1 = 0, num2 = 0;
    /*mdata.instance(0).setMissing(0);
    mdata.deleteWithMissing(0);
    System.out.println(mdata.numInstances()+","+data.numInstances());*/
    // Instances traindata=null;
    // Instances testdata=null;
    // System.out.println(data.instance(3).stringValue(1));
    for (int perc = 10; perc < 101; perc = perc + 10) {
      Instances mdata = new Instances(data);
      int numMissing = perc * numBlock / 100;
      double y11[] = new double[2];
      double y21[] = new double[2];
      double y31[] = new double[2];
      double y12[] = new double[2];
      double y22[] = new double[2];
      double y32[] = new double[2];
      double y13[] = new double[2];
      double y23[] = new double[2];
      double y33[] = new double[2];
      for (int p = 0; p < 2; p++) {
        Instances traindata = mdata.trainCV(2, p);
        Instances testdata = mdata.testCV(2, p);
        num0 = 0;
        num1 = 0;
        num2 = 0;
        for (int t = 0; t < numBlock; t++) {
          if (traindata.instance(t).classValue() == 0) num0++;
          if (traindata.instance(t).classValue() == 1) num1++;
          // if (traindata.instance(t).classValue()==2) num2++;
        }
        // System.out.println(mdata.instance(0).classValue());
        Instances trainwithmissing = new Instances(traindata);
        Instances testwithmissing = new Instances(testdata);
        for (int q = 0; q < j; q++) {
          int r = randomGenerator.nextInt((int) i / 2);

          for (int k = 0; k < numMissing; k++) {
            // int r = randomGenerator.nextInt((int) i/2);
            // int c = randomGenerator.nextInt(j);
            trainwithmissing.instance((r + k) % numBlock).setMissing(q);
            testwithmissing.instance((r + k) % numBlock).setMissing(q);
          }
        }
        // trainwithmissing.deleteWithMissing(0);System.out.println(traindata.numInstances()+","+trainwithmissing.numInstances());
        Classifier cModel =
            (Classifier) new Logistic(); // try for different classifiers and datasets
        cModel.buildClassifier(trainwithmissing);
        Evaluation eTest1 = new Evaluation(trainwithmissing);
        eTest1.evaluateModel(cModel, testdata);
        // eTest.crossValidateModel(cModel,mdata,10,mdata.getRandomNumberGenerator(1));
        y11[p] =
            num0 / numBlock * eTest1.areaUnderROC(0)
                + num1
                    / numBlock
                    * eTest1.areaUnderROC(1) /*+num2/numBlock*eTest1.areaUnderROC(2)*/;
        y21[p] = eTest1.correct();
        y31[p] =
            num0 / numBlock * eTest1.fMeasure(0)
                + num1 / numBlock * eTest1.fMeasure(1) /*+num2/numBlock*eTest1.fMeasure(2)*/;

        Classifier cModel2 = (Classifier) new Logistic();
        cModel2.buildClassifier(traindata);
        Evaluation eTest2 = new Evaluation(traindata);
        eTest2.evaluateModel(cModel2, testwithmissing);
        y12[p] =
            num0 / numBlock * eTest2.areaUnderROC(0)
                + num1
                    / numBlock
                    * eTest2.areaUnderROC(1) /*+num2/numBlock*eTest2.areaUnderROC(2)*/;
        y22[p] = eTest2.correct();
        y32[p] =
            num0 / numBlock * eTest2.fMeasure(0)
                + num1 / numBlock * eTest2.fMeasure(1) /*+num2/numBlock*eTest2.fMeasure(2)*/;

        Classifier cModel3 = (Classifier) new Logistic();
        cModel3.buildClassifier(trainwithmissing);
        Evaluation eTest3 = new Evaluation(trainwithmissing);
        eTest3.evaluateModel(cModel3, testwithmissing);
        y13[p] =
            num0 / numBlock * eTest3.areaUnderROC(0)
                + num1
                    / numBlock
                    * eTest3.areaUnderROC(1) /*+num2/numBlock*eTest3.areaUnderROC(2)*/;
        y23[p] = eTest3.correct();
        y33[p] =
            num0 / numBlock * eTest3.fMeasure(0)
                + num1 / numBlock * eTest3.fMeasure(1) /*+num2/numBlock*eTest3.fMeasure(2)*/;
        // System.out.println(num0+","+num1+","+num2+"\n");
      }
      double auc1 = (y11[0] + y11[1]) / 2;
      double auc2 = (y12[0] + y12[1]) / 2;
      double auc3 = (y13[0] + y13[1]) / 2;
      double corr1 = (y21[0] + y21[1]) / i;
      double corr2 = (y22[0] + y22[1]) / i;
      double corr3 = (y23[0] + y23[1]) / i;
      double fm1 = (y31[0] + y31[1]) / 2;
      double fm2 = (y32[0] + y32[1]) / 2;
      double fm3 = (y33[0] + y33[1]) / 2;
      output.write(
          perc + "," + auc1 + "," + corr1 + "," + fm1 + "," + auc2 + "," + corr2 + "," + fm2 + ","
              + auc3 + "," + corr3 + "," + fm3 + "\n"); // System.out.println(num0);
      // mdata=data;

    }
    output.close();
  }
예제 #5
0
  private double[] classify(String test) {

    String[] lab = {
      "I.2", "I.3", "I.5", "I.6", "I.2.1", "I.2.6", "I.2.8", "I.3.5", "I.3.6", "I.3.7", "I.5.1",
      "I.5.2", "I.5.4", "I.6.3", "I.6.5", "I.6.8",
    };

    int NSel = 1000; //       Number of selection
    Filter[] filters = new Filter[2];
    double[] x = new double[16];
    double[] prd = new double[16];
    double clsLabel;
    Ranker rank = new Ranker();
    Evaluation eval = null;

    StringToWordVector stwv = new StringToWordVector();
    weka.filters.supervised.attribute.AttributeSelection featSel =
        new weka.filters.supervised.attribute.AttributeSelection();

    WordTokenizer wtok = new WordTokenizer();
    String delim = " \r\n\t.,;:'\"()?!$*-&[]+/|\\";

    InfoGainAttributeEval ig = new InfoGainAttributeEval();

    String[] stwvOpts;
    wtok.setDelimiters(delim);

    Instances[] dataRaw = new Instances[10000];

    DataSource[] source = new DataSource[16];

    String str;

    Instances testset = null;
    DataSource testsrc = null;
    try {
      testsrc = new DataSource(test);
      testset = testsrc.getDataSet();
    } catch (Exception e1) {
      // TODO Auto-generated catch block
      e1.printStackTrace();
    }

    for (int j = 0; j < 16; j++) // 16 element 0-15
    {
      try {
        str = lab[j];
        source[j] =
            new DataSource(
                "D:/Users/nma1g11/workspace2/WebScraperFlatNew/dataPernode/new/" + str + ".arff");
        dataRaw[j] = source[j].getDataSet();
      } catch (Exception e) {
        e.printStackTrace();
      }

      System.out.println(lab[j]);
      if (dataRaw[j].classIndex() == -1) dataRaw[j].setClassIndex(dataRaw[j].numAttributes() - 1);
    }
    if (testset.classIndex() == -1) testset.setClassIndex(testset.numAttributes() - 1);

    try {
      stwvOpts =
          weka.core.Utils.splitOptions(
              "-R first-last -W 1000000 -prune-rate -1.0 -C -T -I -N 1 -L -S -stemmer weka.core.stemmers.LovinsStemmer -M 2 ");
      stwv.setOptions(stwvOpts);
      stwv.setTokenizer(wtok);

      rank.setOptions(weka.core.Utils.splitOptions("-T -1.7976931348623157E308 -N 100"));
      rank.setNumToSelect(NSel);
      featSel.setEvaluator(ig);
      featSel.setSearch(rank);
    } catch (Exception e) {
      e.printStackTrace();
    }

    filters[0] = stwv;
    filters[1] = featSel;

    System.out.println("Loading is Done!");

    MultiFilter mfilter = new MultiFilter();

    mfilter.setFilters(filters);

    FilteredClassifier classify = new FilteredClassifier();
    classify.setClassifier(
        new NaiveBayesMultinomial()); ///////// Algorithm of The Classification  /////////
    classify.setFilter(mfilter);

    String ss2 = "";

    try {
      Classifier[] clsArr = new Classifier[16];
      clsArr = Classifier.makeCopies(classify, 16);
      String strcls = "";

      List<String> clsList = new ArrayList<String>();
      String s = null;
      String newcls = null;
      String lb = "";
      String prev = "";
      boolean flag = false;
      String Ocls = null;
      int q = 0;

      for (int i = 0; i < 16; i++) {

        for (int k = 0; k < testset.numInstances(); k++) {
          flag = false;

          s = testset.instance(k).stringValue(1);
          clsList.add(s);
          if (lab[i].equals(s)) {
            flag = true;
            newcls = s;
          }
        }

        clsArr[i].buildClassifier(dataRaw[i]);
        eval = new Evaluation(dataRaw[i]);
        for (int j = 0; j < testset.numInstances(); j++) {
          Ocls = testset.instance(j).stringValue(1);

          if (flag && !s.equals(null)) testset.instance(j).setClassValue(lab[i]);

          // -----------------------------------------
          strcls = testset.instance(j).stringValue(1);
          if (i < 4) {
            if (strcls.substring(0, 3).equals(lab[i])) testset.instance(j).setClassValue(lab[i]);
          } else if (lab[i].substring(0, 3).equals(strcls))
            testset.instance(j).setClassValue(lab[i]);
          // ------------------------------------------------
          System.out.println(
              dataRaw[i].classAttribute().value(i)
                  + " --- > Correct%:"
                  + eval.pctCorrect()
                  + "  F-measure:"
                  + eval.fMeasure(i));
          if (!prev.equals(testset.instance(j).stringValue(0)) || !lab[i].equals(lb)) {

            clsLabel = clsArr[i].classifyInstance(testset.instance(j));
            x = clsArr[i].distributionForInstance(testset.instance(j));

            prd[i] = x[i];
            System.out.println(" --- > prob: " + clsLabel);
            System.out.println(" --- > x :" + x[i]);
            System.out.println(clsLabel + " --> " + testset.classAttribute().value((int) clsLabel));
          }
          testset.instance(j).setClassValue(Ocls);

          prev = testset.instance(j).stringValue(0);
          lb = lab[i];
        }

        System.out.println("Done with " + lab[i].replace("99", "") + " !!!!!!!!!!!");
      }
      System.out.println(eval.correct());

    } catch (Exception e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }

    return prd;
  }
  public void runFilter() throws Exception {
    System.out.println("filtering attributes...");
    System.out.println("running weka filters and weka-libsvm");
    File svmfile = new File(sentiAnalysis.DIR.concat(sentiAnalysis.outout.concat(".libsvm")));
    LibSVMLoader libl = new LibSVMLoader();
    libl.setFile(svmfile);
    Instances data = libl.getDataSet();

    NumericToNominal nm = new NumericToNominal(); // Converting last index
    // attribute to type
    // nominal from numeric
    nm.setAttributeIndices("last"); // as the last index would be class
    // label for the data
    nm.setInputFormat(data);

    filteredData = Filter.useFilter(data, nm); // filtered data stored in
    // new Instances object

    AttrNo = filteredData.numAttributes(); // number of attributes in given
    // file
    RecordNo = filteredData.numInstances(); // Number of records in given
    // file
    lowerBound = 0;
    upperBound = AttrNo - 1;
    AttributeSelection atsl = new AttributeSelection();
    Ranker search = new Ranker();
    InfoGainAttributeEval infog = new InfoGainAttributeEval(); // Applying
    // Attribute
    // Selection
    // using
    // InfoGain
    // evaluator
    // with
    // Ranker
    // search
    atsl.setEvaluator(infog);
    atsl.setSearch(search);
    atsl.SelectAttributes(filteredData);
    InfoGain = atsl.rankedAttributes();
    SelectedAttributes = atsl.selectedAttributes();

    // count non zero infoGain
    int count = 0;
    for (int i = 0; i < InfoGain.length; i++) {
      count = (InfoGain[i][1] > 0) ? count + 1 : count;
    }

    System.out.println("writing attributes with non-zero InfoGain...");
    FileWriter svmout =
        new FileWriter(sentiAnalysis.DIR.concat(sentiAnalysis.outout.concat("_new.libsvm")));

    for (int i = 0; i < RecordNo; i++) {
      int index = 1;
      svmout.write((int) filteredData.instance(i).value(filteredData.classIndex()) + " ");
      for (int j = 0; j < count; j++) {
        svmout.write(
            index + ":" + (int) filteredData.instance(i).value((int) InfoGain[j][0]) + " ");
        index++;
      }
      svmout.write("\n");
    }
    svmout.close();

    // filtered
    File newsvm = new File(sentiAnalysis.DIR.concat(sentiAnalysis.outout.concat("_new.libsvm")));
    LibSVMLoader liblnew = new LibSVMLoader();
    liblnew.setFile(newsvm);
    Instances newdata = liblnew.getDataSet();
    nm = new NumericToNominal(); // Converting last index attribute to type
    // nominal from numeric
    nm.setAttributeIndices("last"); // as the last index would be class
    // label for the data
    nm.setInputFormat(newdata);
    Instances filteredDataNew = Filter.useFilter(newdata, nm); // filtered
    // data
    // stored in
    // new
    // Instances
    // object

    // test file
    File newsvmtest =
        new File(sentiAnalysis.DIR.concat(sentiAnalysis.outout.concat("_test.libsvm")));
    LibSVMLoader libltest = new LibSVMLoader();
    libltest.setFile(newsvmtest);
    Instances newdatatest = libltest.getDataSet();
    nm = new NumericToNominal(); // Converting last index attribute to type
    // nominal from numeric
    nm.setAttributeIndices("last"); // as the last index would be class
    // label for the data
    nm.setInputFormat(newdatatest);
    Instances filteredDataTest = Filter.useFilter(newdatatest, nm); // filtered
    // data
    // stored
    // in
    // new
    // Instances
    // object

    // weka.classifiers.functions.LibSVM -S 0 -K 2 -D 3 -G 0.0 -R 0.0 -N 0.5
    // -M 40.0 -C 1.0 -E 0.001 -P 0.1 -seed 1
    String[] options = new String[1];
    options[0] = "-S 0 -K 2 -D 3 -G 0.1 -R 0.0 -N 0.5 -M 40.0 -C 1.0 -E 0.001 -P 0.1 -seed 1 -h 0";
    System.out.println("building classifier...");
    LibSVM svm_model = new LibSVM();
    svm_model.setOptions(options); // set the options
    svm_model.buildClassifier(filteredData); // build classifier

    DecimalFormat df = new DecimalFormat("0.00");

    System.out.println("running cross validation...");
    Evaluation eval = new Evaluation(filteredData);
    // eval.crossValidateModel(svm_model, filteredDataNew, 10, new
    // Random(1));
    eval.evaluateModel(svm_model, filteredDataTest);

    FileWriter results =
        new FileWriter(sentiAnalysis.DIR.concat(sentiAnalysis.outout.concat("_results.txt")));

    results.write("Classifier 1: Support Vector Machines\n");
    results.write("Positive class precision: " + df.format(eval.precision(0)) + "\n");
    results.write("Positive class recall: " + df.format(eval.recall(0)) + "\n");
    results.write("Positive class f-score: " + df.format(eval.fMeasure(0)) + "\n");
    results.write("Negative class precision: " + df.format(eval.precision(0)) + "\n");
    results.write("Negative class recall: " + df.format(eval.precision(0)) + "\n");
    results.write("Negative class f-score: " + df.format(eval.fMeasure(0)) + "\n");

    System.out.println("generating results...");
    System.out.println("*" + sentiAnalysis.outout + "*\t" + "\tPositive\tNegative\tNeutral");
    System.out.println(
        "Precision\t"
            + df.format(eval.precision(0))
            + "\t"
            + df.format(eval.precision(2))
            + "\t"
            + df.format(eval.precision(1)));
    System.out.println(
        "Recall\t"
            + df.format(eval.recall(0))
            + "\t"
            + df.format(eval.recall(2))
            + "\t"
            + df.format(eval.recall(1)));
    System.out.println(
        "F-score\t"
            + df.format(eval.fMeasure(0))
            + "\t"
            + df.format(eval.fMeasure(2))
            + "\t"
            + df.format(eval.fMeasure(1)));

    results.close();
  }