Example #1
 public static Instances getInstances(String file) throws Exception {
   DataSource datasource = new DataSource(file);
   Instances data = datasource.getDataSet();
   System.out.println("Class index is : " + data.classIndex());
   if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1);
   return data;
 }
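A minimal driver for this helper, assuming it sits in the same class and that weka.core.Instances and weka.core.converters.ConverterUtils.DataSource are imported (the ARFF path below is hypothetical):

  public static void main(String[] args) throws Exception {
    Instances data = getInstances("data/iris.arff"); // hypothetical path
    System.out.println("Loaded " + data.numInstances() + " instances");
  }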
  public static void run(String[] args) throws Exception {
     /*
      * Expected command-line arguments:
      *   args[0] - path to the training ARFF file
      *   args[1] - path to the test ARFF file
      */
    DataSource source = new DataSource(args[0]);
    Instances data = source.getDataSet();
    data.setClassIndex(data.numAttributes() - 1);
    NaiveBayes model = new NaiveBayes();
    model.buildClassifier(data);

    // Evaluation:
    Evaluation eval = new Evaluation(data);
    Instances testData = new DataSource(args[1]).getDataSet();
    testData.setClassIndex(testData.numAttributes() - 1);
    eval.evaluateModel(model, testData);
    System.out.println(model.toString());
    System.out.println(eval.toSummaryString("\nResults\n======\n", false));
    System.out.println("======\nConfusion Matrix:");
    double[][] confusionM = eval.confusionMatrix();
    for (int i = 0; i < confusionM.length; ++i) {
      for (int j = 0; j < confusionM[i].length; ++j) {
        System.out.format("%10s ", confusionM[i][j]);
      }
      System.out.println();
    }
  }
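If the trained model should be reused later, weka.core.SerializationHelper can round-trip it; a brief sketch (the model file name is made up):

     weka.core.SerializationHelper.write("naivebayes.model", model); // save to disk
     NaiveBayes restored =
         (NaiveBayes) weka.core.SerializationHelper.read("naivebayes.model"); // load back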
  Instances getinstance(String s) throws Exception {
    DataSource source = new DataSource(s);
    return source.getDataSet();
  }
Example #4
  /**
   * loads the given dataset and prints the Capabilities necessary to process it.
   *
   * <p>Valid parameters:
   *
   * <p>-file filename <br>
   * the file to load
   *
   * <p>-c index <br>
   * the explicit index of the class attribute (default: none)
   *
   * @param args the commandline arguments
   * @throws Exception if something goes wrong
   */
  public static void main(String[] args) throws Exception {
    String tmpStr;
    String filename;
    DataSource source;
    Instances data;
    int classIndex;
    Capabilities cap;
    Iterator<Capabilities.Capability> iter;

    if (args.length == 0) {
      System.out.println(
          "\nUsage: " + Capabilities.class.getName() + " -file <dataset> [-c <class index>]\n");
      return;
    }

    // get parameters
    tmpStr = Utils.getOption("file", args);
    if (tmpStr.length() == 0) throw new Exception("No file provided with option '-file'!");
    else filename = tmpStr;

    tmpStr = Utils.getOption("c", args);
    if (tmpStr.length() != 0) {
      if (tmpStr.equals("first")) classIndex = 0;
      else if (tmpStr.equals("last")) classIndex = -2; // last
      else classIndex = Integer.parseInt(tmpStr) - 1;
    } else {
      classIndex = -3; // not set
    }

    // load data
    source = new DataSource(filename);
    if (classIndex == -3) data = source.getDataSet();
    else if (classIndex == -2) data = source.getDataSet(source.getStructure().numAttributes() - 1);
    else data = source.getDataSet(classIndex);

    // determine and print capabilities
    cap = forInstances(data);
    System.out.println("File: " + filename);
    System.out.println(
        "Class index: " + ((data.classIndex() == -1) ? "not set" : "" + (data.classIndex() + 1)));
    System.out.println("Capabilities:");
    iter = cap.capabilities();
    while (iter.hasNext()) System.out.println("- " + iter.next());
  }
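The bare forInstances(data) call only compiles with a static import, so the original file presumably declares

    import static weka.core.Capabilities.forInstances;

or spells the call out as Capabilities.forInstances(data).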
  public void filterData() throws Exception {
    Instances data = source.getDataSet();
    StringToWordVector stv = new StringToWordVector();
    stv.setOptions(
        weka.core.Utils.splitOptions(
            "-R first-last -W 1000 "
                + "-prune-rate -1.0 -N 0 "
                + "-stemmer weka.core.stemmers.NullStemmer -M 1 "
                + "-tokenizer \"weka.core.tokenizers.WordTokenizer -delimiters  \\\" \\r\\n\\t.,;:\\\'\\\"()?!\""));

    stv.setInputFormat(data);
    Instances newdata = Filter.useFilter(data, stv);
    this.inst = newdata;
    this.inst.setClassIndex(0);
  }
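The ordering matters here: setInputFormat(data) must be called before Filter.useFilter(data, stv), otherwise the filter throws for lack of an input format. The same two-step pattern applies to any Weka filter; a minimal sketch with the stock Remove filter (the attribute index is made up):

    import weka.core.Instances;
    import weka.filters.Filter;
    import weka.filters.unsupervised.attribute.Remove;

    Remove rm = new Remove();
    rm.setAttributeIndices("1"); // hypothetical: drop the first attribute
    rm.setInputFormat(data);     // must precede useFilter
    Instances filtered = Filter.useFilter(data, rm);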
 private static LinkedList<String> getData(String f) {
   LinkedList<String> all_tweets = new LinkedList<String>();
   DataSource ds;
   Instances data = null;
   try {
     ds = new DataSource(main_folder + "test_sets/" + f + ".arff");
     data = ds.getDataSet();
   } catch (Exception e) {
     System.out.println("Could not load ARFF file: " + e.getMessage());
     return all_tweets; // bail out: data is still null, so the loop below would throw a NullPointerException
   }
   for (int i = 0; i < data.numInstances(); i++) {
     all_tweets.add(data.get(i).stringValue(0));
   }
   return all_tweets;
 }
  /**
   * takes a dataset as first argument
   *
   * @param args the commandline arguments
   * @throws Exception if something goes wrong
   */
  public static void main(String[] args) throws Exception {
    // load data
    System.out.println("\n0. Loading data");
    DataSource source = new DataSource(args[0]);
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) data.setClassIndex(data.numAttributes() - 1);

    // 1. meta-classifier
    useClassifier(data);

    // 2. filter
    useFilter(data);

    // 3. low-level
    useLowLevel(data);
  }
  @Override
  public void crossValidation(String traindata) throws Exception {
    DataSource ds = new DataSource(traindata);
    Instances instances = ds.getDataSet();
    StringToWordVector stv = new StringToWordVector();
    stv.setOptions(
        weka.core.Utils.splitOptions(
            "-R first-last -W 1000 "
                + "-prune-rate -1.0 -N 0 "
                + "-stemmer weka.core.stemmers.NullStemmer -M 1 "
                + "-tokenizer \"weka.core.tokenizers.WordTokenizer -delimiters  \\\" \\r\\n\\t.,;:\\\'\\\"()?!\""));

    stv.setInputFormat(instances);
    instances = Filter.useFilter(instances, stv);
    instances.setClassIndex(0);
    Evaluation eval = new Evaluation(instances);
    eval.crossValidateModel(this.classifier, instances, 10, new Random(1));
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());
  }
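Note that crossValidateModel trains internal copies of this.classifier for each fold and leaves the field itself untrained; to actually use the model after cross-validation, one extra call is needed:

    this.classifier.buildClassifier(instances); // train on the full filtered set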
  public void initialize() throws ResourceInitializationException {
    try {
      String arffHeaderFileName = (String) getConfigParameterValue(PARAM_ARFF_HEADER_FILE_NAME);

      DataSource source = new DataSource(arffHeaderFileName);
      wekaInstances = source.getDataSet();
      System.out.println(
          "Weka Instances successfully instantiated from header file at " + arffHeaderFileName);

      String arffDataFileName = (String) getConfigParameterValue(PARAM_ARFF_DATA_FILE_NAME);
      dataFile = new File(arffDataFileName);
      if (!dataFile.exists()) {
        dataFile.createNewFile();
        System.out.println("ARFF data file created at " + dataFile.getPath());
      } else {
        System.out.println("ARFF data file opened at " + dataFile.getPath());
      }
    } catch (Exception e) {
      throw new ResourceInitializationException(e);
    }
  }
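Because the named file contains only an ARFF header, getDataSet() here yields an empty Instances object carrying just the declared attribute structure; DataSource.getStructure() would express the same intent more directly.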
  private static void run() throws Exception {
    DataSource source = new DataSource("src/files/powerpuffgirls.arff");

    int folds = 10;
    int runs = 30;

    HashMap<String, Classifier> hash = new HashMap<>();

    hash.put("J48", new J48());
    hash.put("NaiveBayes", new NaiveBayes());
    hash.put("IBk=1", new IBk(1));
    hash.put("IBk=3", new IBk(3));
    hash.put("MultilayerPerceptron", new MultilayerPerceptron());

    //        LibSVM svm = new LibSVM();
    //        svm.setOptions(new String[]{"-S 0 -K 2 -D 3 -G 0.0 -R 0.0 -N 0.5 -M 0.40 -C 1.0 -E 0.001 -P 0.1"});
    //        hash.put("LibSVM", svm);

    Instances data = source.getDataSet();
    data.setClassIndex(4);

    System.out.println("#seed \t correctly instances \t percentage of corrects\n");

    for (Entry<String, Classifier> entry : hash.entrySet()) {
      System.out.println("\n Algorithm: " + entry.getKey() + "\n");

      for (int i = 1; i <= runs; i++) {
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(entry.getValue(), data, folds, new Random(i));

        System.out.println(summary(eval));
      }
    }
  }
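The summary(eval) helper is defined elsewhere in the class; a plausible stand-in, using only Evaluation getters that do exist, might be:

    private static String summary(Evaluation eval) {
      // correctly classified count and percentage, matching the header printed above
      return String.format("%.0f \t %.2f%%", eval.correct(), eval.pctCorrect());
    }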
 @Override
 public void loadData(String data) throws Exception {
   this.source = new DataSource(data);
   this.inst = source.getDataSet();
   if (this.inst.classIndex() == -1) this.inst.setClassIndex(this.inst.numAttributes() - 1);
 }
Example #12
  public static void main(String args[]) {
    Timers timer = new Timers();
    try {
      // Get the data set path.
      String referenceFile = Utils.getOption('r', args);
      String queryFile = Utils.getOption('q', args);
      if (referenceFile.length() == 0)
        throw new IllegalArgumentException(
            "Required option: File containing the reference dataset.");

      // Load input dataset.
      DataSource source = new DataSource(referenceFile);
      Instances referenceData = source.getDataSet();

      Instances queryData = null;
      if (queryFile.length() != 0) {
        source = new DataSource(queryFile);
        queryData = source.getDataSet();
      }

      timer.StartTimer("total_time");

      // Get all the parameters.
      String leafSize = Utils.getOption('l', args);
      String neighbors = Utils.getOption('k', args);

      // Validate options.
      int k = 0;
      if (neighbors.length() == 0) {
        throw new IllegalArgumentException(
            "Required option: Number of nearest neighbors to find.");
      } else {
        k = Integer.parseInt(neighbors);
        if (k < 1 || k > referenceData.numInstances())
          throw new IllegalArgumentException("[Fatal] Invalid k");
      }

      int l = 20;
      if (leafSize.length() != 0) l = Integer.parseInt(leafSize);

      // Create KDTree.
      KDTree tree = new KDTree();
      tree.setMaxInstInLeaf(l);
      tree.setInstances(referenceData);

      // Perform all k-nearest-neighbor queries (results are discarded; only timing matters).
      if (queryFile.length() != 0) {
        for (int i = 0; i < queryData.numInstances(); i++) {
          Instances out = tree.kNearestNeighbours(queryData.instance(i), k);
        }
      } else {
        for (int i = 0; i < referenceData.numInstances(); i++) {
          Instances out = tree.kNearestNeighbours(referenceData.instance(i), k);
        }
      }

      timer.StopTimer("total_time");
      timer.PrintTimer("total_time");
    } catch (IOException e) {
      System.err.println(USAGE);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
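KDTree here is weka.core.neighboursearch.KDTree; its kNearestNeighbours(Instance, int) returns the neighbours as an Instances object, which this benchmark deliberately discards since only the total query time is measured.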
 public WekaClassifier() throws Exception {
   DataSource source_train = new DataSource("files/train1.arff");
   DataSource source_test = new DataSource("files/test1.arff");
   _train = source_train.getDataSet();
   _test = source_test.getDataSet();
 }
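Neither _train nor _test gets a class index in this constructor, so the calling code presumably sets one before training, e.g.:

   _train.setClassIndex(_train.numAttributes() - 1); // without this, buildClassifier fails with UnassignedClassException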
Example #14
  private double[] classify(String test) {

    String[] lab = {
      "I.2", "I.3", "I.5", "I.6", "I.2.1", "I.2.6", "I.2.8", "I.3.5", "I.3.6", "I.3.7", "I.5.1",
      "I.5.2", "I.5.4", "I.6.3", "I.6.5", "I.6.8",
    };

    int NSel = 1000; // number of attributes to select with the ranker
    Filter[] filters = new Filter[2];
    double[] x = new double[16];
    double[] prd = new double[16];
    double clsLabel;
    Ranker rank = new Ranker();
    Evaluation eval = null;

    StringToWordVector stwv = new StringToWordVector();
    weka.filters.supervised.attribute.AttributeSelection featSel =
        new weka.filters.supervised.attribute.AttributeSelection();

    WordTokenizer wtok = new WordTokenizer();
    String delim = " \r\n\t.,;:'\"()?!$*-&[]+/|\\";

    InfoGainAttributeEval ig = new InfoGainAttributeEval();

    String[] stwvOpts;
    wtok.setDelimiters(delim);

    Instances[] dataRaw = new Instances[16]; // one data set per category

    DataSource[] source = new DataSource[16];

    String str;

    Instances testset = null;
    DataSource testsrc = null;
    try {
      testsrc = new DataSource(test);
      testset = testsrc.getDataSet();
    } catch (Exception e1) {
      e1.printStackTrace();
    }

    for (int j = 0; j < 16; j++) { // 16 categories, indices 0-15
      try {
        str = lab[j];
        source[j] =
            new DataSource(
                "D:/Users/nma1g11/workspace2/WebScraperFlatNew/dataPernode/new/" + str + ".arff");
        dataRaw[j] = source[j].getDataSet();
      } catch (Exception e) {
        e.printStackTrace();
      }

      System.out.println(lab[j]);
      if (dataRaw[j].classIndex() == -1) dataRaw[j].setClassIndex(dataRaw[j].numAttributes() - 1);
    }
    if (testset.classIndex() == -1) testset.setClassIndex(testset.numAttributes() - 1);

    try {
      stwvOpts =
          weka.core.Utils.splitOptions(
              "-R first-last -W 1000000 -prune-rate -1.0 -C -T -I -N 1 -L -S -stemmer weka.core.stemmers.LovinsStemmer -M 2 ");
      stwv.setOptions(stwvOpts);
      stwv.setTokenizer(wtok);

      rank.setOptions(weka.core.Utils.splitOptions("-T -1.7976931348623157E308 -N 100"));
      rank.setNumToSelect(NSel);
      featSel.setEvaluator(ig);
      featSel.setSearch(rank);
    } catch (Exception e) {
      e.printStackTrace();
    }

    filters[0] = stwv;
    filters[1] = featSel;

    System.out.println("Loading is Done!");

    MultiFilter mfilter = new MultiFilter();

    mfilter.setFilters(filters);

    FilteredClassifier classify = new FilteredClassifier();
    classify.setClassifier(new NaiveBayesMultinomial()); // the classification algorithm
    classify.setFilter(mfilter);

    try {
      Classifier[] clsArr = Classifier.makeCopies(classify, 16);
      String strcls = "";

      List<String> clsList = new ArrayList<String>();
      String s = null;
      String newcls = null;
      String lb = "";
      String prev = "";
      boolean flag = false;
      String Ocls = null;

      for (int i = 0; i < 16; i++) {

        for (int k = 0; k < testset.numInstances(); k++) {
          flag = false;

          s = testset.instance(k).stringValue(1);
          clsList.add(s);
          if (lab[i].equals(s)) {
            flag = true;
            newcls = s;
          }
        }

        clsArr[i].buildClassifier(dataRaw[i]);
        eval = new Evaluation(dataRaw[i]); // note: evaluateModel is never called, so the statistics printed below remain empty
        for (int j = 0; j < testset.numInstances(); j++) {
          Ocls = testset.instance(j).stringValue(1);

          if (flag && s != null) testset.instance(j).setClassValue(lab[i]);

          // -----------------------------------------
          strcls = testset.instance(j).stringValue(1);
          if (i < 4) {
            if (strcls.substring(0, 3).equals(lab[i])) testset.instance(j).setClassValue(lab[i]);
          } else if (lab[i].substring(0, 3).equals(strcls))
            testset.instance(j).setClassValue(lab[i]);
          // ------------------------------------------------
          System.out.println(
              dataRaw[i].classAttribute().value(i)
                  + " --- > Correct%:"
                  + eval.pctCorrect()
                  + "  F-measure:"
                  + eval.fMeasure(i));
          if (!prev.equals(testset.instance(j).stringValue(0)) || !lab[i].equals(lb)) {

            clsLabel = clsArr[i].classifyInstance(testset.instance(j));
            x = clsArr[i].distributionForInstance(testset.instance(j));

            prd[i] = x[i];
            System.out.println(" --- > prob: " + clsLabel);
            System.out.println(" --- > x :" + x[i]);
            System.out.println(clsLabel + " --> " + testset.classAttribute().value((int) clsLabel));
          }
          testset.instance(j).setClassValue(Ocls);

          prev = testset.instance(j).stringValue(0);
          lb = lab[i];
        }

        System.out.println("Done with " + lab[i].replace("99", "") + " !!!!!!!!!!!");
      }
      System.out.println(eval.correct());

    } catch (Exception e) {
      e.printStackTrace();
    }

    return prd;
  }
  public void generateDataSet() {

    // Read all the instances in the file (ARFF, CSV, XRFF, ...)
    try {
      source = new DataSource("data\\bne.csv");
    } catch (Exception e) {
      e.printStackTrace();
    }

    // Create data set
    try {
      instances = source.getDataSet();
    } catch (Exception e) {
      e.printStackTrace();
    }

    // Reverse the order of instances in the data set to place them in
    // chronological order
    for (int i = 0; i < (instances.numInstances() / 2); i++) {
      instances.swap(i, instances.numInstances() - 1 - i);
    }

    // Remove "volume", "low price", "high price", "opening price" and
    // "data" from data set
    instances.deleteAttributeAt(instances.numAttributes() - 1);
    instances.deleteAttributeAt(instances.numAttributes() - 2);
    instances.deleteAttributeAt(instances.numAttributes() - 2);
    instances.deleteAttributeAt(instances.numAttributes() - 2);
    instances.deleteAttributeAt(instances.numAttributes() - 2);

    // Create a list to hold the nominal class values "purchase", "sale", "retain"
    List<String> my_nominal_values = new ArrayList<String>(3);
    my_nominal_values.add("purchase");
    my_nominal_values.add("sale");
    my_nominal_values.add("retain");

    // Create nominal attribute "classIndex"
    Attribute classIndex = new Attribute("classIndex", my_nominal_values);

    // Add "classIndex" as an attribute to each instance
    instances.insertAttributeAt(classIndex, instances.numAttributes());

    // Set the value of "classIndex" for each instance
    for (int i = 0; i < instances.numInstances() - 1; i++) {
      if (instances.get(i + 1).value(instances.numAttributes() - 2)
          > instances.get(i).value(instances.numAttributes() - 2)) {
        instances.get(i).setValue(instances.numAttributes() - 1, "purchase");
      } else if (instances.get(i + 1).value(instances.numAttributes() - 2)
          < instances.get(i).value(instances.numAttributes() - 2)) {
        instances.get(i).setValue(instances.numAttributes() - 1, "sale");
      } else {
        // equal prices: neither buy nor sell
        instances.get(i).setValue(instances.numAttributes() - 1, "retain");
      }
    }

    // Make the last attribute be the class
    instances.setClassIndex(instances.numAttributes() - 1);

    // Calculate and insert technical analysis attributes into data set
    Strategies strategies = new Strategies();
    strategies.applyStrategies();

    // Print header and instances
    System.out.println("\nDataset:\n");
    System.out.println(instances);
    System.out.println(instances.numInstances());
  }
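If the labelled data set should be written back to disk, Weka's ArffSaver handles it; a short sketch (the output path is hypothetical):

    import weka.core.converters.ArffSaver;
    import java.io.File;

    ArffSaver saver = new ArffSaver();
    saver.setInstances(instances);
    saver.setFile(new File("data/bne-labelled.arff")); // hypothetical path
    saver.writeBatch();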
  public void exec(PrintWriter printer) {
    try {
      FileWriter outFile = null;
      PrintWriter out = null;
      if (printer == null) {
        outFile = new FileWriter(id + ".results");
        out = new PrintWriter(outFile);
      } else out = printer;

      DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
      ProcessTweets tweetsProcessor = null;
      System.out.println("***************************************");
      System.out.println("***\tEXECUTING TEST\t" + id + "***");
      System.out.println("+++++++++++++++++++++++++++++++++++++++");
      System.out.println("Train size:" + traincorpus.size());
      System.out.println("Test size:" + testcorpus.size());
      out.println("***************************************");
      out.println("***\tEXECUTING TEST\t***");
      out.println("+++++++++++++++++++++++++++++++++++++++");
      out.println("Train size:" + traincorpus.size());
      out.println("Test size:" + testcorpus.size());
      String cloneID = "";
      boolean clonar = false;
      if (baseline) {
        System.out.println("***************************************");
        System.out.println("***\tEXECUTING TEST BASELINE\t***");
        System.out.println("+++++++++++++++++++++++++++++++++++++++");
        System.out.println("Train size:" + traincorpus.size());
        System.out.println("Test size:" + testcorpus.size());
        out.println("***************************************");
        out.println("***\tEXECUTING TEST\t***");
        out.println("+++++++++++++++++++++++++++++++++++++++");
        out.println("Train size:" + traincorpus.size());
        out.println("Test size:" + testcorpus.size());

        BaselineClassifier base = new BaselineClassifier(testcorpus, 8);
        precision = base.getPrecision();
        recall = base.getRecall();
        fmeasure = base.getFmeasure();
        System.out.println("+++++++++++++++++++++++++++++++++++++++");
        System.out.printf("Precision: %.3f\n", precision);
        System.out.printf("Recall: %.3f\n", recall);
        System.out.printf("F-measure: %.3f\n", fmeasure);
        System.out.println("***************************************");
        out.println("+++++++++++++++++++++++++++++++++++++++");
        out.printf("Precision: %.3f\n", precision);
        out.printf("Recall: %.3f\n", recall);
        out.printf("F-measure: %.3f\n", fmeasure);
        out.println("***************************************");
        out.flush();
        out.close();
        return;
      } else {
        System.out.println("Stemming: " + stemming);
        System.out.println("Lematization:" + lematization);
        System.out.println("URLs:" + urls);
        System.out.println("Hashtags:" + hashtags);
        System.out.println("Mentions:" + mentions);
        System.out.println("Unigrams:" + unigrams);
        System.out.println("Bigrams:" + bigrams);
        System.out.println("TF:" + tf);
        System.out.println("TF-IDF:" + tfidf);
        out.println("Stemming: " + stemming);
        out.println("Lematization:" + lematization);
        out.println("URLs:" + urls);
        out.println("Hashtags:" + hashtags);
        out.println("Mentions:" + mentions);
        out.println("Unigrams:" + unigrams);
        out.println("Bigrams:" + bigrams);
        out.println("TF:" + tf);
        out.println("TF-IDF:" + tfidf);
      }
      // If the processed tweets already exist, skip reprocessing them
      System.out.println("1-Process tweets " + dateFormat.format(new Date()));
      out.println("1-Process tweets " + dateFormat.format(new Date()));

      List<ProcessedTweet> train = null;
      String[] ids = id.split("-");
      cloneID = ids[0] + "-" + (Integer.valueOf(ids[1]) + 6);
      if (((Integer.valueOf(ids[1]) / 6) % 2) == 0) clonar = true;

      if (new File(id + "-train.ptweets").exists()) {
        train = ProcessedTweetSerialization.fromFile(id + "-train.ptweets");
        tweetsProcessor =
            new ProcessTweets(stemming, lematization, urls, hashtags, mentions, unigrams, bigrams);
        if (lematization) {
          tweetsProcessor.doLematization(train);
        }
        if (stemming) {
          tweetsProcessor.doStemming(train);
        }
      } else {
        tweetsProcessor =
            new ProcessTweets(stemming, lematization, urls, hashtags, mentions, unigrams, bigrams);
        // The setTraining toggle was added to distinguish the languages of the URLs in the
        // parallel corpus
        //				tweetsProcessor.setTraining(true);
        train = tweetsProcessor.processTweets(traincorpus);
        //				tweetsProcessor.setTraining(false);
        ProcessedTweetSerialization.toFile(id + "-train.ptweets", train);
        /*
        				if (clonar)
        				{
        					File f = new File (id+"-train.ptweets");
        					Path p = f.toPath();
        					CopyOption[] options = new CopyOption[]{
        						      StandardCopyOption.REPLACE_EXISTING,
        						      StandardCopyOption.COPY_ATTRIBUTES
        						     };
        					Files.copy(p, new File (cloneID+"-train.ptweets").toPath(), options);
        					Files.copy(p, new File (ids[0]+"-"+(Integer.valueOf(ids[1])+12)+"-train.ptweets").toPath(), options);
        					Files.copy(p, new File (ids[0]+"-"+(Integer.valueOf(ids[1])+18)+"-train.ptweets").toPath(), options);
        					Files.copy(p, new File (ids[0]+"-"+(Integer.valueOf(ids[1])+24)+"-train.ptweets").toPath(), options);
        					Files.copy(p, new File (ids[0]+"-"+(Integer.valueOf(ids[1])+30)+"-train.ptweets").toPath(), options);
        				}
        */
      }

      // Generate the bags of words. As before, do not recreate them if they already exist.
      System.out.println("2-Fill topics " + dateFormat.format(new Date()));
      out.println("2-Fill topics " + dateFormat.format(new Date()));
      TopicsList topics = null;
      if (new File(id + ".topics").exists()) {
        topics = TopicsSerialization.fromFile(id + ".topics");
        if (tf) topics.setSelectionFeature(TopicDesc.TERM_TF);
        else topics.setSelectionFeature(TopicDesc.TERM_TF_IDF);
        topics.prepareTopics();
      } else {

        topics = new TopicsList();
        if (tf) topics.setSelectionFeature(TopicDesc.TERM_TF);
        else topics.setSelectionFeature(TopicDesc.TERM_TF_IDF);
        System.out.println("Filling topics " + dateFormat.format(new Date()));
        topics.fillTopics(train);
        System.out.println("Preparing topics topics " + dateFormat.format(new Date()));
        // Aquí tengo que serializar antes de preparar, porque si no no puedo calcular los tf y
        // tfidf
        System.out.println("Serializing topics topics " + dateFormat.format(new Date()));
        /*
        				if (clonar)
        				{
        					TopicsSerialization.toFile(cloneID+".topics", topics);
        				}
        */
        topics.prepareTopics();
        TopicsSerialization.toFile(id + ".topics", topics);
      }
      System.out.println("3-Generate arff train file " + dateFormat.format(new Date()));
      out.println("3-Generate arff train file " + dateFormat.format(new Date()));

      // If the ARFF file does not exist, create it; otherwise keep reusing the
      // work done so far
      if (!new File(id + "-train.arff").exists()) {

        BufferedWriter bw = topics.generateArffHeader(id + "-train.arff");
        int tope = traincorpus.size();
        if (tweetsProcessor == null)
          tweetsProcessor =
              new ProcessTweets(
                  stemming, lematization, urls, hashtags, mentions, unigrams, bigrams);
        for (int indTweet = 0; indTweet < tope; indTweet++) {
          topics.generateArffVector(bw, train.get(indTweet));
        }
        bw.flush();
        bw.close();
      }

      // Now process the test data
      System.out.println("5-Build test dataset " + dateFormat.format(new Date()));
      out.println("5-Build test dataset " + dateFormat.format(new Date()));

      List<ProcessedTweet> test = null;
      if (new File(id + "-test.ptweets").exists())
        test = ProcessedTweetSerialization.fromFile(id + "-test.ptweets");
      else {
        if (tweetsProcessor == null)
          tweetsProcessor =
              new ProcessTweets(
                  stemming, lematization, urls, hashtags, mentions, unigrams, bigrams);
        test = tweetsProcessor.processTweets(testcorpus);
        ProcessedTweetSerialization.toFile(id + "-test.ptweets", test);
        /*
        				if (clonar)
        				{
        					File f = new File (id+"-test.ptweets");
        					Path p = f.toPath();
        					CopyOption[] options = new CopyOption[]{
        						      StandardCopyOption.REPLACE_EXISTING,
        						      StandardCopyOption.COPY_ATTRIBUTES
        						     };
        					Files.copy(p, new File (cloneID+"-test.ptweets").toPath(), options);
        				}
        */

      }

      // If the ARFF file does not exist, create it; otherwise keep reusing the
      // work done so far
      if (!new File(id + "-test.arff").exists()) {
        BufferedWriter bw = topics.generateArffHeader(id + "-test.arff");
        int tope = testcorpus.size();
        if (tweetsProcessor == null)
          tweetsProcessor =
              new ProcessTweets(
                  stemming, lematization, urls, hashtags, mentions, unigrams, bigrams);
        for (int indTweet = 0; indTweet < tope; indTweet++) {
          topics.generateArffVector(bw, test.get(indTweet));
        }
        bw.flush();
        bw.close();
      }
      int topeTopics = topics.getTopicsList().size();
      topics.getTopicsList().clear();
      // Build the classifier
      // FJRM 25-08-2013: moved here to try to free the topics' memory and have more
      // headroom
      System.out.println("4-Generate classifier " + dateFormat.format(new Date()));
      out.println("4-Generate classifier " + dateFormat.format(new Date()));

      Classifier cls = null;
      DataSource sourceTrain = null;
      Instances dataTrain = null;
      if (new File(id + "-MNB.classifier").exists()) {
        ObjectInputStream ois = new ObjectInputStream(new FileInputStream(id + "-MNB.classifier"));
        cls = (Classifier) ois.readObject();
        ois.close();
      } else {
        sourceTrain = new DataSource(id + "-train.arff");
        dataTrain = sourceTrain.getDataSet();
        if (dataTrain.classIndex() == -1) dataTrain.setClassIndex(dataTrain.numAttributes() - 1);
        // Train the classifier
        cls = new weka.classifiers.bayes.NaiveBayesMultinomial();
        int clase = dataTrain.numAttributes() - 1;
        dataTrain.setClassIndex(clase);
        cls.buildClassifier(dataTrain);
        ObjectOutputStream oos =
            new ObjectOutputStream(new FileOutputStream(id + "-MNB.classifier"));
        oos.writeObject(cls);
        oos.flush();
        oos.close();
        // data.delete(); // not deleted here so the SVM run can reuse it
      }
      // Now evaluate the classifier on the test data
      System.out.println("6-Evaluate classifier MNB " + dateFormat.format(new Date()));
      out.println("6-Evaluate classifier MNB" + dateFormat.format(new Date()));
      DataSource sourceTest = new DataSource(id + "-test.arff");
      Instances dataTest = sourceTest.getDataSet();
      int clase = dataTest.numAttributes() - 1;
      dataTest.setClassIndex(clase);
      Evaluation eval = new Evaluation(dataTest);
      eval.evaluateModel(cls, dataTest);
      // Compute precision, recall and F-measure, and print the confusion
      // matrices

      precision = 0;
      recall = 0;
      fmeasure = 0;
      for (int ind = 0; ind < topeTopics; ind++) {
        precision += eval.precision(ind);
        recall += eval.recall(ind);
        fmeasure += eval.fMeasure(ind);
      }
      precision = precision / topeTopics;
      recall = recall / topeTopics;
      fmeasure = fmeasure / topeTopics;
      System.out.println("+++++++++++++++++++++++++++++++++++++++");
      System.out.println(eval.toMatrixString());
      System.out.println("+++++++++++++++++++++++++++++++++++++++");
      System.out.printf("Precision: %.3f\n", precision);
      System.out.printf("Recall: %.3f\n", recall);
      System.out.printf("F-measure: %.3f\n", fmeasure);
      System.out.println("***************************************");
      out.println("+++++++++++++++++++++++++++++++++++++++");
      out.println(eval.toMatrixString());
      out.println("+++++++++++++++++++++++++++++++++++++++");
      out.printf("Precision: %.3f\n", precision);
      out.printf("Recall: %.3f\n", recall);
      out.printf("F-measure: %.3f\n", fmeasure);
      out.println("***************************************");
      /*			DO NOT DELETE
      			System.out.println("7-Evaluate classifier SVM"+dateFormat.format(new Date()));
      			out.println("7-Evaluate classifier SVM"+dateFormat.format(new Date()));
      			if (new File(id+"-SVM.classifier").exists())
      			{
      				ObjectInputStream ois = new ObjectInputStream(new FileInputStream(id+"-SVM.classifier"));
      				cls = (Classifier) ois.readObject();
      				ois.close();
      			}
      			else
      			{
      				if (dataTrain==null)
      				{
      					sourceTrain = new DataSource(id+"-train.arff");
      					dataTrain = sourceTrain.getDataSet();
      					if (dataTrain.classIndex() == -1)
      						dataTrain.setClassIndex(dataTrain.numAttributes() - 1);
      				}
      	// Train the classifier
      				cls = new weka.classifiers.functions.LibSVM();
      				clase = dataTrain.numAttributes()-1;
      				dataTrain.setClassIndex(clase);
      				cls.buildClassifier(dataTrain);
      				ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(id+"-SVM.classifier"));
      				oos.writeObject(cls);
      				oos.flush();
      				oos.close();
      				dataTrain.delete();
      			}
      			eval.evaluateModel(cls, dataTest);
      			precision=0;
      			recall=0;
      			fmeasure=0;
      			for(int ind=0; ind<topeTopics; ind++)
      			{
      				precision += eval.precision(ind);
      				recall += eval.recall(ind);
      				fmeasure += eval.fMeasure(ind);
      			}
      			precision = precision / topeTopics;
      			recall = recall / topeTopics;
      			fmeasure = fmeasure / topeTopics;
      			System.out.println("+++++++++++++++++++++++++++++++++++++++");
      			System.out.println(eval.toMatrixString());
      			System.out.println("+++++++++++++++++++++++++++++++++++++++");
      			System.out.printf("Precision: %.3f\n", precision);
      			System.out.printf("Recall: %.3f\n", recall);
      			System.out.printf("F-measure: %.3f\n", fmeasure);
      			System.out.println("***************************************");
      			out.println("+++++++++++++++++++++++++++++++++++++++");
      			out.println(eval.toMatrixString());
      			out.println("+++++++++++++++++++++++++++++++++++++++");
      			out.printf("Precision: %.3f\n", precision);
      			out.printf("Recall: %.3f\n", recall);
      			out.printf("F-measure: %.3f\n", fmeasure);
      			out.println("***************************************");
      */
      System.out.println("Done " + dateFormat.format(new Date()));
      out.println("Done " + dateFormat.format(new Date()));
      if (printer == null) {
        out.flush();
        out.close();
      }
      // Attempt to free memory
      if (dataTrain != null) dataTrain.delete();
      if (dataTest != null) dataTest.delete();
      if (train != null) train.clear();
      if (test != null) test.clear();
      if (topics != null) {
        topics.getTopicsList().clear();
        topics = null;
      }
      cls = null;
      tweetsProcessor = null;
      System.gc();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
  public static void execSVM(String expName) {
    try {
      FileWriter outFile = null;
      PrintWriter out = null;
      outFile = new FileWriter(expName + "-SVM.results");
      out = new PrintWriter(outFile);
      DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
      ProcessTweets tweetsProcessor = null;
      System.out.println("***************************************");
      System.out.println("***\tEXECUTING TEST\t" + expName + "***");
      System.out.println("+++++++++++++++++++++++++++++++++++++++");
      out.println("***************************************");
      out.println("***\tEXECUTING TEST\t" + expName + "***");
      out.println("+++++++++++++++++++++++++++++++++++++++");
      out.println("4-Generate classifier " + dateFormat.format(new Date()));

      Classifier cls = null;
      DataSource sourceTrain = new DataSource(expName + "-train.arff");
      Instances dataTrain = sourceTrain.getDataSet();
      if (dataTrain.classIndex() == -1) dataTrain.setClassIndex(dataTrain.numAttributes() - 1);
      // Train the classifier (the LibSVM line below is disabled; ComplementNaiveBayes is used instead)
      // cls = new weka.classifiers.functions.LibSVM();
      int clase = dataTrain.numAttributes() - 1;
      cls = new weka.classifiers.bayes.ComplementNaiveBayes();
      dataTrain.setClassIndex(clase);
      cls.buildClassifier(dataTrain);
      ObjectOutputStream oos =
          new ObjectOutputStream(new FileOutputStream(expName + "-SVM.classifier"));
      oos.writeObject(cls);
      oos.flush();
      oos.close();
      DataSource sourceTest = new DataSource(expName + "-test.arff");
      Instances dataTest = sourceTest.getDataSet();
      dataTest.setClassIndex(clase);
      Evaluation eval = new Evaluation(dataTest);
      eval.evaluateModel(cls, dataTest);
      // Compute precision, recall and F-measure, and print the confusion
      // matrices

      float precision = 0;
      float recall = 0;
      float fmeasure = 0;
      int topeTopics = 8;
      for (int ind = 0; ind < topeTopics; ind++) {
        precision += eval.precision(ind);
        recall += eval.recall(ind);
        fmeasure += eval.fMeasure(ind);
      }
      precision = precision / topeTopics;
      recall = recall / topeTopics;
      fmeasure = fmeasure / topeTopics;
      System.out.println("++++++++++++++ CNB ++++++++++++++++++++");
      System.out.println(eval.toMatrixString());
      System.out.println("+++++++++++++++++++++++++++++++++++++++");
      System.out.printf("Precision: %.3f\n", precision);
      System.out.printf("Recall: %.3f\n", recall);
      System.out.printf("F-measure: %.3f\n", fmeasure);
      System.out.println("***************************************");
      out.println("++++++++++++++ CNB ++++++++++++++++++++");
      out.println(eval.toMatrixString());
      out.println("+++++++++++++++++++++++++++++++++++++++");
      out.printf("Precision: %.3f\n", precision);
      out.printf("Recall: %.3f\n", recall);
      out.printf("F-measure: %.3f\n", fmeasure);
      out.println("***************************************");
      // ANOTHER CLASSIFIER: ZeroR
      cls = new weka.classifiers.rules.ZeroR();
      dataTrain.setClassIndex(clase);
      cls.buildClassifier(dataTrain);
      eval = new Evaluation(dataTest);
      eval.evaluateModel(cls, dataTest);
      precision = 0;
      recall = 0;
      fmeasure = 0;
      for (int ind = 0; ind < topeTopics; ind++) {
        precision += eval.precision(ind);
        recall += eval.recall(ind);
        fmeasure += eval.fMeasure(ind);
      }
      precision = precision / topeTopics;
      recall = recall / topeTopics;
      fmeasure = fmeasure / topeTopics;
      System.out.println("++++++++++++++ ZEROR ++++++++++++++++++++");
      System.out.println(eval.toMatrixString());
      System.out.println("+++++++++++++++++++++++++++++++++++++++");
      System.out.printf("Precision: %.3f\n", precision);
      System.out.printf("Recall: %.3f\n", recall);
      System.out.printf("F-measure: %.3f\n", fmeasure);
      System.out.println("***************************************");
      out.println("++++++++++++++ ZEROR ++++++++++++++++++++");
      out.println(eval.toMatrixString());
      out.println("+++++++++++++++++++++++++++++++++++++++");
      out.printf("Precision: %.3f\n", precision);
      out.printf("Recall: %.3f\n", recall);
      out.printf("F-measure: %.3f\n", fmeasure);
      out.println("***************************************");
      // ANOTHER CLASSIFIER: J48
      /*
      			cls = new weka.classifiers.trees.J48();
      			dataTrain.setClassIndex(clase);
      			cls.buildClassifier(dataTrain);
      			eval = new Evaluation(dataTest);
      			eval.evaluateModel(cls, dataTest);
      			precision=0;
      			recall=0;
      			fmeasure=0;
      			for(int ind=0; ind<topeTopics; ind++)
      			{
      				precision += eval.precision(ind);
      				recall += eval.recall(ind);
      				fmeasure += eval.fMeasure(ind);
      			}
      			precision = precision / topeTopics;
      			recall = recall / topeTopics;
      			fmeasure = fmeasure / topeTopics;
      			System.out.println("++++++++++++++ J48 ++++++++++++++++++++");
      			System.out.println(eval.toMatrixString());
      			System.out.println("+++++++++++++++++++++++++++++++++++++++");
      			System.out.printf("Precision: %.3f\n", precision);
      			System.out.printf("Recall: %.3f\n", recall);
      			System.out.printf("F-measure: %.3f\n", fmeasure);
      			System.out.println("***************************************");
      			out.println("++++++++++++++ J48 ++++++++++++++++++++");
      			out.println(eval.toMatrixString());
      			out.println("+++++++++++++++++++++++++++++++++++++++");
      			out.printf("Precision: %.3f\n", precision);
      			out.printf("Recall: %.3f\n", recall);
      			out.printf("F-measure: %.3f\n", fmeasure);
      			out.println("***************************************");

      // ANOTHER CLASSIFIER: SMO
      			cls = new weka.classifiers.functions.SMO();
      			dataTrain.setClassIndex(clase);
      			cls.buildClassifier(dataTrain);
      			eval = new Evaluation(dataTest);
      			eval.evaluateModel(cls, dataTest);
      			precision=0;
      			recall=0;
      			fmeasure=0;
      			for(int ind=0; ind<topeTopics; ind++)
      			{
      				precision += eval.precision(ind);
      				recall += eval.recall(ind);
      				fmeasure += eval.fMeasure(ind);
      			}
      			precision = precision / topeTopics;
      			recall = recall / topeTopics;
      			fmeasure = fmeasure / topeTopics;
      			System.out.println("++++++++++++++ SMO ++++++++++++++++++++");
      			System.out.println(eval.toMatrixString());
      			System.out.println("+++++++++++++++++++++++++++++++++++++++");
      			System.out.printf("Precision: %.3f\n", precision);
      			System.out.printf("Recall: %.3f\n", recall);
      			System.out.printf("F-measure: %.3f\n", fmeasure);
      			System.out.println("***************************************");
      			out.println("++++++++++++++ SMO ++++++++++++++++++++");
      			out.println(eval.toMatrixString());
      			out.println("+++++++++++++++++++++++++++++++++++++++");
      			out.printf("Precision: %.3f\n", precision);
      			out.printf("Recall: %.3f\n", recall);
      			out.printf("F-measure: %.3f\n", fmeasure);
      			out.println("***************************************");
      */
      out.flush();
      out.close();
      dataTest.delete();
      dataTrain.delete();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
  public HashMap<String, String> process(
      Sentence sent,
      String dep,
      HashSet<String> terms,
      List<NamedEntity> entities,
      String author,
      String aidx) {
    try {
      // System.out.println("ML start!");
      // System.out.println("List : " + terms);
      HashMap<String, String> ht = new HashMap<String, String>();

      List<NamedEntity> newEntities = new ArrayList<NamedEntity>();
      for (NamedEntity entity : entities) {
        // System.out.println("original: " + entity.entity);
        boolean check = false;

        for (NamedEntity temp : entities) {
          if (entity == temp) continue;

          if (entity.entity.contains(temp.entity)) {
            check = true;
          }
        }

        if (!check) newEntities.add(entity);
      }

      List<DependencyTriple> dtl = getDependencyTripleList(dep);
      List<NamedEntity> targetCands = new ArrayList<NamedEntity>();
      HashMap<NamedEntity, String> tOpinTerm = new HashMap<NamedEntity, String>();
      List<NamedEntity> holderCands = new ArrayList<NamedEntity>();
      HashMap<NamedEntity, String> hOpinTerm = new HashMap<NamedEntity, String>();

      BufferedWriter writer = new BufferedWriter(new FileWriter("weka_target.csv"));
      writer.write("A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,Class\n");

      boolean check = false;
      List<NamedEntity> targetTmp = new ArrayList<NamedEntity>();
      for (NamedEntity entity : newEntities) {
        // System.out.println("extracted: " + entity.entity);
        String temp = getTargetFeatures(entity, author, terms, dtl);
        // System.out.println(temp);
        if (temp.length() > 1) {
          check = true;
          writer.write(temp);
          String[] toks = temp.split("\n");
          for (int i = 0; i < toks.length; i++) {
            targetTmp.add(entity);
            tOpinTerm.put(entity, toks[i].substring(0, toks[i].indexOf(",")));
          }
        }
      }

      writer.close();

      if (check) {
        DataSource source = new DataSource("weka_target.csv");
        Instances testdata = source.getDataSet();
        testdata.setClassIndex(testdata.numAttributes() - 1);

        Classifier models = (Classifier) weka.core.SerializationHelper.read("target_smoreg.model");

        if (testdata.numInstances() != targetTmp.size())
          System.out.println("wrong number of instances");

        for (int i = 0; i < testdata.numInstances(); i++) {
          double pred = models.classifyInstance(testdata.instance(i));
          if (pred >= 1.0) {
            // System.out.println(pred + " , " + targetTmp.get(i).entity);
            targetCands.add(targetTmp.get(i));
          }
        }
      }

      writer = new BufferedWriter(new FileWriter("weka_holder.csv"));
      writer.write("A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,A15,Class\n");

      check = false;
      List<NamedEntity> holderTmp = new ArrayList<NamedEntity>();
      for (NamedEntity entity : newEntities) {
        // System.out.println("extracted: " + entity.entity);
        String temp = getHolderFeatures(entity, author, terms, dtl);
        // System.out.println(temp);
        if (temp.length() > 1) {
          check = true;
          writer.write(temp);
          String[] toks = temp.split("\n");
          for (int i = 0; i < toks.length; i++) {
            holderTmp.add(entity);
            hOpinTerm.put(entity, toks[i].substring(0, toks[i].indexOf(",")));
          }
        }
      }

      writer.close();

      if (check) {
        DataSource source = new DataSource("weka_holder.csv");
        Instances testdata = source.getDataSet();
        testdata.setClassIndex(testdata.numAttributes() - 1);

        Classifier models = (Classifier) weka.core.SerializationHelper.read("holder_smoreg.model");

        if (testdata.numInstances() != holderTmp.size())
          System.out.println("wrong number of instances");

        for (int i = 0; i < testdata.numInstances(); i++) {
          double pred = models.classifyInstance(testdata.instance(i));
          if (pred >= 1.0) {
            // System.out.println(pred + " , " + holderTmp.get(i).entity);
            holderCands.add(holderTmp.get(i));
          }
        }
      }

      if ((targetCands.size() == 0) || (holderCands.size() == 0)) return ht;

      List<NamedEntity> holderCandTmp = new ArrayList<NamedEntity>();
      for (NamedEntity holderCand : holderCands) {
        boolean hasLonger = false;
        for (NamedEntity temp : holderCands) {
          if (temp.entity.compareTo(holderCand.entity) == 0) continue;

          if (temp.entity.contains(holderCand.entity)) {
            hasLonger = true;
            break;
          }
        }

        if (!hasLonger) holderCandTmp.add(holderCand);
      }

      List<NamedEntity> targetCandTmp = new ArrayList<NamedEntity>();
      for (NamedEntity targetCand : targetCands) {
        boolean hasLonger = false;
        for (NamedEntity temp : targetCands) {
          if (temp.entity.compareTo(targetCand.entity) == 0) continue;

          if (temp.entity.contains(targetCand.entity)) {
            hasLonger = true;
            break;
          }
        }

        if (!hasLonger) targetCandTmp.add(targetCand);
      }

      for (NamedEntity targetCand : targetCandTmp) {
        if (targetCand.entity.compareTo(author) == 0) continue;

        for (NamedEntity holderCand : holderCandTmp) {
          if (targetCand.entity.compareTo(holderCand.entity) == 0) continue;

          String targetOpin = tOpinTerm.get(targetCand);
          String holderOpin = hOpinTerm.get(holderCand);

          // System.out.println(targetOpin + ", " + holderOpin);
          if (targetOpin.compareTo(holderOpin) != 0) continue;

          String opin =
              targetOpin
                  .concat("\t")
                  .concat(
                      Integer.toString(sent.sent.indexOf(targetOpin) + sent.beg)
                          .concat("-")
                          .concat(
                              Integer.toString(
                                  sent.sent.indexOf(targetOpin) + sent.beg + targetOpin.length())));

          String holder =
              holderCand
                  .entity
                  .concat("\t")
                  .concat(
                      Integer.toString(holderCand.beg)
                          .concat("-")
                          .concat(Integer.toString(holderCand.end)));
          String target =
              targetCand
                  .entity
                  .concat("\t")
                  .concat(
                      Integer.toString(targetCand.beg)
                          .concat("-")
                          .concat(Integer.toString(targetCand.end)));
          ht.put(targetOpin, opin.concat("\t").concat(holder).concat("\t").concat(target));
        }
      }

      return ht;
    } catch (Exception e) {
      e.printStackTrace();
      return null;
    }
  }