Пример #1
0
  /**
   * Loads the training matrix from {@code train_matrix.txt} into {@code train_matrix} and
   * {@code ylable}.
   *
   * <p>Each line is split on {@code " :: "} into a document key and a comma-separated list of
   * integer word ids. The first character and the last two characters of the list are dropped
   * before it is split on commas (presumably an opening bracket, and a closing bracket plus one
   * trailing character; TODO confirm against the code that writes the file). Every distinct word
   * id becomes a {@code FeatureNode} with value 1, stored in ascending id order. The label is 1
   * when the trimmed key is contained in {@code spam_docs} and 0 otherwise.
   *
   * <p>On any failure the stack trace is printed and the JVM terminates with a non-zero status so
   * that callers of the process can detect the error.
   */
  public static void RetrieveTrainMatrix() {
    try (BufferedReader flrdr =
        new BufferedReader(
            new FileReader("D:\\IR\\Assignment7\\Pytest\\part2\\output\\train_matrix.txt"))) {
      String line;
      int doc_count = 0;

      while ((line = flrdr.readLine()) != null) {
        String[] key_value = line.split(" :: ");
        key_value[1] = key_value[1].substring(1, key_value[1].length() - 2);

        // The sorted set both orders the ids and drops repeated ones.
        SortedSet<Integer> word_ids = new TreeSet<Integer>();
        for (String val : key_value[1].split(",")) {
          word_ids.add(Integer.parseInt(val.trim()));
        }

        // Size the array by the number of DISTINCT ids: sizing it by the raw token count would
        // leave trailing null entries whenever an id is repeated on the line.
        FeatureNode[] node = new FeatureNode[word_ids.size()];
        int val_count = 0;
        for (int val : word_ids) {
          node[val_count++] = new FeatureNode(val, 1);
        }

        ylable[doc_count] = spam_docs.contains(key_value[0].trim()) ? 1 : 0;
        train_matrix[doc_count++] = node;
      }
    } catch (Exception e) {
      e.printStackTrace();
      // Non-zero status: this is an abnormal termination.
      System.exit(1);
    }
  }
Пример #2
0
 /**
  * Writes every entry of {@code doc_score} to {@code prediction.txt}, one per line, formatted as
  * {@code key : value} followed by a space and a newline.
  *
  * <p>{@code doc_score} is first replaced by the result of
  * {@code sortByComparator(doc_score, false)}; the resulting order is whatever that helper
  * produces. Failures while writing are reported via a stack trace and otherwise ignored.
  */
 public static void WriteToFile() {
   doc_score = sortByComparator(doc_score, false);
   // try-with-resources closes the writer even when a write fails part-way through.
   try (FileWriter wrtr =
       new FileWriter("D:/IR/Assignment7/Pytest/part2/output/prediction.txt")) {
     for (String key : doc_score.keySet()) {
       wrtr.write(key + " : " + doc_score.get(key) + " \n");
     }
   } catch (Exception e) {
     e.printStackTrace();
   }
 }
Пример #3
0
  /**
   * Reads the first line of {@code spam_docs.txt} and adds each listed document name to
   * {@code spam_docs}.
   *
   * <p>The first and last characters of the line are dropped, the remainder is split on commas,
   * and from each token the first two characters and the last character are removed before
   * trimming. A missing or too-short line leaves {@code spam_docs} unchanged; tokens too short to
   * contain a name are skipped. I/O failures are reported via a stack trace.
   */
  public static void RetrieveSpamFiles() {
    try (BufferedReader flrdr =
        new BufferedReader(
            new FileReader("D:\\IR\\Assignment7\\Pytest\\part2\\output\\spam_docs.txt"))) {
      String line = flrdr.readLine();

      // An empty file (null) or a line shorter than its two delimiters has nothing to parse.
      if (line == null || line.length() < 2) {
        System.err.println("spam_docs.txt has no usable first line; no spam documents loaded");
        return;
      }

      line = line.substring(1, line.length() - 1);

      for (String doc : line.split(",")) {
        // Need at least two leading characters and one trailing character to strip.
        if (doc.length() < 3) {
          continue;
        }
        // NOTE(review): the fixed offset 2 assumes every token carries two characters before the
        // name (e.g. a space and an opening quote). A first token without a leading space would
        // lose the first character of its name - confirm against the file format.
        spam_docs.add(doc.substring(2, doc.length() - 1).trim());
      }
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
Пример #4
0
  /**
   * Trains a liblinear model on the loaded training data and stores it in {@code model}.
   *
   * <p>The problem is built from {@code train_matrix}, {@code ylable}, {@code train_count} and
   * {@code max_feature_count}. After training, the model is saved to the file {@code "model"} and
   * immediately reloaded from it. Any exception is reported via a stack trace.
   */
  public static void MLalgo() {
    try {
      // Solver L2R_LR (liblinear "-s 0"), constraint-violation cost 1.0, stopping criterion 0.01.
      Parameter trainingParams = new Parameter(SolverType.L2R_LR, 1.0, 0.01);

      Problem trainingSet = new Problem();
      trainingSet.x = train_matrix; // feature nodes
      trainingSet.y = ylable; // target values
      trainingSet.l = train_count; // number of training examples
      trainingSet.n = max_feature_count; // number of features

      model = Linear.train(trainingSet, trainingParams);

      // Persist the trained model, then continue with the copy read back from disk.
      File savedModel = new File("model");
      model.save(savedModel);
      model = Model.load(savedModel);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
Пример #5
0
  /**
   * Reads {@code test_matrix.txt}, scores every document with {@code model}, and records the
   * prediction in {@code doc_score} under the trimmed document key.
   *
   * <p>Each line is split on {@code " :: "} into a document key and a comma-separated list of
   * integer word ids. The first character and the last two characters of the list are dropped
   * before it is split on commas (presumably an opening bracket, and a closing bracket plus one
   * trailing character; TODO confirm against the code that writes the file). Every distinct word
   * id becomes a {@code FeatureNode} with value 1, stored in ascending id order.
   *
   * <p>A document whose feature list is blank is reported on standard output and skipped. On any
   * other failure the stack trace is printed and the JVM terminates with a non-zero status.
   */
  public static void RetrieveTestMatrix() {
    try (BufferedReader flrdr =
        new BufferedReader(
            new FileReader("D:\\IR\\Assignment7\\Pytest\\part2\\output\\test_matrix.txt"))) {
      String line;

      while ((line = flrdr.readLine()) != null) {
        String[] key_value = line.split(" :: ");

        // A blank feature list cannot be parsed; report it and move on instead of letting the
        // substring/parseInt below throw and abort the whole run.
        if (key_value[1].trim().isEmpty()) {
          System.out.println("Error on Test Doc " + key_value[0]);
          continue;
        }

        key_value[1] = key_value[1].substring(1, key_value[1].length() - 2);

        // The sorted set both orders the ids and drops repeated ones.
        SortedSet<Integer> word_ids = new TreeSet<Integer>();
        for (String val : key_value[1].split(",")) {
          word_ids.add(Integer.parseInt(val.trim()));
        }

        // Size the array by the number of DISTINCT ids: sizing it by the raw token count would
        // leave trailing null entries whenever an id is repeated on the line.
        FeatureNode[] node = new FeatureNode[word_ids.size()];
        int val_count = 0;
        for (int val : word_ids) {
          node[val_count++] = new FeatureNode(val, 1);
        }

        double predict = Linear.predict(model, node);
        doc_score.put(key_value[0].trim(), predict);
      }
    } catch (Exception e) {
      e.printStackTrace();
      // Non-zero status: this is an abnormal termination.
      System.exit(1);
    }
  }