/**
 * Loads the training matrix from {@code train_matrix.txt} into the
 * {@code train_matrix} and {@code ylable} fields.
 *
 * <p>Each input line is split on {@code " :: "} into a document id and a
 * comma-separated list of integer ids. The ids are de-duplicated and sorted
 * ascending, then turned into one {@code FeatureNode(id, 1)} per distinct id.
 * The row's label is 1 when the trimmed document id is in {@code spam_docs},
 * otherwise 0.
 *
 * <p>On any failure the stack trace is printed and the JVM is terminated.
 */
public static void RetrieveTrainMatrix() {
    // try-with-resources guarantees the reader is closed even when a line fails to parse.
    try (BufferedReader flrdr = new BufferedReader(
            new FileReader("D:\\IR\\Assignment7\\Pytest\\part2\\output\\train_matrix.txt"))) {
        String line;
        int doc_count = 0;
        while ((line = flrdr.readLine()) != null) {
            String[] key_value = line.split(" :: ");

            // Drop the first character and the last two characters around the id list
            // (presumably an opening bracket and a closing bracket plus one trailing
            // character -- TODO confirm against the file writer).
            String id_list = key_value[1].substring(1, key_value[1].length() - 2);

            // TreeSet removes repeated ids and iterates in ascending order.
            SortedSet<Integer> word_ids = new TreeSet<Integer>();
            for (String val : id_list.split(",")) {
                word_ids.add(Integer.parseInt(val.trim()));
            }

            // Size the row by the number of DISTINCT ids. Sizing it by the raw token
            // count left trailing null entries whenever an id was repeated on a line.
            FeatureNode[] node = new FeatureNode[word_ids.size()];
            int val_count = 0;
            for (int val : word_ids) {
                node[val_count++] = new FeatureNode(val, 1);
            }

            ylable[doc_count] = spam_docs.contains(key_value[0].trim()) ? 1 : 0;
            train_matrix[doc_count++] = node;
        }
    } catch (Exception e) {
        e.printStackTrace();
        // NOTE(review): exit status 0 is reported even though loading failed; kept
        // unchanged in case a calling script relies on it.
        System.exit(0);
    }
}
/**
 * Re-orders {@code doc_score} via {@code sortByComparator(doc_score, false)} and
 * writes every entry to {@code prediction.txt}, one per line, formatted as
 * {@code "<key> : <score> \n"}.
 *
 * <p>The ordering produced by {@code sortByComparator} is defined elsewhere
 * (presumably descending by score for {@code false} -- TODO confirm).
 *
 * <p>I/O failures are reported with a stack trace; the method then returns normally.
 */
public static void WriteToFile() {
    doc_score = sortByComparator(doc_score, false);
    // try-with-resources closes (and therefore flushes) the writer even if a write fails.
    try (FileWriter wrtr = new FileWriter("D:/IR/Assignment7/Pytest/part2/output/prediction.txt")) {
        for (String key : doc_score.keySet()) {
            wrtr.write(key + " : " + doc_score.get(key) + " \n");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Reads the first line of {@code spam_docs.txt} and adds every document id it
 * lists to {@code spam_docs}.
 *
 * <p>The line's first and last characters are removed, the remainder is split on
 * commas, and each token has its first two characters and its last character
 * removed before being trimmed and stored.
 *
 * <p>An empty file is reported on {@code System.err} and leaves {@code spam_docs}
 * untouched. Other failures are reported with a stack trace; the method then
 * returns normally.
 */
public static void RetrieveSpamFiles() {
    // try-with-resources guarantees the reader is closed even when parsing throws.
    try (BufferedReader flrdr = new BufferedReader(
            new FileReader("D:\\IR\\Assignment7\\Pytest\\part2\\output\\spam_docs.txt"))) {
        String line = flrdr.readLine();
        if (line == null) {
            // readLine() returns null on an empty file; report it explicitly instead of
            // failing later with a NullPointerException.
            System.err.println("spam_docs.txt is empty; no spam documents loaded");
            return;
        }

        // Strip the enclosing first/last character of the whole list.
        line = line.substring(1, line.length() - 1);
        for (String doc : line.split(",")) {
            // Strip a two-character prefix and a one-character suffix from each entry
            // (presumably quoting produced by the writer -- TODO confirm the exact format).
            spam_docs.add(doc.substring(2, doc.length() - 1).trim());
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
public static void MLalgo() { try { Problem problem = new Problem(); problem.l = train_count; // number of training examples problem.n = max_feature_count; // number of features problem.x = train_matrix; // feature nodes problem.y = ylable; // target values; SolverType solver = SolverType.L2R_LR; // -s 0 double C = 1.0; // cost of constraints violation double eps = 0.01; // stopping criteria Parameter parameter = new Parameter(solver, C, eps); model = Linear.train(problem, parameter); File modelFile = new File("model"); model.save(modelFile); // load model or use it directly model = Model.load(modelFile); } catch (Exception e) { e.printStackTrace(); } }
/**
 * Scores every document in {@code test_matrix.txt} with the trained {@code model}
 * and records the result in {@code doc_score}.
 *
 * <p>Each input line is split on {@code " :: "} into a document id and a
 * comma-separated list of integer ids. The ids are de-duplicated and sorted
 * ascending, then turned into one {@code FeatureNode(id, 1)} per distinct id.
 * The row is passed to {@code Linear.predict} and the returned value is stored
 * under the trimmed document id.
 *
 * <p>On any failure the stack trace is printed and the JVM is terminated.
 */
public static void RetrieveTestMatrix() {
    // try-with-resources guarantees the reader is closed even when a line fails to parse.
    try (BufferedReader flrdr = new BufferedReader(
            new FileReader("D:\\IR\\Assignment7\\Pytest\\part2\\output\\test_matrix.txt"))) {
        String line;
        while ((line = flrdr.readLine()) != null) {
            String[] key_value = line.split(" :: ");

            if (key_value[1].trim().length() < 1) {
                // NOTE(review): the message says "Train" although this loader reads the
                // test matrix; text kept unchanged. Processing continues after the
                // message, exactly as before.
                System.out.println("Error on Train Doc " + key_value[0]);
            }

            // Drop the first character and the last two characters around the id list
            // (presumably an opening bracket and a closing bracket plus one trailing
            // character -- TODO confirm against the file writer).
            String id_list = key_value[1].substring(1, key_value[1].length() - 2);

            // TreeSet removes repeated ids and iterates in ascending order.
            SortedSet<Integer> word_ids = new TreeSet<Integer>();
            for (String val : id_list.split(",")) {
                word_ids.add(Integer.parseInt(val.trim()));
            }

            // Size the row by the number of DISTINCT ids. Sizing it by the raw token
            // count left trailing null entries whenever an id was repeated on a line,
            // and those nulls were then handed to Linear.predict.
            FeatureNode[] node = new FeatureNode[word_ids.size()];
            int val_count = 0;
            for (int val : word_ids) {
                node[val_count++] = new FeatureNode(val, 1);
            }

            double predict = Linear.predict(model, node);
            doc_score.put(key_value[0].trim(), predict);
        }
    } catch (Exception e) {
        e.printStackTrace();
        // NOTE(review): exit status 0 is reported even though loading failed; kept
        // unchanged in case a calling script relies on it.
        System.exit(0);
    }
}