public static void MLalgo() { try { Problem problem = new Problem(); problem.l = train_count; // number of training examples problem.n = max_feature_count; // number of features problem.x = train_matrix; // feature nodes problem.y = ylable; // target values; SolverType solver = SolverType.L2R_LR; // -s 0 double C = 1.0; // cost of constraints violation double eps = 0.01; // stopping criteria Parameter parameter = new Parameter(solver, C, eps); model = Linear.train(problem, parameter); File modelFile = new File("model"); model.save(modelFile); // load model or use it directly model = Model.load(modelFile); } catch (Exception e) { e.printStackTrace(); } }
private void mainClassifierFunction(int option, String trainFile, String testFile, String ddgFile) throws IOException { // SentimentClassifierHindi this = new SentimentClassifierHindi(); // int finalSize = this.SentimentClassifierHindi(); int finalSize = this.generateFeature(option, trainFile, testFile, ddgFile); System.out.println("Hello aspectCategorizationSemEval2016!"); // Create features Problem problem = new Problem(); // Save X to problem double a[] = new double[this.trainingFeature.size()]; File file = new File(rootDirectory + "\\dataset\\trainingLabels.txt"); BufferedReader reader = new BufferedReader(new FileReader(file)); String read; int count = 0; while ((read = reader.readLine()) != null) { // System.out.println(read); a[count++] = Double.parseDouble(read.toString()); } // Feature[][] f = new Feature[][]{ {}, {}, {}, {}, {}, {} }; // trainingFeature = trainingObject.getList(); Feature[][] trainFeatureVector = new Feature[trainingFeature.size()][finalSize]; System.out.println("Training Instances: " + trainingFeature.size()); System.out.println("Feature Length: " + finalSize); System.out.println("Test Instances: " + testFeature.size()); for (int i = 0; i < trainingFeature.size(); i++) { // System.out.println(); // System.out.println(trainingFeature.get(i)); System.out.println(i + " trained."); for (int j = 0; j < finalSize; j++) { // System.out.print(trainingFeature.get(i).get(j + 1)+" "); // trainingFeature.get(i). if (trainingFeature.get(i).containsKey(j + 1)) { // System.out.print(j + 1 + ", "); trainFeatureVector[i][j] = new FeatureNode(j + 1, trainingFeature.get(i).get(j + 1)); } else { trainFeatureVector[i][j] = new FeatureNode(j + 1, 0.0); } } // System.out.println(); } problem.l = trainingFeature.size(); // number of training examples problem.n = finalSize; // number of features problem.x = trainFeatureVector; // feature nodes problem.y = a; // target values ---- BasicParser bp = new BasicParser(); SolverType solver = SolverType.L2R_LR; // -s 7 double C = 0.75; // cost of constraints violation double eps = 0.0001; // stopping criteria Parameter parameter = new Parameter(solver, C, eps); Model model = Linear.train(problem, parameter); File modelFile = new File("model"); model.save(modelFile); // PrintWriter write = new PrintWriter(new BufferedWriter(new FileWriter(rootDirectory + // "\\dataset\\predictedLabels.txt"))); PrintWriter write = new PrintWriter( new BufferedWriter( new FileWriter( rootDirectory + "\\dataset\\dataset_aspectCategorization\\predictedHotelsLabels.txt"))); if (option == 1) { BufferedReader trainReader = new BufferedReader( new FileReader( new File( rootDirectory + "\\dataset\\dataset_aspectCategorization\\" + trainFile))); HashMap<String, Integer> id = new HashMap<String, Integer>(); HashMap<String, String> review = new HashMap<String, String>(); double[] val = new double[trainingFeature.size()]; double[] tempVal = new double[trainingFeature.size()]; LinearCopy.crossValidation(problem, parameter, 5, val, tempVal); for (int i = 0; i < trainingFeature.size(); i++) { int flag = 0; String tokens[] = trainReader.readLine().split("\\|"); if (id.containsKey(tokens[1]) == true || tokens[2].compareToIgnoreCase("True") == 0) { } else { // System.out.println(tokens[1]); /*int max = -1; double probMax = -1.0; for(int j=0; j<13; j++){ if(probMax<val[i][j]){ probMax = val[i][j]; max = j; } }*/ // System.out.println(tempVal[i]); write.println((int) (val[i])); write.println("next"); id.put(tokens[1], 1); System.out.println(tokens[1] + "\t" + (int) (val[i])); if (review.containsKey(tokens[1])) { System.out.println(tokens[3]); System.out.println(review.get(tokens[1])); } else { review.put(tokens[1], tokens[3]); } } /*else{ for (int j = 0; j < 13; j++) { //System.out.print(val[i][j]+", "); if (val[i] >= 0.185) { flag = 1; //System.out.println("i"); write.println(j + 1); } } if (flag == 1) { write.println("next"); } else { write.println("-1"); write.println("next"); } //write.println(prediction); id.put(tokens[1], 1); //System.out.println(); }*/ } write.close(); return; } if (option == 3) { System.out.println(rootDirectory); BufferedReader testReader = new BufferedReader( new FileReader( new File( rootDirectory + "\\dataset\\dataset_aspectCategorization\\" + testFile))); HashMap<String, Integer> id = new HashMap<String, Integer>(); model = Model.load(modelFile); int countNext = 0; for (int i = 0; i < testFeature.size(); i++) { // System.out.println(i+", "+testFeature.size()+", "+testFeature.get(i).size()); Feature[] instance = new Feature[testFeature.get(i).size()]; int j = 0; for (Map.Entry<Integer, Double> entry : testFeature.get(i).entrySet()) { // System.out.print(entry.getKey() + ": " + entry.getValue() + "; "); // listOfMaps.get(i).put(start + entry.getKey(), entry.getValue()); // do stuff instance[j++] = new FeatureNode(entry.getKey(), entry.getValue()); } // double d = LinearCopy.predict(model, instance); double[] predict = new double[85]; double prediction = LinearCopy.predictProbability(model, instance, predict); int labelMap[] = new int[13]; labelMap = model.getLabels(); for (int ar = 0; ar < labelMap.length; ar++) { System.out.println("********************** " + ar + ": " + labelMap[ar]); } // System.out.println(prediction); // Arrays.sort(predict, Collections.reverseOrder()); // System.out.println(); // double prediction = LinearCopy.predict(model, instance); String tokens[] = testReader.readLine().split("\\|"); // System.out.println(tokens[1]); int flag = -1; if (id.containsKey(tokens[1]) == true || tokens[2].compareToIgnoreCase("True") == 0) { flag = 4; // System.out.println("OutofScope: "+tokens[1]); } else if (tokens[3].compareToIgnoreCase("abc") == 0) { flag = 2; System.out.println(tokens[1]); write.println("-1"); write.println("next"); countNext++; id.put(tokens[1], 1); } else { flag = 0; for (int p = 0; p < 85; p++) { if (predict[p] >= 0.128) { flag = 1; write.println(labelMap[p]); } } if (flag == 1) { countNext++; write.println("next"); } else { countNext++; write.println("-1"); write.println("next"); } // write.println((int)d); // write.println("next"); /*write.println(prediction); write.println("next");*/ id.put(tokens[1], 1); } if (flag == -1) { System.out.println("-1, " + tokens[1]); } } write.close(); System.out.println("count " + countNext); } write.close(); }