@Override
public void initialize(Parameters params) throws ResourceInitializationException {
  super.initialize(params);
  try {
    model = Model.load(((ResourceReader) params.get(Constants.MODEL)).getReader());
  } catch (IOException e) {
    throw new ResourceInitializationException("Failed to load SVM model.", e);
  }
  // getLabels() already returns a fresh array, so no separate allocation is needed
  labelIndeces = model.getLabels();
  representer = (TextRepresenter) params.get(Constants.REPRESENTER);
}
public static void MLalgo() {
  try {
    Problem problem = new Problem();
    problem.l = train_count; // number of training examples
    problem.n = max_feature_count; // number of features
    problem.x = train_matrix; // feature nodes
    problem.y = ylable; // target values

    SolverType solver = SolverType.L2R_LR; // -s 0
    double C = 1.0; // cost of constraints violation
    double eps = 0.01; // stopping criteria

    Parameter parameter = new Parameter(solver, C, eps);
    model = Linear.train(problem, parameter);

    File modelFile = new File("model");
    model.save(modelFile);
    // load model or use it directly
    model = Model.load(modelFile);
  } catch (Exception e) {
    e.printStackTrace();
  }
}
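// A minimal usage sketch to complement MLalgo() above (assumption: the de.bwaldvogel.liblinear
// API used throughout this file; the feature indices and values below are made-up illustration
// data, not taken from the original code). It loads the model saved under "model" and predicts
// the class of a single instance.
public static double predictExample() throws IOException {
  Model loaded = Model.load(new File("model")); // same path MLalgo() saves to
  // liblinear expects (index, value) pairs with feature indices in ascending order
  Feature[] instance = {
    new FeatureNode(1, 0.5), new FeatureNode(3, 1.0), new FeatureNode(7, 0.25)
  };
  return Linear.predict(loaded, instance); // returns the predicted class label
}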
private void mainClassifierFunction(int option, String trainFile, String testFile, String ddgFile)
    throws IOException {
  // SentimentClassifierHindi this = new SentimentClassifierHindi();
  // int finalSize = this.SentimentClassifierHindi();
  int finalSize = this.generateFeature(option, trainFile, testFile, ddgFile);
  System.out.println("Hello aspectCategorizationSemEval2016!");

  // Create features
  Problem problem = new Problem();

  // Read the training labels (target values)
  double a[] = new double[this.trainingFeature.size()];
  File file = new File(rootDirectory + "\\dataset\\trainingLabels.txt");
  BufferedReader reader = new BufferedReader(new FileReader(file));
  String read;
  int count = 0;
  while ((read = reader.readLine()) != null) {
    // System.out.println(read);
    a[count++] = Double.parseDouble(read);
  }
  reader.close();

  // Feature[][] f = new Feature[][]{ {}, {}, {}, {}, {}, {} };
  // trainingFeature = trainingObject.getList();
  Feature[][] trainFeatureVector = new Feature[trainingFeature.size()][finalSize];

  System.out.println("Training Instances: " + trainingFeature.size());
  System.out.println("Feature Length: " + finalSize);
  System.out.println("Test Instances: " + testFeature.size());

  for (int i = 0; i < trainingFeature.size(); i++) {
    // System.out.println(trainingFeature.get(i));
    System.out.println(i + " trained.");
    for (int j = 0; j < finalSize; j++) {
      // System.out.print(trainingFeature.get(i).get(j + 1) + " ");
      if (trainingFeature.get(i).containsKey(j + 1)) {
        trainFeatureVector[i][j] = new FeatureNode(j + 1, trainingFeature.get(i).get(j + 1));
      } else {
        trainFeatureVector[i][j] = new FeatureNode(j + 1, 0.0);
      }
    }
  }

  problem.l = trainingFeature.size(); // number of training examples
  problem.n = finalSize; // number of features
  problem.x = trainFeatureVector; // feature nodes
  problem.y = a; // target values

  BasicParser bp = new BasicParser();

  SolverType solver = SolverType.L2R_LR; // -s 0
  double C = 0.75; // cost of constraints violation
  double eps = 0.0001; // stopping criteria
  Parameter parameter = new Parameter(solver, C, eps);
  Model model = Linear.train(problem, parameter);
  File modelFile = new File("model");
  model.save(modelFile);

  // PrintWriter write = new PrintWriter(new BufferedWriter(new FileWriter(rootDirectory +
  //     "\\dataset\\predictedLabels.txt")));
  PrintWriter write =
      new PrintWriter(
          new BufferedWriter(
              new FileWriter(
                  rootDirectory
                      + "\\dataset\\dataset_aspectCategorization\\predictedHotelsLabels.txt")));

  if (option == 1) {
    BufferedReader trainReader =
        new BufferedReader(
            new FileReader(
                new File(
                    rootDirectory + "\\dataset\\dataset_aspectCategorization\\" + trainFile)));
    HashMap<String, Integer> id = new HashMap<String, Integer>();
    HashMap<String, String> review = new HashMap<String, String>();
    double[] val = new double[trainingFeature.size()];
    double[] tempVal = new double[trainingFeature.size()];
    LinearCopy.crossValidation(problem, parameter, 5, val, tempVal);
    for (int i = 0; i < trainingFeature.size(); i++) {
      int flag = 0;
      String tokens[] = trainReader.readLine().split("\\|");
      if (id.containsKey(tokens[1]) == true || tokens[2].compareToIgnoreCase("True") == 0) {
        // already seen or out of scope: skip
      } else {
        // System.out.println(tokens[1]);
        /*int max = -1;
        double probMax = -1.0;
        for (int j = 0; j < 13; j++) {
            if (probMax < val[i][j]) {
                probMax = val[i][j];
                max = j;
            }
        }*/
        // System.out.println(tempVal[i]);
        write.println((int) (val[i]));
        write.println("next");
        id.put(tokens[1], 1);
        System.out.println(tokens[1] + "\t" + (int) (val[i]));
        if (review.containsKey(tokens[1])) {
          System.out.println(tokens[3]);
          System.out.println(review.get(tokens[1]));
        } else {
          review.put(tokens[1], tokens[3]);
        }
      }
      /*else{
          for (int j = 0; j < 13; j++) {
              //System.out.print(val[i][j]+", ");
              if (val[i] >= 0.185) {
                  flag = 1;
                  write.println(j + 1);
              }
          }
          if (flag == 1) {
              write.println("next");
          } else {
              write.println("-1");
              write.println("next");
          }
          //write.println(prediction);
          id.put(tokens[1], 1);
      }*/
    }
    write.close();
    return;
  }

  if (option == 3) {
    System.out.println(rootDirectory);
    BufferedReader testReader =
        new BufferedReader(
            new FileReader(
                new File(
                    rootDirectory + "\\dataset\\dataset_aspectCategorization\\" + testFile)));
    HashMap<String, Integer> id = new HashMap<String, Integer>();
    model = Model.load(modelFile);
    int countNext = 0;
    for (int i = 0; i < testFeature.size(); i++) {
      // System.out.println(i + ", " + testFeature.size() + ", " + testFeature.get(i).size());
      Feature[] instance = new Feature[testFeature.get(i).size()];
      int j = 0;
      for (Map.Entry<Integer, Double> entry : testFeature.get(i).entrySet()) {
        // System.out.print(entry.getKey() + ": " + entry.getValue() + "; ");
        instance[j++] = new FeatureNode(entry.getKey(), entry.getValue());
      }
      // double d = LinearCopy.predict(model, instance);
      double[] predict = new double[85];
      double prediction = LinearCopy.predictProbability(model, instance, predict);
      // getLabels() already returns the label array, so no separate allocation is needed
      int[] labelMap = model.getLabels();
      for (int ar = 0; ar < labelMap.length; ar++) {
        System.out.println("********************** " + ar + ": " + labelMap[ar]);
      }
      // System.out.println(prediction);
      // Arrays.sort(predict, Collections.reverseOrder());
      // double prediction = LinearCopy.predict(model, instance);
      String tokens[] = testReader.readLine().split("\\|");
      // System.out.println(tokens[1]);
      int flag = -1;
      if (id.containsKey(tokens[1]) == true || tokens[2].compareToIgnoreCase("True") == 0) {
        flag = 4;
        // System.out.println("OutofScope: " + tokens[1]);
      } else if (tokens[3].compareToIgnoreCase("abc") == 0) {
        flag = 2;
        System.out.println(tokens[1]);
        write.println("-1");
        write.println("next");
        countNext++;
        id.put(tokens[1], 1);
      } else {
        flag = 0;
        for (int p = 0; p < 85; p++) {
          if (predict[p] >= 0.128) {
            flag = 1;
            write.println(labelMap[p]);
          }
        }
        if (flag == 1) {
          countNext++;
          write.println("next");
        } else {
          countNext++;
          write.println("-1");
          write.println("next");
        }
        // write.println((int) d);
        // write.println("next");
        /*write.println(prediction);
        write.println("next");*/
        id.put(tokens[1], 1);
      }
      if (flag == -1) {
        System.out.println("-1, " + tokens[1]);
      }
    }
    write.close();
    System.out.println("count " + countNext);
  }
  write.close();
}
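// A minimal sketch related to the probability loop above (assumption: the same liblinear-style
// API; mostProbableLabel and its parameters are illustrative names, not part of the original
// code). Sizing the probability array from model.getNrClass() avoids the hard-coded 85 and
// keeps indices aligned with model.getLabels(); predictProbability requires a logistic
// regression solver such as L2R_LR.
private static int mostProbableLabel(Model model, Feature[] instance) {
  double[] probabilities = new double[model.getNrClass()];
  Linear.predictProbability(model, instance, probabilities);
  int[] labels = model.getLabels();
  int best = 0;
  for (int p = 1; p < probabilities.length; p++) {
    if (probabilities[p] > probabilities[best]) {
      best = p;
    }
  }
  return labels[best]; // label with the highest estimated probability
}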
public void crossValidateFold(int a, int b, int c, int d, boolean skip, double param) {
  System.out.println("Starting cross validation");
  int testSize = wayList.size() / 5;
  List<OSMWay> trainList = new ArrayList<>();
  for (int g = a * testSize; g < b * testSize; g++) { // 0~~1~~2~~3~~4~~5
    if (skip) {
      if (g == (c) * testSize) {
        g = (c + 1) * testSize;
      }
    }
    trainList.add(wayList.get(g));
  }
  int wayListSizeWithoutUnclassified = trainList.size();
  int u = 0;
  System.out.println("trainList size: " + wayListSizeWithoutUnclassified);

  // set classes for each osm instance
  int sizeToBeAddedToArray = 0; // used to properly size the features array, accounting for the
                                // extra vectors added per additional class
  int lalala = 0;
  for (OSMWay way : trainList) {
    OSMClassification classifyInstances = new OSMClassification();
    classifyInstances.calculateClasses(
        way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);
    if (way.getClassIDs().isEmpty()) {
      wayListSizeWithoutUnclassified -= 1;
      u++;
    } else {
      sizeToBeAddedToArray = sizeToBeAddedToArray + way.getClassIDs().size() - 1;
    }
  }

  double C = param;
  double eps = 0.001;
  double[] GROUPS_ARRAY2 =
      new double[wayListSizeWithoutUnclassified + sizeToBeAddedToArray]; // new double[117558];
  FeatureNode[][] trainingSetWithUnknown2 =
      new FeatureNode[wayListSizeWithoutUnclassified + sizeToBeAddedToArray]
          [numberOfFeatures]; // working[3812];
  int k = 0;
  for (OSMWay way : trainList) {
    // adding multiple vectors
    int id;
    if (USE_CLASS_FEATURES) {
      ClassFeatures class_vector = new ClassFeatures();
      class_vector.createClassFeatures(
          way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);
      id = 1422;
    } else {
      id = 1;
    }
    // pass id also: 1422 if using classes, 1 if not
    GeometryFeatures geometryFeatures = new GeometryFeatures(id);
    geometryFeatures.createGeometryFeatures(way);
    id = geometryFeatures.getLastID();
    // id after geometry, cases: all geometry features with mean-variance boolean intervals:
    // id = 1526
    if (USE_RELATION_FEATURES) {
      RelationFeatures relationFeatures = new RelationFeatures(id);
      relationFeatures.createRelationFeatures(way, relationList);
      id = relationFeatures.getLastID();
    } else {
      id = geometryFeatures.getLastID();
    }
    // id 1531
    TextualFeatures textualFeatures;
    if (USE_TEXTUAL_FEATURES) {
      textualFeatures = new TextualFeatures(id, namesList, languageDetector);
      textualFeatures.createTextualFeatures(way);
    }
    List<FeatureNode> featureNodeList = way.getFeatureNodeList();
    FeatureNode[] featureNodeArray = new FeatureNode[featureNodeList.size()];
    if (!way.getClassIDs().isEmpty()) {
      int i = 0;
      for (FeatureNode featureNode : featureNodeList) {
        featureNodeArray[i] = featureNode;
        i++;
      }
      // one training vector per class ID of the way
      for (int classID : way.getClassIDs()) {
        lalala++;
        trainingSetWithUnknown2[k] = featureNodeArray;
        GROUPS_ARRAY2[k] = classID;
        k++;
      }
    }
  }

  // Linear.enableDebugOutput();
  Problem problem = new Problem();
  problem.l =
      wayListSizeWithoutUnclassified + sizeToBeAddedToArray; // wayListSizeWithoutUnclassified; // wayList.size();
  problem.n = numberOfFeatures; // number of features: the largest index of all features
                                // (geometry + textual 3797; 3811; 3812; 1812 with classes)
  problem.x = trainingSetWithUnknown2; // feature nodes
  problem.y = GROUPS_ARRAY2; // target values
  // SolverType solver = SolverType.MCSVM_CS; // Crammer and Singer for multiclass
  //     classification, equivalent of SVMlight
  SolverType solver2 =
      SolverType.getById(2); // 2 -- L2-regularized L2-loss support vector classification (primal)
  Parameter parameter = new Parameter(solver2, C, eps);
  // System.out.println("param set ok");
  // System.out.println("number of features: " + vc.getNumOfFeatures());

  long start = System.nanoTime();
  System.out.println("training...");
  // silence liblinear's console output during training
  PrintStream original = System.out;
  System.setOut(
      new PrintStream(
          new OutputStream() {
            @Override
            public void write(int arg0) throws IOException {}
          }));
  Model model = Linear.train(problem, parameter);
  long end = System.nanoTime();
  long elapsedTime = end - start;
  System.setOut(original);
  System.out.println(
      "training process completed in: " + NANOSECONDS.toSeconds(elapsedTime) + " seconds.");
  // System.out.println("trained");

  // decide model path and naming and/or way of deleting/creating 1 or more models.
  // File inFile = new File(inputFilePath).getParentFile();
  File modelFile;
  if (USE_CLASS_FEATURES) {
    modelFile = new File(modelDirectory.getAbsolutePath() + "/model_with_classes_c=" + param);
  } else {
    modelFile =
        new File(modelDirectory.getAbsolutePath() + "/model_geometries_textual_c=" + param);
  }
  if (modelFile.exists()) {
    modelFile.delete();
  }
  try {
    model.save(modelFile);
    System.out.println("model saved at: " + modelFile);
  } catch (IOException ex) {
    Logger.getLogger(TrainWorker.class.getName()).log(Level.SEVERE, null, ex);
  }
  // end of training

  // test set
  List<OSMWay> testList = new ArrayList<>();
  for (int g = c * testSize; g < d * testSize; g++) {
    testList.add(wayList.get(g)); // liblinear test
  }
  System.out.println("testList size: " + testList.size());
  int succededInstances = 0;
  int succededInstances5 = 0;
  int succededInstances10 = 0;
  try {
    model = Model.load(modelFile);
  } catch (IOException ex) {
    Logger.getLogger(TrainWorker.class.getName()).log(Level.SEVERE, null, ex);
  }
  int modelLabelSize = model.getLabels().length;
  int[] labels = model.getLabels();
  Map<Integer, Integer> mapLabelsToIDs = new HashMap<>();
  for (int h = 0; h < model.getLabels().length; h++) {
    mapLabelsToIDs.put(labels[h], h);
    // System.out.println(h + " <-> " + labels[h]);
  }
  int wayListSizeWithoutUnclassified2 = testList.size();
  for (OSMWay way : testList) {
    OSMClassification classifyInstances = new OSMClassification();
    classifyInstances.calculateClasses(
        way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);
    if (way.getClassIDs().isEmpty()) {
      // System.out.println("found unclassified" + way.getClassIDs() + "class: "
      //     + way.getClassID());
      wayListSizeWithoutUnclassified2 -= 1;
      // u++;
    }
  }
  FeatureNode[] testInstance2;
  for (OSMWay way : testList) {
    int id;
    if (USE_CLASS_FEATURES) {
      ClassFeatures class_vector = new ClassFeatures();
      class_vector.createClassFeatures(
          way, mappings, mapperWithIDs, indirectClasses, indirectClassesWithIDs);
      id = 1422;
    } else {
      id = 1;
    }
    // pass id also: 1422 if using classes, 1 if not
    GeometryFeatures geometryFeatures = new GeometryFeatures(id);
    geometryFeatures.createGeometryFeatures(way);
    id = geometryFeatures.getLastID();
    // id after geometry, cases: all geometry features with mean-variance boolean intervals:
    // id = 1526
    // System.out.println("id 1526 -> " + geometryFeatures.getLastID());
    if (USE_RELATION_FEATURES) {
      RelationFeatures relationFeatures = new RelationFeatures(id);
      relationFeatures.createRelationFeatures(way, relationList);
      id = relationFeatures.getLastID();
    } else {
      id = geometryFeatures.getLastID();
      // System.out.println("geom feat " + id);
    }
    // id 1531
    // System.out.println("id 1532 -> " + relationFeatures.getLastID());
    if (USE_TEXTUAL_FEATURES) {
      TextualFeatures textualFeatures = new TextualFeatures(id, namesList, languageDetector);
      textualFeatures.createTextualFeatures(way);
      // System.out.println("last textual id: " + textualFeatures.getLastID());
      // System.out.println("full: " + way.getFeatureNodeList());
    }
    List<FeatureNode> featureNodeList = way.getFeatureNodeList();
    FeatureNode[] featureNodeArray = new FeatureNode[featureNodeList.size()];
    int i = 0;
    for (FeatureNode featureNode : featureNodeList) {
      featureNodeArray[i] = featureNode;
      i++;
    }
    testInstance2 = featureNodeArray;
    // double prediction = Linear.predict(model, testInstance2);
    // System.out.println("test prediction: " + prediction);
    double[] scores = new double[modelLabelSize];
    Linear.predictValues(model, testInstance2, scores);

    // find the indexes of the max values in the scores array: the predicted classes are the
    // elements at those indexes in model.getLabels(). Iterate the scores, find the 10 largest
    // values with their indexes, then look those indexes up in model.getLabels().
    Map<Double, Integer> scoresValues = new HashMap<>();
    for (int h = 0; h < scores.length; h++) {
      scoresValues.put(scores[h], h);
      // System.out.println(h + " <-> " + scores[h]);
    }
    Arrays.sort(scores);
    // System.out.println("max value: " + scores[scores.length - 1] + " second max: "
    //     + scores[scores.length - 2]);
    // System.out.println("ask this index from labels: "
    //     + scoresValues.get(scores[scores.length - 1]));
    // System.out.println("got from labels: "
    //     + labels[scoresValues.get(scores[scores.length - 1])]);
    // System.out.println("next prediction: "
    //     + labels[scoresValues.get(scores[scores.length - 2])]);
    // System.out.println("way labels: " + way.getClassIDs());
    if (way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 1])])) {
      succededInstances++;
    }
    if (way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 1])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 2])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 3])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 4])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 5])])) {
      succededInstances5++;
    }
    if (way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 1])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 2])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 3])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 4])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 5])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 6])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 7])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 8])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 9])])
        || way.getClassIDs().contains(labels[scoresValues.get(scores[scores.length - 10])])) {
      succededInstances10++;
    }
    // System.out.println("labels: " + Arrays.toString(model.getLabels()));
    // System.out.println("label[0]: " + model.getLabels()[0]);
  }

  System.out.println(
      "Succeeded " + succededInstances + " of " + testList.size() + " total (1 class prediction)");
  double precision1 = succededInstances / (double) wayListSizeWithoutUnclassified2;
  score1 = precision1;
  System.out.println(precision1);

  System.out.println(
      "Succeeded " + succededInstances5 + " of " + testList.size() + " total (5 class prediction)");
  double precision5 = succededInstances5 / (double) wayListSizeWithoutUnclassified2;
  score5 = precision5;
  System.out.println(precision5);

  System.out.println(
      "Succeeded "
          + succededInstances10
          + " of "
          + testList.size()
          + " total (10 class prediction)");
  double precision10 = succededInstances10 / (double) wayListSizeWithoutUnclassified2;
  score10 = precision10;
  System.out.println(precision10);
}
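// An alternative sketch for the top-k lookup used in crossValidateFold() above (assumption: the
// same de.bwaldvogel.liblinear API; topKLabels and kBest are illustrative names). The
// Map<Double, Integer> approach above silently drops entries when two classes receive identical
// decision values; sorting class indices by score avoids that tie problem.
private static int[] topKLabels(Model model, Feature[] instance, int kBest) {
  double[] scores = new double[model.getNrClass()];
  Linear.predictValues(model, instance, scores);
  Integer[] order = new Integer[scores.length];
  for (int i = 0; i < order.length; i++) {
    order[i] = i;
  }
  // sort class indices by decreasing decision value
  java.util.Arrays.sort(order, (x, y) -> Double.compare(scores[y], scores[x]));
  int[] labels = model.getLabels();
  int k = Math.min(kBest, order.length);
  int[] top = new int[k];
  for (int i = 0; i < k; i++) {
    top[i] = labels[order[i]];
  }
  return top;
}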