public int predict(String text) { LMClassifier<?, ?> model = null; try { model = createModelInputAndTrain(true, INPUT_TWEETS_ORIG_TXT); } catch (ClassNotFoundException | IOException e) { e.printStackTrace(); } ConditionalClassification classification = model.classify(text); String category = classification.bestCategory(); // System.out.println("Tweet: " + text + "\tSentiment: " + category); return Integer.parseInt(category); }
/** * Method is executed after the init() method has finished its execution. <br> * * @throws ExecutionException */ public void execute() throws ExecutionException { // lets start the progress and initialize the progress counter fireProgressChanged(0); // If no document provided to process throw an exception if (document == null) { fireProcessFinished(); throw new GateRuntimeException("No document to process!"); } // langugage ID feature Name if (languageIdFeatureName == null || languageIdFeatureName.trim().length() == 0) languageIdFeatureName = "lang"; /* Default behaviour: classify the text of the whole document and * store the result as a document feature. */ if ((annotationType == null) || (annotationType.length() == 0)) { String docText = document.getContent().toString(); Classification classification = classifier.classify(docText); document.getFeatures().put(languageIdFeatureName, classification.bestCategory()); } /* Optional behaviour: classify the text underlying each annotation * and store each results as an annotation feature. */ else { AnnotationSet annotations = document.getAnnotations(annotationSetName).get(annotationType); for (Annotation annotation : annotations) { String text = Utils.stringFor(document, annotation); Classification classification = classifier.classify(text); annotation.getFeatures().put(languageIdFeatureName, classification.bestCategory()); } } // process finished, acknowledge user about this. fireProcessFinished(); }
public static void main(String[] args) throws ClassNotFoundException, IOException { if (args.length != 3) { System.out.println("No Arguments. No Model Trainer."); return; } TESTING_DIR = new File(args[0]); TESTING_RESULTS_DIR = new File(args[1]); MODELS_DIR = new File(args[2]); CATEGORIES = TESTING_DIR.list(); NGRAM_SIZE = CATEGORIES.length; File[] models = MODELS_DIR.listFiles(); PartsOfSpeech = new String[models.length]; compiledClassifiers = new HashMap<String, LMClassifier>(); for (int i = 0; i < models.length; i++) { LMClassifier classifier = loadModel(models[i].getAbsolutePath()); PartsOfSpeech[i] = models[i].getName().substring(0, models[i].getName().length() - 6); compiledClassifiers.put(PartsOfSpeech[i], classifier); } standardClassifier = compiledClassifiers.get("all"); // testing // ConfusionMatrix confMatrix = new ConfusionMatrix(CATEGORIES); ClassifierEvaluator evaluator = new ClassifierEvaluator(standardClassifier, CATEGORIES); int hits = 0, misses = 0, hits_standard = 0, misses_standard = 0; for (int i = 0; i < CATEGORIES.length; i++) { File classDir = new File(TESTING_DIR, CATEGORIES[i]); String[] testingFiles = classDir.list(); for (int j = 0; j < testingFiles.length; j++) { // String text = Files.readFromFile(new File(classDir,testingFiles[j])); System.out.print("Testing on " + CATEGORIES[i] + "/" + testingFiles[j] + " "); // evaluator.addCase(CATEGORIES[i],text); Document d = new Document(new File(classDir, testingFiles[j])); Set<Document> s = new HashSet<Document>(); s.add(d); POSFinder p = new POSFinder(s); p.process(); JointClassification jc = jointJointClassify(d); String bestCategory = jc.bestCategory(); JointClassification jc_standard = standardClassifier.classify(d.mText_str); // evaluator.addClassification(CATEGORIES[i], new // com.aliasi.classify.Classification(bestCategory)); evaluator.addCase(CATEGORIES[i], d.mText_str); System.out.println("Got best category of: " + bestCategory); System.out.println("(standard) Got best category of: " + jc_standard.bestCategory()); System.out.println("---------------"); File bestCategoryDirectory = new File(TESTING_RESULTS_DIR, bestCategory); File bestCategoryDirectory_inTest = new File(TESTING_DIR, bestCategory); File bestCategoryDirectory_inTest_standard = new File(TESTING_DIR, jc_standard.bestCategory()); if (!bestCategoryDirectory.exists()) bestCategoryDirectory.mkdir(); boolean hit = false; for (String ss : bestCategoryDirectory_inTest.list()) if (ss.equals(d.mFile.getName())) hit = true; if (hit) { hits++; System.err.println( "-------------HIT! P()=" + (int) (jc.conditionalProbability(0) * 100) + "% (" + hits + " hits)------------"); } else { misses++; System.err.println( "-------------MISS! P()=" + (int) (jc.conditionalProbability(0) * 100) + "% (" + misses + " misses)------------"); } hit = false; for (String ss : bestCategoryDirectory_inTest_standard.list()) if (ss.equals(d.mFile.getName())) hit = true; if (hit) { hits_standard++; System.err.println( "-----(standard)---HIT! P()=" + (int) (jc_standard.conditionalProbability(0) * 100) + "% (" + hits_standard + " hits)------------"); } else { misses_standard++; System.err.println( "----(standard)---MISS! P()=" + (int) (jc_standard.conditionalProbability(0) * 100) + "% (" + misses_standard + " misses)------------"); } if (hits_standard < hits) System.err.println( "Akshat's Classifier is WINNING <" + ((double) hits / (double) (hits + misses)) + "> to <" + (double) ((double) hits_standard / ((double) hits_standard + (double) misses_standard)) + ">"); else System.err.println( "Akshat's Classifier is LOSING <" + ((double) hits / (double) (hits + misses)) + "> to <" + (double) ((double) hits_standard / ((double) hits_standard + (double) misses_standard)) + ">"); // DocTreeWidget.copyFile(d.mFile, new File(bestCategoryDirectory,d.mFile.getName())); // //this is just a file copy function } } ConfusionMatrix confMatrix = evaluator.confusionMatrix(); String myresults = "Hits: " + hits + ", Misses: " + misses + ", Total Accuracy: " + ((double) hits / (double) (hits + misses)); myresults += "\n(standard) Hits :" + hits_standard + ", Misses: " + misses_standard + " Total accuracy = " + (double) ((double) hits_standard / ((double) hits_standard + (double) misses_standard)); System.out.println(myresults); com.aliasi.util.Files.writeStringToFile( myresults, new File(TESTING_RESULTS_DIR, "results.txt")); }