예제 #1
0
 /**
  * Classifies {@code text} and returns the best category as an integer.
  *
  * <p>A model is obtained from {@code createModelInputAndTrain(true, INPUT_TWEETS_ORIG_TXT)} on
  * every call, the text is classified with it, and the name of the best category is parsed with
  * {@link Integer#parseInt(String)}.
  *
  * @param text the text to classify
  * @return the best category reported by the classifier, parsed as an {@code int}
  * @throws IllegalStateException if the model cannot be created (the original
  *     {@code ClassNotFoundException} or {@code IOException} is attached as the cause)
  * @throws NumberFormatException if the best category name is not a parsable integer
  */
 public int predict(String text) {
   // NOTE(review): the model is rebuilt on every call; if that is expensive, consider caching it
   // in the enclosing class.
   LMClassifier<?, ?> model;
   try {
     model = createModelInputAndTrain(true, INPUT_TWEETS_ORIG_TXT);
   } catch (ClassNotFoundException | IOException e) {
     // Fail fast with the real cause instead of printing the stack trace and then hitting a
     // NullPointerException on the null model below.
     throw new IllegalStateException("Unable to create the classifier model", e);
   }
   ConditionalClassification classification = model.classify(text);
   String category = classification.bestCategory();
   return Integer.parseInt(category);
 }
예제 #2
0
  /**
   * Classifies document text and records the best category under the language feature name.
   *
   * <p>When no annotation type is configured, the whole document content is classified and the
   * best category is stored as a document feature. Otherwise every annotation of the configured
   * type (taken from the configured annotation set) has its underlying text classified, and the
   * best category is stored as a feature on that annotation. A missing or blank feature name is
   * replaced by {@code "lang"}.
   *
   * @throws ExecutionException declared by the signature; the code in this method does not throw
   *     it directly
   * @throws GateRuntimeException if no document has been set
   */
  public void execute() throws ExecutionException {
    // Report the start of processing.
    fireProgressChanged(0);

    // Nothing can be done without a document: signal completion, then fail.
    if (document == null) {
      fireProcessFinished();
      throw new GateRuntimeException("No document to process!");
    }

    // Fall back to the default feature name when none (or only whitespace) was configured.
    boolean featureNameMissing =
        languageIdFeatureName == null || languageIdFeatureName.trim().length() == 0;
    if (featureNameMissing) {
      languageIdFeatureName = "lang";
    }

    boolean classifyPerAnnotation = annotationType != null && annotationType.length() != 0;
    if (classifyPerAnnotation) {
      // Optional behaviour: one classification per annotation of the requested type.
      AnnotationSet targets = document.getAnnotations(annotationSetName).get(annotationType);
      for (Annotation target : targets) {
        Classification result = classifier.classify(Utils.stringFor(document, target));
        target.getFeatures().put(languageIdFeatureName, result.bestCategory());
      }
    } else {
      // Default behaviour: a single classification of the whole document text.
      Classification result = classifier.classify(document.getContent().toString());
      document.getFeatures().put(languageIdFeatureName, result.bestCategory());
    }

    // Report that processing has completed.
    fireProcessFinished();
  }
  /**
   * Command-line entry point that compares the joint classifier with the standard classifier on
   * a directory of test documents.
   *
   * <p>The names of the entries of the testing directory are used as the categories. Every file in
   * the models directory is loaded with {@code loadModel} and keyed by its file name minus the
   * last six characters; the model keyed {@code "all"} becomes the standard classifier. Each test
   * file is processed with {@code POSFinder}, then classified by {@code jointJointClassify} and by
   * the standard classifier. A prediction counts as a hit when a file with the same name exists
   * in the testing sub-directory named after the predicted category. Running tallies are printed
   * to standard error, and the final summary is printed to standard output and written to
   * {@code results.txt} in the results directory.
   *
   * @param args exactly three paths: testing directory, results directory, models directory
   * @throws ClassNotFoundException declared by the signature; presumably propagated from
   *     {@code loadModel} (confirm against its declaration)
   * @throws IOException if the testing directory, a category directory or the models directory
   *     cannot be listed, if no model keyed {@code "all"} exists, or if a callee reports an I/O
   *     failure
   */
  public static void main(String[] args) throws ClassNotFoundException, IOException {

    if (args.length != 3) {
      System.out.println("No Arguments. No Model Trainer.");
      return;
    }
    TESTING_DIR = new File(args[0]);
    TESTING_RESULTS_DIR = new File(args[1]);
    MODELS_DIR = new File(args[2]);
    CATEGORIES = listNamesOrFail(TESTING_DIR);
    NGRAM_SIZE = CATEGORIES.length;

    // File.listFiles() returns null (not an empty array) for a missing or unreadable directory.
    File[] models = MODELS_DIR.listFiles();
    if (models == null) {
      throw new IOException("Cannot list models directory: " + MODELS_DIR);
    }
    PartsOfSpeech = new String[models.length];
    compiledClassifiers = new HashMap<String, LMClassifier>();

    for (int i = 0; i < models.length; i++) {
      LMClassifier classifier = loadModel(models[i].getAbsolutePath());
      // The key is the file name without its last six characters.
      // NOTE(review): assumes every model file name ends in a 6-character suffix - confirm.
      String modelFileName = models[i].getName();
      PartsOfSpeech[i] = modelFileName.substring(0, modelFileName.length() - 6);
      compiledClassifiers.put(PartsOfSpeech[i], classifier);
    }
    standardClassifier = compiledClassifiers.get("all");
    if (standardClassifier == null) {
      // Fail here with a clear message rather than with a NullPointerException during testing.
      throw new IOException("No model keyed \"all\" found in models directory: " + MODELS_DIR);
    }

    // testing
    ClassifierEvaluator evaluator = new ClassifierEvaluator(standardClassifier, CATEGORIES);
    int hits = 0, misses = 0, hits_standard = 0, misses_standard = 0;
    for (int i = 0; i < CATEGORIES.length; i++) {
      File classDir = new File(TESTING_DIR, CATEGORIES[i]);
      String[] testingFiles = listNamesOrFail(classDir);
      for (int j = 0; j < testingFiles.length; j++) {
        System.out.print("Testing on " + CATEGORIES[i] + "/" + testingFiles[j] + " ");
        Document d = new Document(new File(classDir, testingFiles[j]));
        Set<Document> s = new HashSet<Document>();
        s.add(d);
        POSFinder p = new POSFinder(s);
        p.process();
        JointClassification jc = jointJointClassify(d);
        String bestCategory = jc.bestCategory();
        JointClassification jc_standard = standardClassifier.classify(d.mText_str);
        evaluator.addCase(CATEGORIES[i], d.mText_str);

        System.out.println("Got best category of: " + bestCategory);
        System.out.println("(standard) Got best category of: " + jc_standard.bestCategory());

        System.out.println("---------------");
        File bestCategoryDirectory = new File(TESTING_RESULTS_DIR, bestCategory);
        File bestCategoryDirectory_inTest = new File(TESTING_DIR, bestCategory);
        File bestCategoryDirectory_inTest_standard =
            new File(TESTING_DIR, jc_standard.bestCategory());
        if (!bestCategoryDirectory.exists()) {
          // mkdirs() also creates a missing results root, where mkdir() would fail silently.
          bestCategoryDirectory.mkdirs();
        }

        String testedFileName = d.mFile.getName();

        if (directoryContains(bestCategoryDirectory_inTest, testedFileName)) {
          hits++;
          System.err.println(
              "-------------HIT! P()="
                  + (int) (jc.conditionalProbability(0) * 100)
                  + "% ("
                  + hits
                  + " hits)------------");
        } else {
          misses++;
          System.err.println(
              "-------------MISS! P()="
                  + (int) (jc.conditionalProbability(0) * 100)
                  + "% ("
                  + misses
                  + " misses)------------");
        }

        if (directoryContains(bestCategoryDirectory_inTest_standard, testedFileName)) {
          hits_standard++;
          System.err.println(
              "-----(standard)---HIT! P()="
                  + (int) (jc_standard.conditionalProbability(0) * 100)
                  + "% ("
                  + hits_standard
                  + " hits)------------");
        } else {
          misses_standard++;
          System.err.println(
              "----(standard)---MISS! P()="
                  + (int) (jc_standard.conditionalProbability(0) * 100)
                  + "% ("
                  + misses_standard
                  + " misses)------------");
        }

        // Both classifiers have seen the same documents, so comparing hit counts is enough.
        // Equal counts are reported as a tie rather than as a loss.
        String standing;
        if (hits > hits_standard) {
          standing = "WINNING";
        } else if (hits < hits_standard) {
          standing = "LOSING";
        } else {
          standing = "TIED";
        }
        System.err.println(
            "Akshat's Classifier is "
                + standing
                + " <"
                + accuracyOf(hits, misses)
                + "> to <"
                + accuracyOf(hits_standard, misses_standard)
                + ">");
      }
    }
    // NOTE(review): the confusion matrix is obtained but never reported.
    ConfusionMatrix confMatrix = evaluator.confusionMatrix();

    String myresults =
        "Hits: "
            + hits
            + ", Misses: "
            + misses
            + ", Total Accuracy: "
            + accuracyOf(hits, misses);
    myresults +=
        "\n(standard) Hits :"
            + hits_standard
            + ", Misses: "
            + misses_standard
            + " Total accuracy = "
            + accuracyOf(hits_standard, misses_standard);

    System.out.println(myresults);
    com.aliasi.util.Files.writeStringToFile(
        myresults, new File(TESTING_RESULTS_DIR, "results.txt"));
  }

  /** Returns the entry names of {@code dir}, or throws if {@code dir} cannot be listed. */
  private static String[] listNamesOrFail(File dir) throws IOException {
    String[] names = dir.list();
    if (names == null) {
      throw new IOException("Not a readable directory: " + dir);
    }
    return names;
  }

  /** Tells whether {@code dir} has an entry named {@code fileName}; false if it cannot be listed. */
  private static boolean directoryContains(File dir, String fileName) {
    String[] names = dir.list();
    if (names == null) {
      return false;
    }
    for (String name : names) {
      if (name.equals(fileName)) {
        return true;
      }
    }
    return false;
  }

  /** Returns hits / (hits + misses) as a double; the result is NaN when both counts are zero. */
  private static double accuracyOf(int hits, int misses) {
    return (double) hits / (double) (hits + misses);
  }