예제 #1
0
  /**
   * Runs the classifier trained for {@code tag} on {@code lemma} and reports every candidate rule
   * that passes the probability threshold.
   *
   * <p>Candidates are visited in the order left by {@code outcomes.sort()} (presumably most
   * probable first; confirm against {@code Distribution}). A candidate is reported when
   * {@code p > cumulativeP / MIN_PROBABILITY}, where {@code cumulativeP} is the sum of the
   * probabilities visited so far including the current one, and, when {@code checkPoS} is set,
   * the rule's {@code lemmaPoS} equals the given {@code lemmaPoS}.
   *
   * <p>Nothing is reported when {@code tag} is null, when {@code tag} does not start with
   * {@code lemmaPoS}, or when no classifier is registered for {@code tag}.
   *
   * @param lemma the lemma to classify
   * @param lemmaPoS part of speech of the lemma; {@code tag} must start with it
   * @param tag tag that selects the classifier in {@code classifiersPerTag}
   * @param checkPoS if true, only rules whose {@code lemmaPoS} equals {@code lemmaPoS} are reported
   * @param theCallback receives each accepted candidate; if null, the rule's pattern is applied
   *     to the lemma and a tab-separated line is printed to standard output for every non-null
   *     result
   */
  public void classifyLemma(
      String lemma, String lemmaPoS, String tag, boolean checkPoS, FoundFormHandler theCallback) {
    if (tag == null) {
      nl.openconvert.log.ConverterLog.defaultLog.println("HEY: tag = null for " + lemma);
      // Without a tag there is no classifier to consult; bail out instead of dereferencing null.
      return;
    }
    if (!tag.startsWith(lemmaPoS)) {
      return; // TODO: do this differently!
    }
    Classifier classifier = classifiersPerTag.get(tag);
    if (classifier == null) {
      nl.openconvert.log.ConverterLog.defaultLog.println("Error: no classifier trained for " + tag);
      return;
    }

    Instance testItem = features.makeTestInstance(lemma);

    Distribution outcomes = classifier.distributionForInstance(testItem);
    outcomes.sort();
    double cumulativeP = 0;

    for (int rank = 0; rank < outcomes.size(); rank++) {
      // Problem: the pattern suggested by the classifier need not be applicable to the given lemma
      // TODO: solve this by using a different classifier (or pruning the decision trees)

      String classId = outcomes.get(rank).label;
      double p = outcomes.get(rank).p;
      // The probability mass counts as "seen" even if the label turns out to have no rule.
      cumulativeP += p;

      Rule r = ruleID2Rule.get(classId);
      if (r == null) {
        // A label without a registered rule cannot be applied; skip it rather than crash.
        nl.openconvert.log.ConverterLog.defaultLog.println(
            "Error: no rule known for class " + classId + " (lemma " + lemma + ")");
        continue;
      }
      LemmaLog.addToLog("rule for " + lemma + ":  " + r.toString());

      if (p > cumulativeP / MIN_PROBABILITY && (!checkPoS || r.lemmaPoS.equals(lemmaPoS))) {
        if (theCallback != null) {
          theCallback.foundForm(lemma, tag, lemmaPoS, r, p, rank);
        } else {
          // No callback: derive the word form ourselves and print it, if the pattern applies.
          String wf = r.pattern.applyConverse(lemma);
          if (wf != null) {
            System.out.println(
                String.format(
                    "%s\t%s\t%s\t%s\t%f\t[%d]\t%s=%s",
                    wf, lemma, tag, lemmaPoS, p, rank, classId, r.toString()));
          }
        }
      }
    }
  }
예제 #2
0
 /**
  * Creates and trains one classifier per tag in {@code datasetsPerTag}.
  *
  * <p>For every tag a new instance of {@code classifierClass} is created, configured with
  * {@code classifierType}, stored in {@code classifiersPerTag} and trained on that tag's dataset
  * using {@code MAX_ITEMS_USED}. Afterwards {@code tagsSorted} is rebuilt as the sorted list of
  * all tags.
  *
  * @throws IllegalStateException if {@code classifierClass} cannot be instantiated; the
  *     reflective exception is attached as the cause
  */
 public void buildClassifiers() {
   for (String tag : datasetsPerTag.keySet()) {
     Dataset d = datasetsPerTag.get(tag);
     Classifier c;
     try {
       c = (Classifier) classifierClass.newInstance();
     } catch (InstantiationException e) {
       // Fail with the real cause instead of continuing with a null classifier.
       throw new IllegalStateException(
           "Cannot instantiate classifier " + classifierClass + " for tag " + tag, e);
     } catch (IllegalAccessException e) {
       throw new IllegalStateException(
           "Cannot access constructor of classifier " + classifierClass + " for tag " + tag, e);
     }
     c.setType(classifierType);
     classifiersPerTag.put(tag, c);
     c.train(d, MAX_ITEMS_USED);
   }
   tagsSorted = new ArrayList<String>(datasetsPerTag.keySet());
   Collections.sort(tagsSorted);
 }
예제 #3
0
 /**
  * Saves all classifiers and an index file into {@code dirName}.
  *
  * <p>The classifier for the k-th tag of {@code datasetsPerTag} (k starting at 1) is written to
  * {@code <dirName>/M<k>.model}. The index {@code <dirName>/model.properties} holds the
  * properties {@code directory} and {@code class}, a mapping {@code M<k>.model -> tag} for every
  * saved model, and a mapping from every rule ID in {@code ruleID2Rule} to that rule's string
  * form.
  *
  * <p>Any exception raised while saving is caught and its stack trace printed; it is not
  * propagated to the caller.
  *
  * @param dirName directory that receives the model files and {@code model.properties}
  */
 public void saveToDirectory(String dirName) {
   int k = 1;
   try {
     Properties p = new Properties();
     p.setProperty("directory", dirName);
     p.setProperty("class", classifierType);
     for (String tag : datasetsPerTag.keySet()) {
       Classifier c = classifiersPerTag.get(tag);
       String fileName = String.format("%s/M%d.model", dirName, k);
       p.setProperty(String.format("M%d.model", k), tag);
       c.save(fileName);
       k++;
     }
     for (String s : ruleID2Rule.keySet()) {
       p.setProperty(s, ruleID2Rule.get(s).toString());
     }
     // Properties.store flushes but does not close the stream, so close it explicitly.
     FileOutputStream out = new FileOutputStream(dirName + "/model.properties");
     try {
       p.store(out, "no comments");
     } finally {
       out.close();
     }
   } catch (Exception e) {
     e.printStackTrace();
   }
 }