public void classifyLemma( String lemma, String lemmaPoS, String tag, boolean checkPoS, FoundFormHandler theCallback) { if (tag == null) { nl.openconvert.log.ConverterLog.defaultLog.println("HEY: tag = null for " + lemma); } if (!tag.startsWith(lemmaPoS)) { return; // doe dit anders! } Classifier classifier = classifiersPerTag.get(tag); if (classifier == null) { nl.openconvert.log.ConverterLog.defaultLog.println("Error: no classifier trained for " + tag); return; } Instance testItem = features.makeTestInstance(lemma); Distribution outcomes = classifier.distributionForInstance(testItem); // nl.openconvert.log.ConverterLog.defaultLog.println(outcomes); outcomes.sort(); double cumulativeP = 0; for (int rank = 0; rank < outcomes.size(); rank++) { // Problem: the pattern suggested by the classifier need not be applicable to the given lemma // TODO: solve this by using a different classifier (or pruning the decision trees) String classId = outcomes.get(rank).label; double p = outcomes.get(rank).p; Rule r = ruleID2Rule.get(classId); LemmaLog.addToLog("rule for " + lemma + ": " + r.toString()); cumulativeP += p; // nl.openconvert.log.ConverterLog.defaultLog.println(r + " " + lemmaPoS + " " + r.lemmaPoS); if (p > cumulativeP / MIN_PROBABILITY && (!checkPoS || r.lemmaPoS.equals(lemmaPoS))) { if (theCallback != null) { theCallback.foundForm(lemma, tag, lemmaPoS, r, p, rank); } else { String wf = r.pattern.applyConverse(lemma); if (wf != null) { System.out.println( String.format( "%s\t%s\t%s\t%s\t%f\t[%d]\t%s=%s", wf, lemma, tag, lemmaPoS, p, rank, classId, r.toString())); } } } } }
public void buildClassifiers() { for (String tag : datasetsPerTag.keySet()) { // nl.openconvert.log.ConverterLog.defaultLog.println("Build classifier for " + tag); Dataset d = datasetsPerTag.get(tag); Classifier c = null; try { c = (Classifier) classifierClass.newInstance(); c.setType(classifierType); } catch (InstantiationException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IllegalAccessException e) { // TODO Auto-generated catch block e.printStackTrace(); } c.setType(classifierType); classifiersPerTag.put(tag, c); c.train(d, MAX_ITEMS_USED); } tagsSorted = new ArrayList<String>(datasetsPerTag.keySet()); Collections.sort(tagsSorted); }
public void saveToDirectory(String dirName) { int k = 1; try { Properties p = new Properties(); p.setProperty("directory", dirName); p.setProperty("class", classifierType); for (String tag : datasetsPerTag.keySet()) { // Dataset d = datasetsPerTag.get(tag); Classifier c = classifiersPerTag.get(tag); String fileName = String.format("%s/M%d.model", dirName, k); p.setProperty(String.format("M%d.model", k), tag); c.save(fileName); k++; } for (String s : ruleID2Rule.keySet()) { p.setProperty(s, ruleID2Rule.get(s).toString()); } p.store(new FileOutputStream(dirName + "/model.properties"), "no comments"); } catch (Exception e) { e.printStackTrace(); } }