/**
 * Writes a detailed, human-readable trace of one analysed event to {@code writer}.
 *
 * <p>The trace contains, in order:
 * <ol>
 *   <li>the event itself (via {@code toString()});</li>
 *   <li>one entry per feature result, emitted through {@code writeFeatureResult}, which also
 *       receives the per-outcome totals map (presumably to accumulate into it - confirm against
 *       {@code writeFeatureResult});</li>
 *   <li>an "Outcome totals" table listing, for each model outcome, the total held in that map and
 *       the value returned by {@code decisionMaker.predict} at the same position;</li>
 *   <li>an "Outcome list" table of every model outcome with the probability of the matching
 *       decision (0.0 when no decision carries that outcome's code), in the natural order of
 *       {@code WeightedOutcome}.</li>
 * </ol>
 *
 * <p>The writer is flushed once the event has been fully written.
 *
 * @param event the event being analysed; must not be {@code null}
 * @param featureResults the feature results computed for this event
 * @param decisions the decisions produced for this event
 * @throws RuntimeException wrapping any {@link IOException} raised by the writer
 */
@Override
public void onAnalyse(
    Object event, List<FeatureResult<?>> featureResults, Collection<Decision<T>> decisions) {
  try {
    // Start every known outcome at zero so the totals table lists all of them.
    Map<String, Double> outcomeTotals = new TreeMap<String, Double>();
    for (String outcome : modelParams.getOutcomes()) {
      outcomeTotals.put(outcome, 0.0);
    }

    writer.append("####### Event: " + event.toString() + "\n");
    writer.append("### Feature results:\n");
    for (FeatureResult<?> featureResult : featureResults) {
      if (featureResult.getOutcome() instanceof List) {
        // A list-valued outcome is treated as a list of weighted string outcomes,
        // each reported as its own feature.
        @SuppressWarnings("unchecked")
        FeatureResult<List<WeightedOutcome<String>>> stringCollectionResult =
            (FeatureResult<List<WeightedOutcome<String>>>) featureResult;
        for (WeightedOutcome<String> stringOutcome : stringCollectionResult.getOutcome()) {
          String featureName =
              featureResult.getTrainingName()
                  + "|"
                  + featureResult.getTrainingOutcome(stringOutcome.getOutcome());
          String featureOutcome = stringOutcome.getOutcome();
          double value = stringOutcome.getWeight();
          this.writeFeatureResult(featureName, featureOutcome, value, outcomeTotals);
        }
      } else {
        // Only double features carry their own value; everything else counts as 1.0.
        double value = 1.0;
        if (featureResult.getFeature() instanceof DoubleFeature) {
          value = (Double) featureResult.getOutcome();
        }
        this.writeFeatureResult(
            featureResult.getTrainingName(),
            featureResult.getOutcome().toString(),
            value,
            outcomeTotals);
      }
    }

    List<Integer> featureIndexList = new ArrayList<Integer>();
    List<Double> featureValueList = new ArrayList<Double>();
    modelParams.prepareData(featureResults, featureIndexList, featureValueList);
    double[] results = decisionMaker.predict(featureIndexList, featureValueList);

    // NOTE: the column formats deliberately use "%15s" rather than "%#15s". The '#' flag is
    // only legal for %s when the argument implements java.util.Formattable; with a plain
    // String it raises FormatFlagsConversionMismatchException.
    writer.append("### Outcome totals:\n");
    writer.append(
        String.format("%1$-30s", "outcome")
            + String.format("%1$15s", "total")
            + String.format("%1$15s", "normalised")
            + "\n");

    // results is read positionally, in the iteration order of modelParams.getOutcomes().
    int j = 0;
    for (String outcome : modelParams.getOutcomes()) {
      double total = outcomeTotals.get(outcome);
      double normalised = results[j++];
      writer.append(
          String.format("%1$-30s", outcome)
              + String.format("%1$15s", decFormat.format(total))
              + String.format("%1$15s", decFormat.format(normalised))
              + "\n");
    }
    writer.append("\n");

    Map<String, Double> outcomeWeights = new TreeMap<String, Double>();
    for (Decision<T> decision : decisions) {
      outcomeWeights.put(decision.getCode(), decision.getProbability());
    }

    writer.append("### Outcome list:\n");
    Set<WeightedOutcome<String>> weightedOutcomes = new TreeSet<WeightedOutcome<String>>();
    for (String outcome : modelParams.getOutcomes()) {
      // Outcomes without a matching decision are still listed, with weight 0.0.
      Double weightObj = outcomeWeights.get(outcome);
      double weight = (weightObj == null ? 0.0 : weightObj.doubleValue());
      weightedOutcomes.add(new WeightedOutcome<String>(outcome, weight));
    }
    for (WeightedOutcome<String> weightedOutcome : weightedOutcomes) {
      writer.append(
          String.format("%1$-30s", weightedOutcome.getOutcome())
              + String.format("%1$15s", decFormat.format(weightedOutcome.getWeight()))
              + "\n");
    }
    writer.append("\n");
    writer.flush();
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}
@Test public void testGetTokenSequence(@NonStrict final Sentence sentence) { new NonStrictExpectations() { { sentence.getText(); returns("Je n'ai pas encore l'ourang-outan."); } }; TokeniserOutcome[] tokeniserOutcomeArray = new TokeniserOutcome[] { TokeniserOutcome.SEPARATE, // Je TokeniserOutcome.SEPARATE, // _ TokeniserOutcome.SEPARATE, // n TokeniserOutcome.JOIN, // ' TokeniserOutcome.SEPARATE, // ai TokeniserOutcome.SEPARATE, // _ TokeniserOutcome.SEPARATE, // pas TokeniserOutcome.JOIN, // _ TokeniserOutcome.JOIN, // encore TokeniserOutcome.SEPARATE, // _ TokeniserOutcome.SEPARATE, // l TokeniserOutcome.JOIN, // ' TokeniserOutcome.SEPARATE, // ourang TokeniserOutcome.JOIN, // - TokeniserOutcome.JOIN, // outan TokeniserOutcome.SEPARATE // . }; final TalismaneSession talismaneSession = TalismaneSession.getInstance(""); TokenisedAtomicTokenSequence atomicTokenSequence = new TokenisedAtomicTokenSequence(sentence, talismaneSession); TokenSequence tokenSequence = new TokenSequence(sentence, Tokeniser.SEPARATORS, talismaneSession); int i = 0; for (Token token : tokenSequence.listWithWhiteSpace()) { Decision decision = new Decision(tokeniserOutcomeArray[i++].name()); TaggedToken<TokeniserOutcome> taggedToken = new TaggedToken<>(token, decision, TokeniserOutcome.valueOf(decision.getOutcome())); atomicTokenSequence.add(taggedToken); } TokenSequence newTokenSequence = atomicTokenSequence.inferTokenSequence(); LOG.debug(newTokenSequence.toString()); i = 0; for (Token token : newTokenSequence) { if (i == 0) { assertEquals("Je", token.getText()); } else if (i == 1) { assertEquals("n'", token.getText()); } else if (i == 2) { assertEquals("ai", token.getText()); } else if (i == 3) { assertEquals("pas encore", token.getText()); } else if (i == 4) { assertEquals("l'", token.getText()); } else if (i == 5) { assertEquals("ourang-outan", token.getText()); } else if (i == 6) { assertEquals(".", token.getText()); } i++; } assertEquals(7, newTokenSequence.size()); }