/**
 * Exercises pruning driven by a per-node "weight" field.
 *
 * <p>The values 0.1 and 0.4 are attached, under the field name "weight", to the
 * nodes of the "tree" representation chosen by {@code TreeNodeSelectorAllLeaves}.
 * A {@code TreeNodePruner} built on {@code PruneNodeLowerThanThreshold(0.3, "weight", true, 1.0)}
 * is then applied, and the serialized dataset is compared against the expected
 * tree, in which the {@code LEX##good::n} leaf no longer appears.
 */
@Test
public void testPruningThreshold() {
    String inputExample = "Relevant |BT:tree| (NOTYPE##ROOT(NOTYPE##S(NOTYPE##NP(NOTYPE##NNP(LEX##good::n))(NOTYPE##NNP(LEX##bank::n)))))|ET| ";
    SimpleDataset data = getDatasetFromString(inputExample);

    // One inner list of values for the single example in the dataset.
    List<Double> leafWeights = new ArrayList<Double>();
    leafWeights.add(0.1);
    leafWeights.add(0.4);
    List<List<Double>> weightTable = new ArrayList<List<Double>>();
    weightTable.add(leafWeights);

    TreeAddAdditionalInfoFromArray weightLoader =
            new TreeAddAdditionalInfoFromArray(new TreeNodeSelectorAllLeaves(), weightTable, "weight", "tree");
    // NOTE(review): the meaning of the trailing (true, 1.0) arguments is not visible here — confirm
    // against PruneNodeLowerThanThreshold before changing them.
    PruneNodeLowerThanThreshold thresholdCheck = new PruneNodeLowerThanThreshold(0.3, "weight", true, 1.0);
    TreeNodePruner pruner = new TreeNodePruner(thresholdCheck, "tree");

    System.out.println("Removing all nodes whose value of the field -weight- is lower than 0.3");
    System.out.println(weightLoader.describe());
    System.out.println(pruner.describe());

    // The weights have to be loaded before the pruner can read them.
    data.manipulate(weightLoader);
    data.manipulate(pruner);

    String expectedExample = "Relevant |BT:tree| (NOTYPE##ROOT(NOTYPE##S(NOTYPE##NP(NOTYPE##NNP)(NOTYPE##NNP(LEX##bank::n)))))|ET| ";
    System.out.println("Pruned sentence:" + getStringFromDataset(data));
    System.out.println("Expected result:" + expectedExample);

    // NOTE(review): arguments are passed as (actual, expected); if Assert is org.junit.Assert
    // this is reversed relative to its (expected, actual) signature — confirm and swap if so.
    Assert.assertEquals(getStringFromDataset(data), expectedExample);
}
/**
 * Builds a {@code TreeNodePruner} and applies it to {@code dataset}.
 *
 * <p>The pruner is assembled from a {@code PruneNodeLeafNumber} configured with
 * {@code maxNumberOfLeavesPerSubtree}, the supplied representation selector, a
 * {@code PruneNodeIfLeaf}, a {@code TreeNodeSelectorAllChildren} and
 * {@code TreeNodePruner.UNLIMITED_RECURSION}. Its description is printed to
 * standard output before the dataset is manipulated.
 *
 * @param dataset the dataset passed to {@code manipulate}
 * @param maxNumberOfLeavesPerSubtree value handed to the {@code PruneNodeLeafNumber} constructor
 * @param treeSelector selector forwarded to the {@code TreeNodePruner} constructor
 */
public static void pruningMaxSentenceLength(
        SimpleDataset dataset,
        int maxNumberOfLeavesPerSubtree,
        SelectRepresentationFromExample treeSelector) {
    PruneNodeLeafNumber leafCountCheck = new PruneNodeLeafNumber(maxNumberOfLeavesPerSubtree);
    PruneNodeIfLeaf leafCheck = new PruneNodeIfLeaf();
    TreeNodeSelectorAllChildren childSelector = new TreeNodeSelectorAllChildren();

    TreeNodePruner pruner = new TreeNodePruner(
            leafCountCheck,
            treeSelector,
            leafCheck,
            childSelector,
            TreeNodePruner.UNLIMITED_RECURSION);

    System.out.println(pruner.describe());
    dataset.manipulate(pruner);
}
/**
 * Exercises pruning based on the number of children of a node.
 *
 * <p>The input "tree" representation has two {@code NOTYPE##S} subtrees under
 * {@code NOTYPE##ROOT}. A {@code TreeNodePruner} built on
 * {@code PruneNodeNumberOfChildren(1)} is applied, and the serialized dataset is
 * compared against the expected tree, which contains only the first
 * {@code NOTYPE##S} subtree.
 */
@Test
public void testMaxNumberOfSentencesPruning() {
    String inputExample = "Relevant |BT:tree| (NOTYPE##ROOT(NOTYPE##S(NOTYPE##NP(NOTYPE##NNP(LEX##good::n))(NOTYPE##NNP(LEX##bank::n))))(NOTYPE##S(NOTYPE##NP(NOTYPE##WDT(LEX##which::w)))(NOTYPE##VP(NOTYPE##VBZ(LEX##be::v)))(NOTYPE##NP(NOTYPE##DT(LEX##a::d))(NOTYPE##JJ(LEX##good::j))(NOTYPE##NN(LEX##bank::n)))(NOTYPE##PP(NOTYPE##IN(LEX##as::i)))(NOTYPE##PP(NOTYPE##IN(LEX##per::i)))(NOTYPE##NP(NOTYPE##PRP$(LEX##your::p))(NOTYPE##NN(LEX##experience::n)))(NOTYPE##PP(NOTYPE##IN(LEX##in::i)))(NOTYPE##NP(NOTYPE##NNP(LEX##doha::n)))))|ET|";
    SimpleDataset data = getDatasetFromString(inputExample);

    PruneNodeNumberOfChildren childCountCheck = new PruneNodeNumberOfChildren(1);
    // NOTE(review): the roles of the trailing (null, 0) arguments are not visible here — confirm
    // against the TreeNodePruner constructor before changing them.
    TreeNodePruner pruner = new TreeNodePruner(childCountCheck, "tree", null, 0);

    System.out.println("Removing all sentences but the first one");
    System.out.println(pruner.describe());
    data.manipulate(pruner);

    String expectedExample = "Relevant |BT:tree| (NOTYPE##ROOT(NOTYPE##S(NOTYPE##NP(NOTYPE##NNP(LEX##good::n))(NOTYPE##NNP(LEX##bank::n)))))|ET| ";
    System.out.println("Pruned sentence:" + getStringFromDataset(data));
    System.out.println("Expected result:" + expectedExample);

    // NOTE(review): arguments are passed as (actual, expected); if Assert is org.junit.Assert
    // this is reversed relative to its (expected, actual) signature — confirm and swap if so.
    Assert.assertEquals(getStringFromDataset(data), expectedExample);
}