/**
 * Recursively rebuilds {@code tree} with simplified interior labels.
 *
 * <p>Leaves are re-created through {@code tf} with their original label and score. For every
 * other node the label value is passed through the language pack's {@code basicCategory} and
 * then {@code stripGF}; the result becomes the category of a new {@link CategoryWordTag} that
 * is constructed from the old label. If the old label implements {@link HasTag}, its tag is
 * simplified the same way. Scores are carried over to the new nodes.
 *
 * @param tree the (sub)tree to transform
 * @return a newly built tree; the input tree's structure is not modified by this method
 */
public Tree transformTree(Tree tree) {
  Label label = tree.label();

  // Leaves keep their label untouched; only the node itself is re-created.
  if (tree.isLeaf()) {
    Tree leafCopy = tf.newLeaf(label);
    leafCopy.setScore(tree.score());
    return leafCopy;
  }

  String category = lab(label);
  category = treebankLanguagePack().basicCategory(category);
  category = treebankLanguagePack().stripGF(category);

  // Transform the children depth-first, preserving their order.
  int kidCount = tree.numChildren();
  List<Tree> newKids = new ArrayList<Tree>(kidCount);
  int idx = 0;
  while (idx < kidCount) {
    newKids.add(transformTree(tree.getChild(idx)));
    idx++;
  }

  CategoryWordTag newLabel = new CategoryWordTag(label);
  newLabel.setCategory(category);
  if (label instanceof HasTag) {
    String simplifiedTag = ((HasTag) label).tag();
    simplifiedTag = treebankLanguagePack().basicCategory(simplifiedTag);
    simplifiedTag = treebankLanguagePack().stripGF(simplifiedTag);
    newLabel.setTag(simplifiedTag);
  }

  Tree result = tf.newTreeNode(newLabel, newKids);
  result.setScore(tree.score());
  return result;
}

/** Returns the string value of {@code label}. */
private static String lab(Label label) {
  return label.value();
}
public Tree transformTree(Tree tree) { Label lab = tree.label(); if (tree.isLeaf()) { Tree leaf = tf.newLeaf(lab); leaf.setScore(tree.score()); return leaf; } String s = lab.value(); s = treebankLanguagePack().basicCategory(s); int numKids = tree.numChildren(); List<Tree> children = new ArrayList<Tree>(numKids); for (int cNum = 0; cNum < numKids; cNum++) { Tree child = tree.getChild(cNum); Tree newChild = transformTree(child); // cdm 2007: for just subcategory stripping, null shouldn't happen // if (newChild != null) { children.add(newChild); // } } // if (children.isEmpty()) { // return null; // } CategoryWordTag newLabel = new CategoryWordTag(lab); newLabel.setCategory(s); if (lab instanceof HasTag) { String tag = ((HasTag) lab).tag(); tag = treebankLanguagePack().basicCategory(tag); newLabel.setTag(tag); } Tree node = tf.newTreeNode(newLabel, children); node.setScore(tree.score()); return node; }
/**
 * Replaces the children of {@code t} with three nodes: a new "NP" node over {@code left}, the
 * conjunction {@code conj}, and a new "NP" node over {@code right}.
 *
 * @param t the node whose children are replaced in place
 * @param left children for the new left-hand "NP" node
 * @param conj the conjunction node placed between the two new nodes
 * @param right children for the new right-hand "NP" node
 */
private static void transformCC(Tree t, List<Tree> left, Tree conj, List<Tree> right) {
  TreeFactory treeFactory = t.treeFactory();
  LabelFactory labelFactory = t.label().labelFactory();

  Tree leftConjunct = treeFactory.newTreeNode(labelFactory.newLabel("NP"), left);
  Tree rightConjunct = treeFactory.newTreeNode(labelFactory.newLabel("NP"), right);

  List<Tree> replacement = new ArrayList<Tree>();
  replacement.add(leftConjunct);
  replacement.add(conj);
  replacement.add(rightConjunct);
  t.setChildren(replacement);
}
/**
 * Builds a tree map from a list of key/value pairs by recursive bisection.
 *
 * <p>The element at index {@code size / 2} becomes the root entry; the elements before it form
 * the left subtree and the elements after it form the right subtree. An empty list yields the
 * factory's empty map for the given comparator.
 *
 * @param factory creates the empty map and the interior nodes
 * @param comparator the comparator handed to every created node
 * @param sortedList the entries to insert; presumably already ordered by {@code comparator}
 *     (this method does not check or sort)
 * @return the tree map produced by {@code factory}
 */
public static <K, V> TreeMap<K, V> treeMap(
    final TreeFactory factory,
    final Comparator<K> comparator,
    final List<Pair<K, V>> sortedList) {
  final int size = sortedList.size();
  if (size == 0) {
    return factory.create(comparator);
  }

  final int mid = size / 2;
  final Pair<K, V> rootEntry = sortedList.get(mid);
  final TreeMap<K, V> lower = treeMap(factory, comparator, sortedList.subList(0, mid));
  final TreeMap<K, V> upper = treeMap(factory, comparator, sortedList.subList(mid + 1, size));

  return factory.create(comparator, rootEntry.first(), rootEntry.second(), lower, upper);
}
/**
 * Recursively copies the structure of {@code t} and records the correspondence between
 * original and copied nodes in both directions.
 *
 * <p>Each copied node is created through the original node's own tree factory and is given the
 * original node's label object (no label copy is made here). Children are copied before their
 * parent.
 *
 * @param t the (sub)tree to copy
 * @param newToOld receives an entry {@code copy -> original} for every node
 * @param oldToNew receives an entry {@code original -> copy} for every node
 * @return the copy of {@code t}
 */
public static Tree copyHelper(Tree t, Map<Tree, Tree> newToOld, Map<Tree, Tree> oldToNew) {
  Tree[] kids = t.children();
  Tree[] newKids = new Tree[kids.length];
  for (int i = 0, n = kids.length; i < n; i++) {
    newKids[i] = copyHelper(kids[i], newToOld, oldToNew);
  }

  TreeFactory tf = t.treeFactory();
  Tree copy =
      (kids.length == 0)
          ? tf.newLeaf(t.label())
          : tf.newTreeNode(t.label(), Arrays.asList(newKids));

  // Both maps are filled the same way for leaves and interior nodes. Previously the leaf
  // branch stored (copy -> original) in oldToNew as well, so leaves could never be looked up
  // by their original node.
  newToOld.put(copy, t);
  oldToNew.put(t, copy);
  return copy;
}
private static void transformQP(Tree t) { List<Tree> children = t.getChildrenAsList(); TreeFactory tf = t.treeFactory(); LabelFactory lf = t.label().labelFactory(); // create the new XS having the first two children of the QP Tree left = tf.newTreeNode(lf.newLabel("XS"), null); for (int i = 0; i < 2; i++) { left.addChild(children.get(i)); } // remove all the two first children of t before for (int i = 0; i < 2; i++) { t.removeChild(0); } // add XS as the first child t.addChild(0, left); }
/**
 * Collapses per-character preterminals back into a single word leaf, in place.
 *
 * <p>For a node that {@code isPrePreTerminal()} and whose first child's label matches the
 * regular expression {@code .*_.} (any text, an underscore, then one character), the values of
 * the leaves under each child are concatenated and the node's children are replaced by one new
 * leaf holding the concatenation. Pre-preterminals that do not match are left as they are. For
 * any other node the method recurses into every child.
 *
 * @param tree the (sub)tree to rewrite
 * @return the same {@code tree} instance, after modification
 */
public static Tree untransformTree(Tree tree) {
  TreeFactory tf = tree.treeFactory();

  if (!tree.isPrePreTerminal()) {
    for (Tree child : tree.children()) {
      untransformTree(child);
    }
    return tree;
  }

  boolean hasPositionSuffix = tree.firstChild().label().value().matches(".*_.");
  if (!hasPositionSuffix) {
    return tree;
  }

  StringBuilder joined = new StringBuilder();
  for (Tree charPreterminal : tree.children()) {
    joined.append(charPreterminal.firstChild().label().value());
  }
  Tree wordLeaf = tf.newLeaf(joined.toString());
  tree.setChildren(Collections.singletonList(wordLeaf));
  return tree;
}
/**
 * Creates a new tree map node whose value is the transformed value of {@code treeMap}'s root
 * entry and whose subtrees are the mapped subtrees of {@code treeMap}.
 *
 * <p>The comparator and key are taken unchanged from {@code treeMap}. The value is obtained via
 * {@code call(transformer, treeMap.value())}, and the left and right subtrees are produced by
 * calling their own {@code map(transformer)}.
 *
 * <p>NOTE(review): {@code key()}, {@code value()}, {@code left()} and {@code right()} are read
 * unconditionally; how an empty map behaves here is not visible from this method -- confirm
 * that callers only pass non-empty nodes or that those accessors are safe on empty maps.
 *
 * @param transformer function applied to each value
 * @param factory creates the resulting node
 * @param treeMap the node to map
 * @return the node produced by {@code factory}
 */
public static <K, V, NewV> TreeMap<K, NewV> map(
    Function1<? super V, ? extends NewV> transformer,
    final TreeFactory factory,
    final TreeMap<K, V> treeMap) {
  return factory.create(
      treeMap.comparator(),
      treeMap.key(),
      call(transformer, treeMap.value()),
      treeMap.left().map(transformer),
      treeMap.right().map(transformer));
}
/**
 * Recursively partitions the column range {@code [start, end)} and stores the resulting child
 * nodes in the {@code children} array.
 *
 * <p>Nothing happens when {@code end <= start}. Otherwise the range is partitioned with
 * {@code AbstractColumn.partition} on the next selective bit
 * ({@code selectiveBits[prevSelBitIndex + 1]}). If more than one bit remains to be used and the
 * range is larger than {@code Leaf.MAX_LEAF_SIZE}, both halves are partitioned further with one
 * bit fewer; the upper half is written starting {@code 1 << (bitsToUse - 1)} slots after
 * {@code childOffset}. Otherwise two nodes are created directly at {@code childOffset} and
 * {@code childOffset + 1}.
 *
 * @param treeFactory creates the child nodes
 * @param cols the columns being partitioned
 * @param childOffset first slot of {@code children} this call may write to
 * @param start start of the column range
 * @param end end of the column range
 * @param selectiveBits the bits used for partitioning
 * @param prevSelBitIndex index into {@code selectiveBits} of the previously used bit
 * @param bitsToUse number of bits still available for splitting at this level
 * @throws IOException declared by this method; presumably raised by partitioning or node
 *     creation
 */
private void initChildren(
    TreeFactory<T> treeFactory,
    SortableMemory<Column> cols,
    int childOffset,
    int start,
    int end,
    int[] selectiveBits,
    int prevSelBitIndex,
    int bitsToUse)
    throws IOException {
  if (end <= start) {
    return;
  }

  // Advance to the next selective bit; every use below refers to this advanced index.
  final int selBitIndex = prevSelBitIndex + 1;
  final int median = AbstractColumn.partition(cols, start, end, selectiveBits[selBitIndex]);

  final boolean splitFurther = bitsToUse > 1 && end > start + Leaf.MAX_LEAF_SIZE;
  if (!splitFurther) {
    children[childOffset] =
        treeFactory.createNode(cols, selectiveBits, selBitIndex, start, median);
    children[childOffset + 1] =
        treeFactory.createNode(cols, selectiveBits, selBitIndex, median, end);
    return;
  }

  final int remainingBits = bitsToUse - 1;
  initChildren(
      treeFactory, cols, childOffset, start, median, selectiveBits, selBitIndex, remainingBits);
  initChildren(
      treeFactory,
      cols,
      childOffset + (1 << remainingBits),
      median,
      end,
      selectiveBits,
      selBitIndex,
      remainingBits);
}
/**
 * Rebuilds {@code tree} so that every word is split into one preterminal per {@code char}.
 *
 * <p>For a preterminal, each character of the word becomes its own leaf under a new
 * preterminal labelled with the original tag plus a position suffix, and all of these are
 * placed under a new node carrying the original tag. With {@code useTwoCharTags} the suffix is
 * {@code _S} for the first character and {@code _M} for the rest. Otherwise it is {@code _S}
 * for a one-character word, {@code _B} for the first character, {@code _E} for the last and
 * {@code _M} for those in between. Any other node is rebuilt with the same label value over
 * its transformed children.
 *
 * @param tree the (sub)tree to transform
 * @return a newly built tree
 */
public Tree transformTree(Tree tree) {
  TreeFactory tf = tree.treeFactory();
  String tag = tree.label().value();

  if (!tree.isPreTerminal()) {
    List<Tree> rebuiltKids = new ArrayList<>();
    for (Tree child : tree.children()) {
      rebuiltKids.add(transformTree(child));
    }
    return tf.newTreeNode(tag, rebuiltKids);
  }

  String word = tree.firstChild().label().value();
  int length = word.length();
  List<Tree> charPreterminals = new ArrayList<>();
  for (int pos = 0; pos < length; pos++) {
    Tree charLeaf = tf.newLeaf(String.valueOf(word.charAt(pos)));

    boolean isFirst = pos == 0;
    boolean isLast = pos == length - 1;
    String suffix;
    if (useTwoCharTags) {
      suffix = isFirst ? "_S" : "_M";
    } else if (isFirst) {
      // First and last at once means a one-character word.
      suffix = isLast ? "_S" : "_B";
    } else {
      suffix = isLast ? "_E" : "_M";
    }

    charPreterminals.add(tf.newTreeNode(tag + suffix, Collections.<Tree>singletonList(charLeaf)));
  }
  return tf.newTreeNode(tag, charPreterminals);
}
/** {@inheritDoc} */ public Node mutate(Random rng, Probability mutationProbability, TreeFactory treeFactory) { if (mutationProbability.nextEvent(rng)) { return treeFactory.generateRandomCandidate(rng); } else { Node newLeft = left.mutate(rng, mutationProbability, treeFactory); Node newRight = right.mutate(rng, mutationProbability, treeFactory); if (newLeft != left && newRight != right) { return newInstance(newLeft, newRight); } else { // Tree has not changed. return this; } } }
/**
 * Command-line entry point: parses the options, lexes and parses the API definition named by
 * {@code mainApiFile}, creates a language-specific tree walker, runs it, and dumps the generated
 * files to the kernel and api output folders.
 *
 * <p>Command-line parse errors and {@code IOException}/{@code RecognitionException} are reported
 * on stderr and do not propagate.
 *
 * <p>NOTE(review): the {@code IllegalArgumentException} thrown when both 'd' and 't' are given
 * is not caught by either catch clause below, and {@code GenType.valueOf} is called on the raw
 * 'g' option value without a presence check -- confirm that failing with an uncaught exception
 * is intended in those cases.
 *
 * @param args command-line arguments; see the option descriptions registered below
 */
public static void main(String[] args) {
  Options options = new Options();
  options.addOption("l", true, "Language to generate");
  options.addOption("o", true, "Output root folder for kernel files");
  options.addOption("a", true, "Output root folder for api files");
  options.addOption("d", true, "Template dir to use (use either this or 't')");
  options.addOption("t", true, "Template file to use (use either this or 'd')");
  options.addOption(
      "g", true, "The type of grammar to generate, current options are 'SDK' or 'API'");
  options.addOption("mainApiFile", true, "FileName specifying the api");
  options.addOption(
      "codeSamplesJava", true, "A path to search for files that have Java code samples");
  options.addOption(
      "codeSamplesPython", true, "A path to search for files that have Python code samples");

  CommandLineParser cparser = new PosixParser();
  try {
    CommandLine cmd = cparser.parse(options, args);
    String mainApiFile = cmd.getOptionValue("mainApiFile");
    String outputKernelFolder = cmd.getOptionValue('o');
    String outputApiFolder = cmd.getOptionValue('a');
    String codeSamplesJava = cmd.getOptionValue("codeSamplesJava");
    String codeSamplesPython = cmd.getOptionValue("codeSamplesPython");
    GenType genType = GenType.valueOf(cmd.getOptionValue('g'));

    // The language will ultimately choose the walker class
    String language = cmd.getOptionValue('l');

    // NOTE(review): only the mutual exclusion of 'd' and 't' is checked here; their values are
    // not read anywhere in this method.
    if (cmd.hasOption('d') && cmd.hasOption('t')) {
      throw new IllegalArgumentException(
          "Cannot define both a template folder ('d') and file ('t'). Please use one OR the other.");
    }

    // And off we go
    TLexer lexer = new TLexer();
    ResourceBasedApiReader apiReader = new ResourceBasedApiReader();
    lexer.setApiReader(apiReader);
    lexer.setCharStream(apiReader.read(mainApiFile));

    // Using the lexer as the token source, we create a token
    // stream to be consumed by the parser
    //
    CommonTokenStream tokens = new CommonTokenStream(lexer);

    // Now we need an instance of our parser
    //
    TParser parser = new TParser(tokens);
    hmxdef_return psrReturn = parser.hmxdef();

    // load in T.stg template group, put in templates variable
    // (left null when isSlateMd(language) is true)
    StringTemplateGroup templates = null;
    if (!isSlateMd(language)) {
      templates = TemplateRepo.getTemplates(language, genType);
    }

    Tree t = psrReturn.getTree();
    CommonTreeNodeStream ns = new CommonTreeNodeStream(t);
    ns.setTokenStream(tokens);
    if (templates != null) {
      templates.registerRenderer(String.class, new UpCaseRenderer());
    }

    AbstractTTree walker = TreeFactory.createTreeWalker(ns, templates, language);
    System.out.println("Generating files with a " + walker.getClass().getName());

    // Dispatch on the concrete walker type; each walker exposes its own entry method.
    if (walker instanceof TTree) {
      if (genType.equals(GenType.API)) {
        ((TTree) walker).apiGen();
      } else {
        ((TTree) walker).sdkGen();
      }
    } else if (walker instanceof TTreeRuby) {
      System.out.println("Running for Ruby");
      /* TTreeRuby.hmxdef_return out = */ ((TTreeRuby) walker).hmxdef();
    } else if (walker instanceof TTreeJS) {
      System.out.println("Running for JavaScript");
      /* TTreeJS.hmxdef_return out = */ ((TTreeJS) walker).hmxdef();
    } else if (walker instanceof TTreeDoc) {
      System.out.println("Running for Documentation");
      /* TTreeDoc.hmxdef_return out = */ ((TTreeDoc) walker).hmxdef();
    } else if (walker instanceof TTreeVB) {
      System.out.println("Running for VB");
      /* TTreeVB.hmxdef_return out = */ ((TTreeVB) walker).hmxdef();
    } else if (walker instanceof TTreeGo) {
      System.out.println("Running for Go");
      /* TTreeGo.hmxdef_return out = */ ((TTreeGo) walker).hmxdef();
    } else if (walker instanceof TTreePython) {
      System.out.println("Running for Python");
      /* TTreePython.hmxdef_return out = */ ((TTreePython) walker).hmxdef();
    } else if (walker instanceof TTreeSlateMd) {
      System.out.println("Running for Slate Markdown");
      TTreeSlateMd slateMdWalker = (TTreeSlateMd) walker;
      slateMdWalker.setupCodeParser(codeSamplesJava, codeSamplesPython);
      slateMdWalker.hmxdef();
    } else if (walker instanceof TTreeDotNet) {
      System.out.println("Running for DotNet");
      ((TTreeDotNet) walker).apiGen();
    } else if (walker instanceof TTreeCurtisDoc) {
      System.out.println("Running for CurtisDoc");
      ((TTreeCurtisDoc) walker).apiGen();
    }

    // Now dump the files out
    System.out.println("Dumping files to " + outputKernelFolder + " and " + outputApiFolder);
    walker.dumpFiles(outputKernelFolder, outputApiFolder);
  } catch (ParseException e) {
    System.err.println("Error parsing command line - " + e.getMessage());
    System.out.println("Usage: " + options.toString());
  } catch (IOException | RecognitionException e) {
    System.err.println("Error running GenApi: " + ExceptionToString.format(e));
  }
}
/**
 * Normalize a whole tree -- one can assume that this is the root. This implementation deletes
 * empty elements (ones with nonterminal tag label '-NONE-') from the tree.
 *
 * <p>The steps, in order:
 *
 * <ol>
 *   <li>If the root label is "S", wrap the tree in a new "ROOT" node.
 *   <li>Relabel every phrasal subtree labelled "VB" to "VP".
 *   <li>Apply {@code transformer1}, which marks "S" nodes with "-G" when {@code doSGappedStuff}
 *       is set and {@code includesEmptyNPSubj} holds.
 *   <li>Prune subtrees rejected by {@code subtreeFilter} (Switchboard-specific labels and
 *       '-NONE-' nodes over a single leaf).
 *   <li>Splice out nodes rejected by {@code nodeFilter} (unary nodes whose label value equals
 *       that of their only child).
 *   <li>Apply {@code transformer2}, which appends "-TMP" (and, with {@code doAdverbialNP},
 *       "-ADV") to labels along head chains according to {@code temporalAnnotation}.
 * </ol>
 *
 * <p>Both transformers change labels on existing nodes in place rather than creating new nodes.
 *
 * @param tree the tree to normalize
 * @param tf the factory used for the ROOT wrapper, pruning, splicing and the final transform
 * @return the normalized tree, or {@code null} if any of the intermediate steps returns
 *     {@code null}
 */
@Override
public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
  // Step 3 transformer: optional "-G" marking of S nodes.
  TreeTransformer transformer1 =
      new TreeTransformer() {
        @Override
        public Tree transformTree(Tree t) {
          if (doSGappedStuff) {
            String lab = t.label().value();
            if (lab.equals("S") && includesEmptyNPSubj(t)) {
              LabelFactory lf = t.label().labelFactory();
              // Note: this changes the tree label, rather than
              // creating a new tree node. Beware!
              t.setLabel(lf.newLabel(t.label().value() + "-G"));
            }
          }
          return t;
        }
      };

  // Step 4 filter: returning false removes the whole subtree rooted at t.
  Filter<Tree> subtreeFilter =
      new Filter<Tree>() {
        private static final long serialVersionUID = -7250433816896327901L;

        @Override
        public boolean accept(Tree t) {
          Tree[] kids = t.children();
          Label l = t.label();
          // The special Switchboard non-terminals clause.
          // Note that it deletes IP which other Treebanks might use!
          // NOTE(review): t.label() is dereferenced here before the (l != null) check below,
          // so that null check cannot protect this clause -- confirm labels are never null.
          if ("RS".equals(t.label().value())
              || "RM".equals(t.label().value())
              || "IP".equals(t.label().value())
              || "CODE".equals(t.label().value())) {
            return false;
          }
          if ((l != null)
              && l.value() != null
              && (l.value().equals("-NONE-"))
              && !t.isLeaf()
              && kids.length == 1
              && kids[0].isLeaf()) {
            // Delete empty/trace nodes (ones marked '-NONE-')
            return false;
          }
          return true;
        }
      };

  // Step 5 filter: returning false splices the node out (its children take its place).
  Filter<Tree> nodeFilter =
      new Filter<Tree>() {
        private static final long serialVersionUID = 9000955019205336311L;

        @Override
        public boolean accept(Tree t) {
          if (t.isLeaf() || t.isPreTerminal()) {
            return true;
          }
          // The special switchboard non-terminals clause. Try keeping EDITED for now....
          // if ("EDITED".equals(t.label().value())) {
          //   return false;
          // }
          if (t.numChildren() != 1) {
            return true;
          }
          // Unary node whose label value equals its only child's label value.
          if (t.label() != null
              && t.label().value() != null
              && t.label().value().equals(t.children()[0].label().value())) {
            return false;
          }
          return true;
        }
      };

  // Step 6 transformer: temporal ("-TMP") and adverbial ("-ADV") annotation. Exactly one of the
  // temporalAnnotation branches runs (or none); the doAdverbialNP block runs independently.
  TreeTransformer transformer2 =
      new TreeTransformer() {
        @Override
        public Tree transformTree(Tree t) {
          if (temporalAnnotation == TEMPORAL_ANY_TMP_PERCOLATED) {
            String lab = t.label().value();
            if (TmpPattern.matcher(lab).matches()) {
              // Mark every node on the head chain down to and including the preterminal.
              Tree oldT = t;
              Tree ht;
              do {
                ht = headFinder.determineHead(oldT);
                // special fix for possessives! -- make noun before head
                if (ht.label().value().equals("POS")) {
                  int j = oldT.objectIndexOf(ht);
                  if (j > 0) {
                    ht = oldT.getChild(j - 1);
                  }
                }
                LabelFactory lf = ht.label().labelFactory();
                // Note: this changes the tree label, rather than
                // creating a new tree node. Beware!
                ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                oldT = ht;
              } while (!ht.isPreTerminal());
              if (lab.startsWith("PP")) {
                ht = headFinder.determineHead(t);
                // look to right
                int j = t.objectIndexOf(ht);
                int sz = t.children().length;
                if (j + 1 < sz) {
                  ht = t.getChild(j + 1);
                }
                if (ht.label().value().startsWith("NP")) {
                  while (!ht.isLeaf()) {
                    LabelFactory lf = ht.label().labelFactory();
                    // Note: this changes the tree label, rather than
                    // creating a new tree node. Beware!
                    ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                    ht = headFinder.determineHead(ht);
                  }
                }
              }
            }
          } else if (temporalAnnotation == TEMPORAL_ALL_TERMINALS) {
            String lab = t.label().value();
            if (NPTmpPattern.matcher(lab).matches()) {
              Tree ht;
              ht = headFinder.determineHead(t);
              if (ht.isPreTerminal()) {
                // change all tags to -TMP
                LabelFactory lf = ht.label().labelFactory();
                Tree[] kids = t.children();
                for (Tree kid : kids) {
                  if (kid.isPreTerminal()) {
                    // Note: this changes the tree label, rather
                    // than creating a new tree node. Beware!
                    kid.setLabel(lf.newLabel(kid.value() + "-TMP"));
                  }
                }
              } else {
                // Follow the head chain to the preterminal and mark only that one.
                Tree oldT = t;
                do {
                  ht = headFinder.determineHead(oldT);
                  oldT = ht;
                } while (!ht.isPreTerminal());
                LabelFactory lf = ht.label().labelFactory();
                // Note: this changes the tree label, rather than
                // creating a new tree node. Beware!
                ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
              }
            }
          } else if (temporalAnnotation == TEMPORAL_ALL_NP) {
            String lab = t.label().value();
            if (NPTmpPattern.matcher(lab).matches()) {
              // Mark heads that are preterminals or NP*, continuing while the head is NP*.
              Tree oldT = t;
              Tree ht;
              do {
                ht = headFinder.determineHead(oldT);
                // special fix for possessives! -- make noun before head
                if (ht.label().value().equals("POS")) {
                  int j = oldT.objectIndexOf(ht);
                  if (j > 0) {
                    ht = oldT.getChild(j - 1);
                  }
                }
                if (ht.isPreTerminal() || ht.value().startsWith("NP")) {
                  LabelFactory lf = ht.labelFactory();
                  // Note: this changes the tree label, rather than
                  // creating a new tree node. Beware!
                  ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
                  oldT = ht;
                }
              } while (ht.value().startsWith("NP"));
            }
          } else if (temporalAnnotation == TEMPORAL_ALL_NP_AND_PP
              || temporalAnnotation == TEMPORAL_NP_AND_PP_WITH_NP_HEAD
              || temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP) {
            // also allow chain to start with PP
            String lab = t.value();
            if (NPTmpPattern.matcher(lab).matches() || PPTmpPattern.matcher(lab).matches()) {
              Tree oldT = t;
              do {
                Tree ht = headFinder.determineHead(oldT);
                // special fix for possessives! -- make noun before head
                if (ht.value().equals("POS")) {
                  int j = oldT.objectIndexOf(ht);
                  if (j > 0) {
                    ht = oldT.getChild(j - 1);
                  }
                } else if ((temporalAnnotation == TEMPORAL_NP_AND_PP_WITH_NP_HEAD
                        || temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP)
                    && (ht.value().equals("IN") || ht.value().equals("TO"))) {
                  // change the head to be NP if possible
                  // (scans right to left without breaking, so the lowest-index NP* child at
                  // index >= 1 ends up as the head; index 0 is not examined)
                  Tree[] kidlets = oldT.children();
                  for (int k = kidlets.length - 1; k > 0; k--) {
                    if (kidlets[k].value().startsWith("NP")) {
                      ht = kidlets[k];
                    }
                  }
                }
                LabelFactory lf = ht.labelFactory();
                // Note: this next bit changes the tree label, rather
                // than creating a new tree node. Beware!
                if (ht.isPreTerminal() || ht.value().startsWith("NP")) {
                  ht.setLabel(lf.newLabel(ht.value() + "-TMP"));
                }
                // In the EVEN_UNDER_PP mode a PP* parent is reset to its basic category.
                if (temporalAnnotation == TEMPORAL_ALL_NP_EVEN_UNDER_PP
                    && oldT.value().startsWith("PP")) {
                  oldT.setLabel(lf.newLabel(tlp.basicCategory(oldT.value())));
                }
                oldT = ht;
              } while (oldT.value().startsWith("NP") || oldT.value().startsWith("PP"));
            }
          } else if (temporalAnnotation == TEMPORAL_ALL_NP_PP_ADVP) {
            // also allow chain to start with PP or ADVP
            String lab = t.value();
            if (NPTmpPattern.matcher(lab).matches()
                || PPTmpPattern.matcher(lab).matches()
                || ADVPTmpPattern.matcher(lab).matches()) {
              Tree oldT = t;
              do {
                Tree ht = headFinder.determineHead(oldT);
                // special fix for possessives! -- make noun before head
                if (ht.value().equals("POS")) {
                  int j = oldT.objectIndexOf(ht);
                  if (j > 0) {
                    ht = oldT.getChild(j - 1);
                  }
                }
                // Note: this next bit changes the tree label, rather
                // than creating a new tree node. Beware!
                if (ht.isPreTerminal() || ht.value().startsWith("NP")) {
                  LabelFactory lf = ht.labelFactory();
                  ht.setLabel(lf.newLabel(ht.value() + "-TMP"));
                }
                oldT = ht;
              } while (oldT.value().startsWith("NP"));
            }
          } else if (temporalAnnotation == TEMPORAL_9) {
            // also allow chain to start with PP or ADVP
            String lab = t.value();
            if (NPTmpPattern.matcher(lab).matches()
                || PPTmpPattern.matcher(lab).matches()
                || ADVPTmpPattern.matcher(lab).matches()) {
              // System.err.println("TMP: Annotating " + t);
              addTMP9(t);
            }
          } else if (temporalAnnotation == TEMPORAL_ACL03PCFG) {
            String lab = t.label().value();
            if (lab != null && NPTmpPattern.matcher(lab).matches()) {
              // Follow the head chain to the preterminal; only that node may be marked.
              Tree oldT = t;
              Tree ht;
              do {
                ht = headFinder.determineHead(oldT);
                // special fix for possessives! -- make noun before head
                if (ht.label().value().equals("POS")) {
                  int j = oldT.objectIndexOf(ht);
                  if (j > 0) {
                    ht = oldT.getChild(j - 1);
                  }
                }
                oldT = ht;
              } while (!ht.isPreTerminal());
              if (!onlyTagAnnotateNstar || ht.label().value().startsWith("N")) {
                LabelFactory lf = ht.label().labelFactory();
                // Note: this changes the tree label, rather than
                // creating a new tree node. Beware!
                ht.setLabel(lf.newLabel(ht.label().value() + "-TMP"));
              }
            }
          }
          if (doAdverbialNP) {
            String lab = t.value();
            if (NPAdvPattern.matcher(lab).matches()) {
              // Same head-chain walk as the TEMPORAL_ALL_NP branch, but appending "-ADV".
              Tree oldT = t;
              Tree ht;
              do {
                ht = headFinder.determineHead(oldT);
                // special fix for possessives! -- make noun before head
                if (ht.label().value().equals("POS")) {
                  int j = oldT.objectIndexOf(ht);
                  if (j > 0) {
                    ht = oldT.getChild(j - 1);
                  }
                }
                if (ht.isPreTerminal() || ht.value().startsWith("NP")) {
                  LabelFactory lf = ht.labelFactory();
                  // Note: this changes the tree label, rather than
                  // creating a new tree node. Beware!
                  ht.setLabel(lf.newLabel(ht.label().value() + "-ADV"));
                  oldT = ht;
                }
              } while (ht.value().startsWith("NP"));
            }
          }
          return t;
        }
      };

  // if there wasn't an empty nonterminal at the top, but an S, wrap it.
  if (tree.label().value().equals("S")) {
    tree = tf.newTreeNode("ROOT", Collections.singletonList(tree));
  }
  // repair for the phrasal VB in Switchboard (PTB version 3) that should be a VP
  for (Tree subtree : tree) {
    if (subtree.isPhrasal() && "VB".equals(subtree.label().value())) {
      subtree.setValue("VP");
    }
  }
  tree = tree.transform(transformer1);
  if (tree == null) {
    return null;
  }
  tree = tree.prune(subtreeFilter, tf);
  if (tree == null) {
    return null;
  }
  tree = tree.spliceOut(nodeFilter, tf);
  if (tree == null) {
    return null;
  }
  return tree.transform(transformer2, tf);
}