/**
 * Prepends an import directive for {@code packageName} to the statements of a class definition.
 *
 * <p>The name is tokenized on {@code '.'}; each token becomes an identifier that is chained into
 * a {@code PackageIdentifiersNode}. The resulting directive takes over the class's {@code pkgdef}
 * and is inserted at index 0 of {@code node.statements.items}. A statement list is created first
 * when the class has none.
 *
 * @param context compiler context used to build the node factory and the import directive
 * @param node class definition that receives the import
 * @param packageName dot-separated name to import
 */
public static void addImport(Context context, ClassDefinitionNode node, String packageName) {
  NodeFactory factory = new NodeFactory(context);

  // Fold the dotted name into a chain of package identifiers, one token at a time.
  PackageIdentifiersNode identifiers = null;
  for (StringTokenizer segments = new StringTokenizer(packageName, ".");
      segments.hasMoreTokens(); ) {
    IdentifierNode segment = factory.identifier(segments.nextToken());
    identifiers = factory.packageIdentifiers(identifiers, segment, true);
  }

  ImportDirectiveNode directive =
      factory.importDirective(null, factory.packageName(identifiers), null, context);
  directive.pkgdef = node.pkgdef;

  if (node.statements == null) {
    node.statements = new StatementListNode(null);
  }
  node.statements.items.add(0, directive);
}
/**
 * Copies every entry of {@code node.getInfos()} into {@code data} via {@code setInfo}.
 *
 * <p>Does nothing when {@code getInfos()} returns {@code null}. The keys are snapshotted into an
 * array before the copy loop runs, and each key is concatenated with {@code EMPTY} before being
 * handed to {@code setInfo}.
 *
 * @param data receiver of the info entries
 * @param node source of the info map
 */
private static void loadObject(final AbstractGetInfo<?> data, final NodeFactory node) {
  if (node.getInfos() == null) {
    return;
  }

  final String[] snapshot =
      node.getInfos().keySet().toArray(new String[node.getInfos().keySet().size()]);
  for (int i = 0; i < snapshot.length; i++) {
    final String infoKey = snapshot[i];
    data.setInfo(infoKey + EMPTY, node.getInfos().get(infoKey));
  }
}
/*
 * Generate the states array initializer (AST).
 *
 * When the model has at least one state name, every state that resolves via
 * model.stateByName() contributes its definition body to an array literal. That literal becomes
 * the single argument of a set-expression on the STATES identifier, which is wrapped in an
 * expression statement and appended to statementList. With no state names the incoming list is
 * returned untouched.
 */
private StatementListNode genStatesAST(
    StatesModel model,
    NodeFactory nodeFactory,
    HashSet<String> configNamespaces,
    boolean generateDocComments,
    StatementListNode statementList) {
  Set<String> stateNames = model.info.getStateNames();
  if (stateNames.isEmpty()) {
    return statementList;
  }

  // Collect one definition expression per resolvable state; unknown names are skipped.
  ArgumentListNode stateDefinitions = null;
  for (String stateName : stateNames) {
    State state = (State) model.stateByName(stateName);
    if (state == null) {
      continue;
    }
    MemberExpressionNode definition =
        state.generateDefinitionBody(
            nodeFactory, configNamespaces, generateDocComments, bindingsQueue);
    stateDefinitions = nodeFactory.argumentList(stateDefinitions, definition);
  }

  LiteralArrayNode statesArray = nodeFactory.literalArray(stateDefinitions);
  ArgumentListNode assignedValue = nodeFactory.argumentList(null, statesArray);
  IdentifierNode target = nodeFactory.identifier(STATES, false);
  SetExpressionNode assignment = nodeFactory.setExpression(target, assignedValue, false);
  MemberExpressionNode assignmentExpression = nodeFactory.memberExpression(null, assignment);
  ExpressionStatementNode statement =
      nodeFactory.expressionStatement(nodeFactory.list(null, assignmentExpression));

  return nodeFactory.statementList(statementList, statement);
}
private static Node readNode(TrieReader reader, int depth, int maxDepth) throws IOException { if (depth > maxDepth) { skipNode(reader); return null; } long count = reader.readCount(); int depthPlus1 = depth + 1; long sym1 = reader.readSymbol(); // 0+ daughters if (sym1 == -1L) return NodeFactory.createNode(count); // 1+ daughters Node node1 = readNode(reader, depthPlus1, maxDepth); long sym2 = reader.readSymbol(); if (sym2 == -1L) return NodeFactory.createNodeFold((char) sym1, node1, count); Node node2 = readNode(reader, depthPlus1, maxDepth); long sym3 = reader.readSymbol(); if (sym3 == -1L) return NodeFactory.createNode((char) sym1, node1, (char) sym2, node2, count); Node node3 = readNode(reader, depthPlus1, maxDepth); long sym4 = reader.readSymbol(); if (sym4 == -1L) return NodeFactory.createNode( (char) sym1, node1, (char) sym2, node2, (char) sym3, node3, count); Node node4 = readNode(reader, depthPlus1, maxDepth); // 4+ daughters StringBuilder cBuf = new StringBuilder(); cBuf.append((char) sym1); cBuf.append((char) sym2); cBuf.append((char) sym3); cBuf.append((char) sym4); List<Node> nodeList = new ArrayList<Node>(); nodeList.add(node1); nodeList.add(node2); nodeList.add(node3); nodeList.add(node4); long sym; while ((sym = reader.readSymbol()) != -1L) { cBuf.append((char) sym); nodeList.add(readNode(reader, depthPlus1, maxDepth)); } Node[] nodes = nodeList.toArray(EMPTY_NODE_ARRAY); char[] cs = Strings.toCharArray(cBuf); return NodeFactory.createNode(cs, nodes, count); // > 3 daughters }
// returns list of statements protected static void replaceMultSPO( Statement st, NodeFactory f, Map o2n, Collection result, RDFNode toReplace, int position) throws ModelException { Collection replacements; if (toReplace instanceof Statement) { List l = new ArrayList(); replaceMult((Statement) toReplace, f, o2n, l); if (l.size() == 1 && toReplace == l.get(0)) { result.add(st); return; // keep the same } else replacements = l; } else { Object ro = o2n.get(toReplace); if (ro instanceof Collection) replacements = (Collection) ro; else if (ro != null) { replacements = new ArrayList(); replacements.add(ro); } else { // no replacement needed result.add(st); // keep the same statement return; } } for (Iterator it = replacements.iterator(); it.hasNext(); ) { Statement rs = null; Object rr = it.next(); switch (position) { case 0: rs = f.createStatement((Resource) rr, st.predicate(), st.object()); break; case 1: rs = f.createStatement(st.subject(), (Resource) rr, st.object()); break; case 2: rs = f.createStatement(st.subject(), st.predicate(), (RDFNode) rr); break; } result.add(rs); } }
/**
 * Creates a node through {@code factory}, passing the given constants followed by one argument
 * node per entry in the factory's execution signature.
 *
 * <p>When the factory's node class is assignable to {@code ChildrenNode} or {@code BuiltinNode},
 * the argument nodes are passed as a single array element; otherwise each argument node is passed
 * as its own element.
 *
 * @param factory factory that instantiates the node
 * @param constants leading creation arguments
 * @return the node created by the factory
 */
static <E extends ValueNode> E createNode(NodeFactory<E> factory, Object... constants) {
  ArgumentNode[] argumentNodes = arguments(factory.getExecutionSignature().size());

  List<Object> creationArgs = new ArrayList<>(Arrays.asList(constants));

  Class<?> nodeClass = factory.getNodeClass();
  boolean takesArgumentArray =
      ChildrenNode.class.isAssignableFrom(nodeClass)
          || BuiltinNode.class.isAssignableFrom(nodeClass);
  if (takesArgumentArray) {
    // The whole array is a single creation argument.
    creationArgs.add(argumentNodes);
  } else {
    for (ArgumentNode argumentNode : argumentNodes) {
      creationArgs.add(argumentNode);
    }
  }

  return factory.createNode(creationArgs.toArray(new Object[creationArgs.size()]));
}
/** Expects exactly two features in the factory, both present and confirmed. */
public void testFeatures() {
  assertEquals("nb features", 2, factory.getFeatures().size());

  Node mainFeature = factory.getFeatures().get("test.main(java.lang.String[])");
  assertNotNull("feature test.main(java.lang.String[]) missing", mainFeature);
  assertTrue("feature test.main(java.lang.String[]) not concrete", mainFeature.isConfirmed());

  Node constructorFeature = factory.getFeatures().get("test.test()");
  assertNotNull("feature test.test() missing", constructorFeature);
  assertTrue("feature test.test() not concrete", constructorFeature.isConfirmed());
}
/**
 * Reifies two triples against two distinct blank nodes and checks that each triple can be
 * retrieved through its node, while an unrelated URI node has no triple bound.
 */
public void testRetrieveTriplesByNode() {
  Graph graph = getGraph();
  Node first = NodeFactory.createBlankNode();
  Node second = NodeFactory.createBlankNode();

  ReifierStd.reifyAs(graph, first, triple("x R y"));
  assertEquals("gets correct triple", triple("x R y"), ReifierStd.getTriple(graph, first));

  ReifierStd.reifyAs(graph, second, triple("p S q"));
  assertDiffer("the anon nodes must be distinct", first, second);
  assertEquals("gets correct triple", triple("p S q"), ReifierStd.getTriple(graph, second));

  assertTrue("node is known bound", ReifierStd.hasTriple(graph, second));
  assertTrue("node is known bound", ReifierStd.hasTriple(graph, first));

  Node unrelated = NodeFactory.createURI("any:thing");
  assertFalse("node is known unbound", ReifierStd.hasTriple(graph, unrelated));
}
/**
 * Returns a new set holding the {@code NodeFactory.getInstance} result for every node of the
 * underlying UML activity.
 *
 * @return freshly built set of nodes
 */
public Set<Node> getNodes() {
  final Set<Node> wrapped = new HashSet<Node>();
  for (final org.eclipse.uml2.uml.ActivityNode umlNode : uml_activity.getNodes()) {
    final Node wrapper = NodeFactory.getInstance(umlNode);
    wrapped.add(wrapper);
  }
  return wrapped;
}
/**
 * Adds a dependency from each of the {@code a} package, class and feature to each of the
 * {@code b} package, class and feature, runs a {@code LinkMinimizer} over the factory's packages,
 * and expects only the feature-to-feature link {@code a.A.a --> b.B.b} to remain.
 */
public void testFeatureFeature() {
  // Package a depends on everything under b.
  a.addDependency(b);
  a.addDependency(b_B);
  a.addDependency(b_B_b);

  // Class a.A depends on everything under b.
  a_A.addDependency(b);
  a_A.addDependency(b_B);
  a_A.addDependency(b_B_b);

  // Feature a.A.a() depends on everything under b.
  a_A_a.addDependency(b);
  a_A_a.addDependency(b_B);
  a_A_a.addDependency(b_B_b);

  Visitor minimizer = new LinkMinimizer();
  minimizer.traverseNodes(factory.getPackages().values());

  // Source side: only the feature keeps an outbound link.
  assertEquals("a outbound", 0, a.getOutboundDependencies().size());
  assertEquals("a inbound", 0, a.getInboundDependencies().size());
  assertEquals("a_A outbound", 0, a_A.getOutboundDependencies().size());
  assertEquals("a_A inbound", 0, a_A.getInboundDependencies().size());
  assertEquals("a_A_a outbound", 1, a_A_a.getOutboundDependencies().size());
  assertTrue("Missing a.A.a --> b.B.b", a_A_a.getOutboundDependencies().contains(b_B_b));
  assertEquals("a_A_a inbound", 0, a_A_a.getInboundDependencies().size());

  // Target side: only the feature keeps an inbound link.
  assertEquals("b outbound", 0, b.getOutboundDependencies().size());
  assertEquals("b inbound", 0, b.getInboundDependencies().size());
  assertEquals("b_B outbound", 0, b_B.getOutboundDependencies().size());
  assertEquals("b_B inbound", 0, b_B.getInboundDependencies().size());
  assertEquals("b_B_b outbound", 0, b_B_b.getOutboundDependencies().size());
  assertEquals("b_B_b inbound", 1, b_B_b.getInboundDependencies().size());
  assertTrue("Missing b.B.b <-- a.A.a", b_B_b.getInboundDependencies().contains(a_A_a));
}
/**
 * Converts each entry of {@code nodes} with {@code mFactory.makeNode} and forwards the resulting
 * array, in the same order, to {@code mCb.addNodes}.
 *
 * @param nodes values to convert
 * @param before passed through unchanged to {@code mCb.addNodes}
 */
public void addNodes(int[] nodes, int before) {
  Prop[] converted = new Prop[nodes.length];
  int slot = 0;
  for (int node : nodes) {
    converted[slot++] = mFactory.makeNode(node);
  }
  mCb.addNodes(converted, before);
}
private Node instantiateNodeForRecipe( RecipeTraceInfo recipeTrace, final ReteNodeRecipe recipe, Collection<ReteNodeRecipe> sameClassRecipes) { if (recipe instanceof IndexerRecipe) { // INSTANTIATE AND HOOK UP // (cannot delay hooking up, because parent determines indexer implementation) ensureParents(recipeTrace); final ReteNodeRecipe parentRecipe = recipeTrace.getParentRecipeTraces().iterator().next().getRecipe(); final Indexer result = nodeFactory.createIndexer( reteContainer, (IndexerRecipe) recipe, asSupplier( (Address<? extends Supplier>) reteContainer.network.getExistingNodeByRecipe(parentRecipe)), recipeTrace); // REMEMBER if (Options.nodeSharingOption != Options.NodeSharingOption.NEVER) { getNodesByRecipe().put(recipe, reteContainer.makeAddress(result)); sameClassRecipes.add(recipe); } return result; } else { // INSTANTIATE Node result = nodeFactory.createNode(reteContainer, recipe, recipeTrace); // REMEMBER if (Options.nodeSharingOption == Options.NodeSharingOption.ALL) { getNodesByRecipe().put(recipe, reteContainer.makeAddress(result)); sameClassRecipes.add(recipe); } // HOOK UP // (recursion-tolerant due to this delayed order of initialization) ensureParents(recipeTrace); if (recipe instanceof InputRecipe) inputConnector.connectInput((InputRecipe) recipe, result); else connectionFactory.connectToParents(recipeTrace, result); return result; } }
/**
 * Expects exactly three classes in the factory: {@code test} confirmed, and
 * {@code java.io.PrintStream} and {@code java.util.Set} present but not confirmed.
 */
public void testClasses() {
  assertEquals("nb classes", 3, factory.getClasses().size());

  Node testClass = factory.getClasses().get("test");
  assertNotNull("class test missing", testClass);
  assertTrue("class test not concrete", testClass.isConfirmed());

  Node printStreamClass = factory.getClasses().get("java.io.PrintStream");
  assertNotNull("class java.io.PrintStream missing", printStreamClass);
  assertFalse("class java.io.PrintStream is concrete", printStreamClass.isConfirmed());

  Node setClass = factory.getClasses().get("java.util.Set");
  assertNotNull("class java.util.Set missing", setClass);
  assertFalse("class java.util.Set is concrete", setClass.isConfirmed());
}
/**
 * Expects exactly three packages in the factory: the default package confirmed, and
 * {@code java.io} and {@code java.util} present but not confirmed.
 */
public void testPackages() {
  assertEquals("nb packages", 3, factory.getPackages().size());

  Node defaultPackage = factory.getPackages().get("");
  assertNotNull("default package missing", defaultPackage);
  assertTrue("default package not concrete", defaultPackage.isConfirmed());

  Node ioPackage = factory.getPackages().get("java.io");
  assertNotNull("package java.io missing", ioPackage);
  assertFalse("package java.io is concrete", ioPackage.isConfirmed());

  Node utilPackage = factory.getPackages().get("java.util");
  assertNotNull("package java.util missing", utilPackage);
  assertFalse("package java.util is concrete", utilPackage.isConfirmed());
}
/**
 * Creates a resource from a URI by splitting it at the index reported by
 * {@code getNamespaceEnd}.
 *
 * <p>The local name is always the part of {@code uri} from that index on. The namespace is the
 * part before it, but only when the index is greater than 1; otherwise the namespace is
 * {@code null}.
 *
 * @param f factory used to create the resource
 * @param uri URI to split
 * @return the resource created by the factory
 * @throws ModelException propagated from the factory
 */
public static Resource createGuessedResource(NodeFactory f, String uri) throws ModelException {
  final int splitAt = getNamespaceEnd(uri);

  String namespace = null;
  if (splitAt > 1) {
    namespace = uri.substring(0, splitAt);
  }
  final String localName = uri.substring(splitAt);

  return f.createResource(namespace, localName);
}
/**
 * Removes strings with counts below the specified minimum. Counts for remaining strings are not
 * affected. Pruning may be interleaved with updating counts in any order.
 *
 * <p>If pruning removes the root node itself, the root is replaced by a fresh node with count
 * zero.
 *
 * @param minCount Minimum count required to retain a substring count.
 * @throws IllegalArgumentException If the count is less than <code>1</code>.
 */
public void prune(int minCount) {
  if (minCount < 1) {
    // A minimum of exactly 1 is accepted by the check above, so the message says "at least 1".
    String msg = "Prune minimum count must be at least 1." + " Found minCount=" + minCount;
    throw new IllegalArgumentException(msg);
  }
  mRootNode = mRootNode.prune(minCount);
  if (mRootNode == null) {
    mRootNode = NodeFactory.createNode(0);
  }
}
/*
 * Generates the initializers (AST) for all values that are shared between states (e.g. all
 * instance factories shared by AddItems overrides).
 *
 * For every shared object one variable definition is appended to statementList. The variable is
 * named <name> + _FACTORY and initialized with a "new" call expression on the
 * DEFERREDINSTANCEFROMFUNCTION class. The first call argument is a getter selector for
 * <name> + _I (declared models) or <name> + _C (otherwise); transient models get a second
 * argument, a getter selector for <name> + _R.
 */
private StatementListNode genSharedFactoriesAST(
    StatesModel model, NodeFactory nodeFactory, StatementListNode statementList) {
  Map<String, SharedObject> sharedObjects = model.sharedObjects;
  StatementListNode statements = statementList;

  for (SharedObject symbol : sharedObjects.values()) {
    String factoryVarName = ((String) symbol.name + _FACTORY).intern();
    String deferredTypeName = NameFormatter.retrieveClassName(DEFERREDINSTANCEFROMFUNCTION);
    String creatorName = symbol.name + (symbol.model.isDeclared() ? _I : _C);

    // Constructor arguments: the creator function, plus the reset function for transient models.
    ArgumentListNode constructorArgs =
        nodeFactory.argumentList(
            null, AbstractSyntaxTreeUtil.generateGetterSelector(nodeFactory, creatorName, true));
    if (symbol.model.getIsTransient()) {
      String resetName = symbol.name + _R;
      constructorArgs =
          nodeFactory.argumentList(
              constructorArgs,
              AbstractSyntaxTreeUtil.generateGetterSelector(nodeFactory, resetName, true));
    }

    QualifiedIdentifierNode deferredType =
        AbstractSyntaxTreeUtil.generateQualifiedIdentifier(
            nodeFactory, standardDefs.getCorePackage(), deferredTypeName, false);
    CallExpressionNode construction =
        (CallExpressionNode) nodeFactory.callExpression(deferredType, constructorArgs);
    construction.is_new = true;
    construction.setRValue(false);

    MemberExpressionNode initializer = nodeFactory.memberExpression(null, construction);
    VariableDefinitionNode declaration =
        AbstractSyntaxTreeUtil.generateVariable(
            nodeFactory, factoryVarName, deferredTypeName, false, initializer);
    statements = nodeFactory.statementList(statements, declaration);
  }

  return statements;
}
/** Flatten complex expressions within the AST */ public Node leave(Node old, Node n, NodeVisitor v) { if (n == noFlatten) { noFlatten = null; return n; } if (n instanceof Block) { List l = (List) stack.removeFirst(); return ((Block) n).statements(l); } else if (n instanceof Stmt && !(n instanceof LocalDecl)) { List l = (List) stack.getFirst(); l.add(n); return n; } else if (n instanceof Expr && !(n instanceof Lit) && !(n instanceof Special) && !(n instanceof Local)) { Expr e = (Expr) n; if (e instanceof Assign) { return n; } // create a local temp, initialized to the value of the complex // expression String name = newID(); LocalDecl def = nf.LocalDecl( e.position(), Flags.FINAL, nf.CanonicalTypeNode(e.position(), e.type()), name, e); def = def.localInstance(ts.localInstance(e.position(), Flags.FINAL, e.type(), name)); List l = (List) stack.getFirst(); l.add(def); // return the local temp instead of the complex expression Local use = nf.Local(e.position(), name); use = (Local) use.type(e.type()); use = use.localInstance(ts.localInstance(e.position(), Flags.FINAL, e.type(), name)); return use; } return n; }
/**
 * Parses a term followed by any number of OR-joined terms.
 *
 * <p>For as long as the current token is {@code Lexer.OR}, a non-terminal is created for it with
 * the tree parsed so far as its left child and the next term as its right child; it then becomes
 * the new {@code root}.
 */
@SuppressWarnings("unchecked")
private void expression() {
  term();
  while (token == Lexer.OR) {
    NonTerminal disjunction = NodeFactory.createNonTerminal(token);
    disjunction.setLeft(root);
    term();
    disjunction.setRight(root);
    root = disjunction;
  }
}
/**
 * Parses a factor followed by any number of AND-joined factors.
 *
 * <p>For as long as the current token is {@code Lexer.AND}, a non-terminal is created for it with
 * the tree parsed so far as its left child and the next factor as its right child; it then
 * becomes the new {@code root}.
 */
@SuppressWarnings("unchecked")
private void term() {
  factor();
  while (token == Lexer.AND) {
    NonTerminal conjunction = NodeFactory.createNonTerminal(token);
    conjunction.setLeft(root);
    factor();
    conjunction.setRight(root);
    root = conjunction;
  }
}
/**
 * Returns a clone of {@code s} from which every element rejected by {@code cmp} has been removed.
 *
 * <p>Each element is passed to {@code cmp.invoke} as a one-element argument array, with
 * {@code null} as the second argument; elements for which the call yields {@code false} are
 * dropped. The input set is not modified.
 *
 * @param cmp predicate-like factory invoked once per element
 * @param s set to filter
 * @return filtered clone of {@code s}
 */
public static TreeSet<Object> filter(NodeFactory<Boolean> cmp, TreeSet<Object> s) {
  TreeSet<Object> kept = (TreeSet<Object>) s.clone();
  for (Iterator<Object> cursor = kept.iterator(); cursor.hasNext(); ) {
    Object candidate = cursor.next();
    boolean accepted = cmp.invoke(new Object[] {candidate}, null);
    if (!accepted) {
      cursor.remove();
    }
  }
  return kept;
}
/**
 * Returns a statement in which every component living in namespace {@code o} has been recreated
 * in namespace {@code n} with the same local name.
 *
 * <p>The object is only considered when it is a resource that is not itself a statement.
 * Components contained in {@code resourcesToIgnore} are left alone. Each substitution is recorded
 * in {@code o2n} (old to new) when that map is given. If nothing was substituted, {@code st}
 * itself is returned.
 *
 * @param st statement to process
 * @param o namespace to replace
 * @param n replacement namespace
 * @param f factory for the new resources and statement
 * @param o2n optional map receiving old-to-new resource pairs; may be {@code null}
 * @param resourcesToIgnore optional set of resources that must not be replaced; may be
 *     {@code null}
 * @return the rewritten statement, or {@code st} when unchanged
 * @throws ModelException propagated from the statement, resources or factory
 */
public static Statement replaceNamespace(
    Statement st, String o, String n, NodeFactory f, Map o2n, Set resourcesToIgnore)
    throws ModelException {
  Resource subject = st.subject();
  Resource predicate = st.predicate();
  RDFNode object = st.object();
  boolean changed = false;

  boolean objectIsPlainResource = object instanceof Resource && !(object instanceof Statement);
  if (objectIsPlainResource
      && o.equals(((Resource) object).getNamespace())
      && (resourcesToIgnore == null || !resourcesToIgnore.contains(object))) {
    changed = true;
    Resource renamed = f.createResource(n, ((Resource) object).getLocalName());
    if (o2n != null) {
      o2n.put(object, renamed);
    }
    object = renamed;
  }

  if (o.equals(subject.getNamespace())
      && (resourcesToIgnore == null || !resourcesToIgnore.contains(subject))) {
    changed = true;
    Resource renamed = f.createResource(n, subject.getLocalName());
    if (o2n != null) {
      o2n.put(subject, renamed);
    }
    subject = renamed;
  }

  if (o.equals(predicate.getNamespace())
      && (resourcesToIgnore == null || !resourcesToIgnore.contains(predicate))) {
    changed = true;
    Resource renamed = f.createResource(n, predicate.getLocalName());
    if (o2n != null) {
      o2n.put(predicate, renamed);
    }
    predicate = renamed;
  }

  if (!changed) {
    return st;
  }
  return f.createStatement(subject, predicate, object);
}
/**
 * Parses a single value (variable or number) into a terminal node and advances to the next token.
 *
 * <p>When a set of allowed identifiers is configured, a variable whose symbol is not in that set
 * is rejected.
 *
 * @throws MalformedExpressionException if the current token is neither a variable nor a number,
 *     or if a variable is not among the allowed identifiers
 */
private void value() {
  if (token != Lexer.VARIABLE && token != Lexer.NUMBER) {
    throw new MalformedExpressionException(
        String.format("Value instead of <%s> expected.", token));
  }

  root = NodeFactory.createTerminal(token, lexer.getValue());

  boolean identifierRestricted = token == Lexer.VARIABLE && allowedIdentifiers != null;
  if (identifierRestricted && !allowedIdentifiers.contains(root.getSymbol())) {
    throw new MalformedExpressionException(
        String.format("Unknown identifier '%s'", root.getSymbol()));
  }

  token = lexer.nextToken();
}
/**
 * Builds a fresh factory holding two parallel hierarchies: package {@code a} with class
 * {@code a.A} and feature {@code a.A.a()}, and package {@code b} with class {@code b.B} and
 * feature {@code b.B.b()}.
 */
protected void setUp() throws Exception {
  this.factory = new NodeFactory();

  this.a = this.factory.createPackage("a");
  this.a_A = this.factory.createClass("a.A");
  this.a_A_a = this.factory.createFeature("a.A.a()");

  this.b = this.factory.createPackage("b");
  this.b_B = this.factory.createClass("b.B");
  this.b_B_b = this.factory.createFeature("b.B.b()");
}
/*
 * Generate all AST calls to instantiate itemCreationPolicy='immediate' nodes.
 *
 * For every name in model.earlyInitObjects one expression statement is appended to
 * statementList: a call of GETINSTANCE with no arguments, selected on the identifier
 * <name> + "_factory".
 */
private StatementListNode genImmediateInitsAST(
    StatesModel model, NodeFactory nodeFactory, StatementListNode statementList) {
  List<String> earlyObjects = model.earlyInitObjects;
  StatementListNode statements = statementList;

  for (String objectName : earlyObjects) {
    // Receiver: the "<name>_factory" identifier.
    String factoryName = ((String) objectName + "_factory").intern();
    IdentifierNode factoryId = nodeFactory.identifier(factoryName, false);
    GetExpressionNode factoryGet = nodeFactory.getExpression(factoryId);
    MemberExpressionNode receiver = nodeFactory.memberExpression(null, factoryGet);

    // Selector: a call of GETINSTANCE without arguments.
    IdentifierNode getInstanceId = nodeFactory.identifier(GETINSTANCE, false);
    CallExpressionNode getInstanceCall =
        (CallExpressionNode) nodeFactory.callExpression(getInstanceId, null);
    getInstanceCall.setRValue(false);

    MemberExpressionNode invocation = nodeFactory.memberExpression(receiver, getInstanceCall);
    ExpressionStatementNode statement =
        nodeFactory.expressionStatement(nodeFactory.list(null, invocation));
    statements = nodeFactory.statementList(statements, statement);
  }

  return statements;
}
/**
 * Parses {@code value <comparison-operator> value} into a non-terminal that becomes the new
 * {@code root}, with the first value as left child and the second as right child.
 *
 * @throws MalformedExpressionException if the token following the first value is not one of the
 *     six comparison operators
 */
@SuppressWarnings("unchecked")
private void condition() {
  value();

  boolean isComparison =
      token == Lexer.GREATER
          || token == Lexer.GREATEROREQUAL
          || token == Lexer.LESS
          || token == Lexer.LESSOREQUAL
          || token == Lexer.EQUAL
          || token == Lexer.NOTEQUAL;
  if (!isComparison) {
    throw new MalformedExpressionException(
        String.format("Conditional operator instead of <%s> expected.", token));
  }

  NonTerminal comparison = NodeFactory.createNonTerminal(token);
  comparison.setLeft(root);
  token = lexer.nextToken();
  value();
  comparison.setRight(root);
  root = comparison;
}
/**
 * Appends the relationship's SQL filter, if it has one, to the join as an extra {@code AND}
 * condition.
 *
 * <p>The filter is parsed into an expression tree. Every identifier in it must have the form
 * {@code entity.rest}: an entity part matching the source entity's system or table name is
 * replaced by {@code sourceVarName}, one matching the target entity's by {@code targetVarName},
 * and any other entity part is left unchanged. Nothing is written when the filter is
 * {@code null} or empty.
 *
 * @param writer receives {@code " AND "} followed by the rewritten filter
 * @param databaseType passed through to the expression's {@code toString}
 * @param sourceVarName alias substituted for the source entity
 * @param targetVarName alias substituted for the target entity
 * @throws EJBQLException if an identifier in the filter contains no {@code '.'}
 */
private void appendAdditionalJoinCondition(
    SqlWriter writer,
    DatabaseType databaseType,
    final String sourceVarName,
    final String targetVarName) {
  boolean hasFilter =
      relationship.getSqlFilter() != null && relationship.getSqlFilter().length() != 0;
  if (!hasFilter) {
    return;
  }

  Node filterTree = NodeFactory.parseExpression(relationship.getSqlFilter());

  NodeVisitor aliasRewriter =
      new AbstractNodeVisitor() {
        @Override
        public boolean visitIdentifier(Identifier identifier) {
          String qualifiedName = identifier.getName();
          int dot = qualifiedName.indexOf('.');
          if (dot < 0) {
            throw new EJBQLException(
                "Invalid sqlWhere in relationship: "
                    + relationship
                    + " - "
                    + relationship.getSqlFilter());
          }

          String owner = qualifiedName.substring(0, dot);
          // Everything from the dot on is kept and re-attached to the alias.
          String remainder = qualifiedName.substring(dot);

          if (owner.equals(relationship.getSourceEntity().getSystemName())
              || owner.equals(relationship.getSourceEntity().getTableName())) {
            identifier.setName(sourceVarName + remainder);
          } else if (owner.equals(relationship.getTargetEntity().getSystemName())
              || owner.equals(relationship.getTargetEntity().getTableName())) {
            identifier.setName(targetVarName + remainder);
          }
          return true;
        }
      };
  filterTree.accept(aliasRewriter);

  writer.write(" AND ");
  filterTree.toString(writer, databaseType);
}
/**
 * Returns a statement in which every component that has a mapping in {@code o2n} is replaced by
 * its mapped value.
 *
 * <p>An object or subject that is itself a statement is processed recursively. After recursion
 * the subject is additionally looked up in {@code o2n}. If no component changed, {@code st}
 * itself is returned; otherwise a new statement is created through {@code f}.
 *
 * @param st statement to process
 * @param f factory used to create the replacement statement
 * @param o2n mapping from old nodes to their replacements
 * @return the rewritten statement, or {@code st} when nothing was replaced
 * @throws ModelException propagated from the statement or the factory
 */
public static Statement replaceResources(Statement st, NodeFactory f, Map o2n)
    throws ModelException {
  boolean replaced = false;
  Resource subj = st.subject();
  Resource pred = st.predicate();
  RDFNode obj = st.object();
  Object n = null;

  if (obj instanceof Statement) {
    n = obj;
    obj = replaceResources((Statement) obj, f, o2n);
    replaced = n != obj;
  } else if ((n = o2n.get(obj)) != null) {
    replaced = true;
    obj = (RDFNode) n;
  }

  if (subj instanceof Statement) {
    n = subj;
    subj = replaceResources((Statement) subj, f, o2n);
    // Accumulate instead of overwrite: an unchanged nested subject must not cancel a
    // replacement that already happened on the object.
    replaced |= (n != subj);
  }
  if ((n = o2n.get(subj)) != null) {
    replaced = true;
    subj = (Resource) n;
  }

  if ((n = o2n.get(pred)) != null) {
    replaced = true;
    pred = (Resource) n;
  }

  return replaced ? f.createStatement(subj, pred, obj) : st;
}
/** * A <code>TrieCharSeqCounter</code> stores counts for substrings of strings. When the counter is * constructed, a maximum length is specified, and counts are only stored for strings up to that * length. For instance, an n-gram language model needs only counts for strings up to length n. * * <p>Strings may be added to the counter using {@link #incrementSubstrings(char[],int,int)}, which * increments the counts for all substrings of the specified character slice up to the specified * maximum length substring. The method {@link #incrementPrefixes(char[],int,int)} increments only * the prefixes of the specified string. All substrings are incremented by incrementing prefixes for * each suffix. A substring counter may be pruned using {@link #prune(int)}, which removes all * substrings with count below the specified threshold. * * <p>There are a wide range of reporting methods for trie-based counters. * * <p><i>Implementation Note:</i> The trie counters are a heavily unfolded implementation of a * character-based Patricia (PAT) trie. * * @author Bob Carpenter * @version 3.8 * @since LingPipe2.0 */ public class TrieCharSeqCounter implements CharSeqCounter { Node mRootNode = NodeFactory.createNode(0); final int mMaxLength; /** * Construct a substring counter that stores substrings up to the specified maximum length. * * @param maxLength Maximum length of substrings stored by this counter. * @throws IllegalArgumentException If the maximum length is negative. */ public TrieCharSeqCounter(int maxLength) { if (maxLength < 0) { String msg = "Max length must be >= 0." + " Found length=" + maxLength; throw new IllegalArgumentException(msg); } mMaxLength = maxLength; } // following is CharSeqCounter interface w. 
inherited comments public long count(char[] cs, int start, int end) { Strings.checkArgsStartEnd(cs, start, end); return mRootNode.count(cs, start, end); } public long extensionCount(char[] cs, int start, int end) { Strings.checkArgsStartEnd(cs, start, end); return mRootNode.contextCount(cs, start, end); } public char[] observedCharacters() { return com.aliasi.util.Arrays.copy(mRootNode.outcomes(new char[] {}, 0, 0)); } public char[] charactersFollowing(char[] cs, int start, int end) { Strings.checkArgsStartEnd(cs, start, end); return com.aliasi.util.Arrays.copy(mRootNode.outcomes(cs, start, end)); } public int numCharactersFollowing(char[] cs, int start, int end) { Strings.checkArgsStartEnd(cs, start, end); return mRootNode.numOutcomes(cs, start, end); } /** * Returns the sum of counts for all non-empty character sequences. * * @return The sum of counts for all non-empty character sequences. */ public long totalSequenceCount() { long sum = 0l; long[][] uniqueTotals = uniqueTotalNGramCount(); for (int i = 0; i < uniqueTotals.length; ++i) sum += uniqueTotals[i][1]; return sum; } /** * Returns the sum of the counts of all character sequences of the specified length. * * @return The sum of the counts of all character sequences of the specified length. */ public long totalSequenceCount(int length) { return mRootNode.totalNGramCount(length); } /** * Returns the number of character sequences with non-zero counts, including the empty (zero * length) character sequence. * * @return Number of character sequences with non-zero counts. */ public long uniqueSequenceCount() { return mRootNode.size(); } /** * Returns the number of character sequences of the specified length with non-zero counts. * * @return The number of character sequences of the specified length with non-zero counts. */ public long uniqueSequenceCount(int nGramOrder) { return mRootNode.uniqueNGramCount(nGramOrder); } /** * Removes strings with counts below the specified minimum. 
Counts for remaining strings are not * affected. Pruning may be interleaved with updating counts in any order. * * @param minCount Minimum count required to retain a substring count. * @throws IllegalArgumentException If the count is less than <code>1</code>. */ public void prune(int minCount) { if (minCount < 1) { String msg = "Prune minimum count must be more than 1." + " Found minCount=" + minCount; throw new IllegalArgumentException(msg); } mRootNode = mRootNode.prune(minCount); if (mRootNode == null) mRootNode = NodeFactory.createNode(0); } /** * Returns an array of frequency counts for n-grams of the specified n-gram order sorted in * descending frequency order. This form of result is sometimes called a Zipf plot because of the * sorting. * * @param nGramOrder Order of n-gram counted. * @return Array of frequency counts, sorted in decreasing order of rank. */ public int[] nGramFrequencies(int nGramOrder) { List<Long> counts = countsList(nGramOrder); int[] result = new int[counts.size()]; for (int i = 0; i < result.length; ++i) result[i] = counts.get(i).intValue(); java.util.Arrays.sort(result); for (int i = result.length / 2; i >= 0; --i) { int iOpp = result.length - i - 1; int tmp = result[i]; result[i] = result[iOpp]; result[iOpp] = tmp; } return result; } /** * Returns the array of unique and total n-gram counts for each n-gram length. The return array is * indexed in the first position by n-gram length, and in the second position by <code>0</code> * for unique counts and <code>1</code> for total counts. Thus for <code>0<=n<=maxLength() * </code>: * * <blockquote> * * <code> * uniqueTotalNGramCount()[n][0] == uniqueNGramCount(n) * </code> * * </blockquote> * * and * * <blockquote> * * <code> * uniqueTotalNGramCount()[n][1] == totalNGramCount(n) * </code> * * </blockquote> * * If unique and total counts are required for several n-gram depths, this method is much more * efficient than calling all of the individual methods separately. 
* * @return The array of unique and total n-gram counts for each n-gram length. */ public long[][] uniqueTotalNGramCount() { long[][] result = new long[mMaxLength + 1][2]; mRootNode.addNGramCounts(result, 0); return result; } /** * Returns a counter of occurrences of the highest frequency n-grams of a specified n-gram order. * The actual n-grams are represented as strings in the result; recall that strings are instances * of {@link CharSequence}. * * <p>The maximum number of results returned must be specified, because the entire set of n-grams * is usually too large to return as a counter. * * @param nGramOrder Order of n-gram to count. * @param maxReturn Maximum number of objects returned. */ public ObjectToCounterMap<String> topNGrams(int nGramOrder, int maxReturn) { NBestCounter counter = new NBestCounter(maxReturn, true); mRootNode.topNGrams(counter, new char[nGramOrder], 0, nGramOrder); return counter.toObjectToCounter(); } /** * Returns the count in the training corpus for the specified sequence of characters. The count * returned may have been reduced from the raw counts in training cases by pruning. * * @param cSeq Character sequence. * @return Count of character sequence in model. */ public long count(CharSequence cSeq) { return count(com.aliasi.util.Arrays.toArray(cSeq), 0, cSeq.length()); } /** * Returns the sum of the counts of all character sequences one character longer than the * specified character sequence. * * @param cSeq Character sequence. * @return The sum of the counts of all character sequences one character longer than the * specified character sequence. */ public long extensionCount(CharSequence cSeq) { return mRootNode.contextCount(com.aliasi.util.Arrays.toArray(cSeq), 0, cSeq.length()); } /** * Increments the count of all substrings of the specified character array slice up to the maximum * length specified in the constructor. * * @param cs Underlying character array. * @param start Index of first character in slice. 
* @param end Index of one past last character in slice. * @throws IndexOutOfBoundsException If the specified start and one plus end point are not in the * bounds of character sequence. */ public void incrementSubstrings(char[] cs, int start, int end) { incrementSubstrings(cs, start, end, 1); } /** * Increments by the specified count all substrings of the specified character array slice up to * the maximum length specified in the constructor. * * @param cs Underlying character array. * @param start Index of first character in slice. * @param end Index of one past last character in slice. * @param count Amount to increment. * @throws IndexOutOfBoundsException If the specified start and one plus end point are not in the * bounds of character sequence. */ public void incrementSubstrings(char[] cs, int start, int end, int count) { Strings.checkArgsStartEnd(cs, start, end); // increment maximal strings and prefixes for (int i = start; i + mMaxLength <= end; ++i) incrementPrefixes(cs, i, i + mMaxLength, count); // increment short final strings and prefixes for (int i = Math.max(start, end - mMaxLength + 1); i < end; ++i) incrementPrefixes(cs, i, end, count); } /** * Increments the count of all substrings of the specified character sequence up to the maximum * length specified in the constructor. * * @param cSeq Character sequence. */ public void incrementSubstrings(CharSequence cSeq) { incrementSubstrings(cSeq, 1); } /** * Increments by the specified count all substrings of the specified character sequence up to the * maximum length specified in the constructor. * * @param cSeq Character sequence. * @param count Amount to increment. */ public void incrementSubstrings(CharSequence cSeq, int count) { incrementSubstrings(com.aliasi.util.Arrays.toArray(cSeq), 0, cSeq.length(), count); } /** * Increments the count of all prefixes of the specified character sequence up to the maximum * length specified in the constructor. * * @param cs Underlying character array. 
* @param start Index of first character in slice. * @param end Index of one past last character in slice. * @throws IndexOutOfBoundsException If the specified start and one plus end point are not in the * bounds of character sequence. */ public void incrementPrefixes(char[] cs, int start, int end) { incrementPrefixes(cs, start, end, 1); } /** * Increments the count of all prefixes of the specified character sequence up to the maximum * length specified in the constructor. * * @param cs Underlying character array. * @param start Index of first character in slice. * @param end Index of one past last character in slice. * @param count Amount to increment. * @throws IndexOutOfBoundsException If the specified start and one plus end point are not in the * bounds of character sequence. */ public void incrementPrefixes(char[] cs, int start, int end, int count) { Strings.checkArgsStartEnd(cs, start, end); mRootNode = mRootNode.increment(cs, start, end, count); } /** * Decrements all of the substrings of the specified character slice by one. This method may be * used in conjunction with {@link #incrementSubstrings(char[],int,int)} to implement counts for * conditional probability estimates without affecting underlying estimates. For example, the * following code: * * <blockquote> * * <pre> * char[] cs = "abcdefghi".toCharArray(); * counter.incrementSubstrings(cs,3,7); * counter.decrementSubstrings(cs,3,5); * </pre> * * </blockquote> * * will increment the substrings of <code>"defg"</code> and then decrement the * substrings of <code>"de"</code>, causing the net effect of incrementing the counts of * substrings <code>"defg"</code>, <code>"efg"</code>, <code>"fg" * </code>, <code>"g"</code>, <code>"def"</code>, <code>"ef"</code>, * and <code>"f"</code>. This has the effect of increasing the estimate of <code>g * </code> given <code>def</code>, without increasing the estimate of <code>d</code> in an empty * context. * * @param cs Underlying array of characters in slice. 
* @param start Index of first character in slice. * @param end Index of one past last character in slice. * @throws IllegalArgumentException If the array slice is valid. */ public void decrementSubstrings(char[] cs, int start, int end) { Strings.checkArgsStartEnd(cs, start, end); for (int i = start; i < end; ++i) for (int j = i; j <= end; ++j) mRootNode = mRootNode.decrement(cs, i, j); } /** * Returns a string representation of the trie structure of counts underlying this counter. * * <p><b>Warning:</b> The resulting string will be very large if the number of substrings is * large. To avoid blowing out memory, do not call this method for large counters. * * @return String representation of this counter. */ @Override public String toString() { return mRootNode.toString(); } void toStringBuilder(StringBuilder sb) { mRootNode.toString(sb, 0); } /** * Decrements the unigram count for the specified character. This method is useful for training * conditional probabilities, even though it is not powerful enough to do it in full generality. * * @param c Decrement the unigram count for the specified character. */ public void decrementUnigram(char c) { decrementUnigram(c, 1); } /** * Decrements the unigram count by the specified amount for the specified character. This method * is useful for training conditional probabilities, even though it is not powerful enough to do * it in full generality. * * @param c Decrement the unigram count for the specified character. * @param count Amount to decrement. */ public void decrementUnigram(char c, int count) { mRootNode = mRootNode.decrement(new char[] {c}, 0, 1, count); } private List<Long> countsList(int nGramOrder) { List<Long> accum = new ArrayList<Long>(); mRootNode.addCounts(accum, nGramOrder); return accum; } /** * Writes an encoding of this counter to the specified output stream. It may be read back in using * {@link #readFrom(InputStream)}. 
* * <p>The output is produced using a {@link BitTrieWriter} wrapped around a {@link BitOutput} * wrapped around the specified underlying output stream. First, the bit output is used to * delta-code the maximum n-gram plus 1. Then, the trie is encoded as described in {@link * BitTrieWriter}. Finally, the bit output is flushed. The underlying output stream is neither * flushed nor closed, allowing them to be used for other pruposes after this counter is written. * * <p>If necessary for efficiency, streams should be buffered before being passed to this method. * * @param out Underlying output stream for writing. * @throws IOException If there is an underlying I/O error. */ public void writeTo(OutputStream out) throws IOException { BitOutput bitOut = new BitOutput(out); bitOut.writeDelta(mMaxLength + 1L); TrieWriter writer = new BitTrieWriter(bitOut); writeCounter(this, writer, mMaxLength); bitOut.flush(); } /** * Writes the specified sequence counter to the specified trie writer, restricting output to * n-grams not longer than the specified maximum. * * @param counter Counter to write. * @param writer Trie writer to which counter is written. * @param maxNGram Maximum length n-gram written. * @throws IOException If there is an underlying I/O error. */ public static void writeCounter(CharSeqCounter counter, TrieWriter writer, int maxNGram) throws IOException { writeCounter(new char[maxNGram], 0, counter, writer); } /** * Reads a trie character sequence counter from the specified input stream. * * <p>The expected encoding is described in {@link #writeTo(OutputStream)}. * * <p>If necessary for efficiency, streams should be buffered before being passed to this method. * * @param in Underlying input stream for reading. * @throws IOException If there is an underlying I/O error. 
*/
  public static TrieCharSeqCounter readFrom(InputStream in) throws IOException {
    BitInput bits = new BitInput(in);
    // The header stores the maximum n-gram length plus one; undo the offset.
    int maxNGram = (int) (bits.readDelta() - 1L);
    BitTrieReader trieIn = new BitTrieReader(bits);
    return readCounter(trieIn, maxNGram);
  }

  /**
   * Reads a trie character sequence counter from the specified trie reader, restricting the result
   * to the specified maximum n-gram.
   *
   * @param reader Reader from which to read the trie.
   * @param maxNGram Maximum length n-gram to read.
   * @return The counter read from the reader.
   * @throws IOException If there is an underlying I/O error.
   */
  public static TrieCharSeqCounter readCounter(TrieReader reader, int maxNGram)
      throws IOException {
    TrieCharSeqCounter result = new TrieCharSeqCounter(maxNGram);
    result.mRootNode = readNode(reader, 0, maxNGram);
    return result;
  }

  /**
   * Recursively writes the node reached by the first {@code pos} characters of {@code cs}: its
   * count, then each daughter symbol followed by that daughter's subtree, then the end marker
   * {@code -1}. Daughters are only visited while {@code pos} is below the buffer length.
   *
   * @param cs Scratch buffer holding the current path; its length bounds the depth.
   * @param pos Number of leading characters of {@code cs} forming the current path.
   * @param counter Counter being written.
   * @param writer Trie writer receiving the output.
   * @throws IOException If there is an underlying I/O error.
   */
  static void writeCounter(char[] cs, int pos, CharSeqCounter counter, TrieWriter writer)
      throws IOException {
    long pathCount = counter.count(cs, 0, pos);
    writer.writeCount(pathCount);

    if (pos < cs.length) {
      // daughters within n-gram bound
      char[] following = counter.charactersFollowing(cs, 0, pos);
      for (char next : following) {
        writer.writeSymbol(next);
        cs[pos] = next; // extend the path in place before descending
        writeCounter(cs, pos + 1, counter, writer);
      }
    }

    // end of daughters
    writer.writeSymbol(-1L);
  }

  /**
   * Consumes one node and all of its descendants from the reader without building anything.
   *
   * @param reader Reader positioned at the start of a node.
   * @throws IOException If there is an underlying I/O error.
   */
  private static void skipNode(TrieReader reader) throws IOException {
    reader.readCount();
    while (reader.readSymbol() != -1) {
      skipNode(reader);
    }
  }

  /**
   * Reads one node and its daughters from the reader. Nodes deeper than {@code maxDepth} are
   * consumed from the reader and reported as {@code null}. Nodes with zero to three daughters use
   * the dedicated factory overloads; nodes with four or more use the array-based one.
   *
   * @param reader Reader positioned at the start of a node.
   * @param depth Depth of the node about to be read.
   * @param maxDepth Greatest depth for which nodes are built.
   * @return The node read, or {@code null} if {@code depth} exceeds {@code maxDepth}.
   * @throws IOException If there is an underlying I/O error.
   */
  private static Node readNode(TrieReader reader, int depth, int maxDepth) throws IOException {
    if (depth > maxDepth) {
      skipNode(reader);
      return null;
    }

    final long count = reader.readCount();
    final int childDepth = depth + 1;

    // 0+ daughters
    final long symA = reader.readSymbol();
    if (symA == -1L) {
      return NodeFactory.createNode(count);
    }

    // 1+ daughters
    final Node nodeA = readNode(reader, childDepth, maxDepth);
    final long symB = reader.readSymbol();
    if (symB == -1L) {
      return NodeFactory.createNodeFold((char) symA, nodeA, count);
    }

    // 2+ daughters
    final Node nodeB = readNode(reader, childDepth, maxDepth);
    final long symC = reader.readSymbol();
    if (symC == -1L) {
      return NodeFactory.createNode((char) symA, nodeA, (char) symB, nodeB, count);
    }

    // 3+ daughters
    final Node nodeC = readNode(reader, childDepth, maxDepth);
    final long symD = reader.readSymbol();
    if (symD == -1L) {
      return NodeFactory.createNode(
          (char) symA, nodeA, (char) symB, nodeB, (char) symC, nodeC, count);
    }

    // 4+ daughters
    final Node nodeD = readNode(reader, childDepth, maxDepth);

    StringBuilder symbols = new StringBuilder();
    symbols.append((char) symA).append((char) symB).append((char) symC).append((char) symD);

    List<Node> daughters = new ArrayList<Node>();
    daughters.add(nodeA);
    daughters.add(nodeB);
    daughters.add(nodeC);
    daughters.add(nodeD);

    // Keep alternating symbol / subtree until the end-of-daughters marker shows up.
    for (long sym = reader.readSymbol(); sym != -1L; sym = reader.readSymbol()) {
      symbols.append((char) sym);
      daughters.add(readNode(reader, childDepth, maxDepth));
    }

    Node[] daughterArray = daughters.toArray(EMPTY_NODE_ARRAY);
    char[] symbolArray = Strings.toCharArray(symbols);
    return NodeFactory.createNode(symbolArray, daughterArray, count); // > 3 daughters
  }

  /** Shared zero-length array used as the type token for {@code List.toArray}. */
  static final Node[] EMPTY_NODE_ARRAY = new Node[0];
}
/**
 * Builds the node for the given set of classes by delegating to {@code
 * NodeFactory.getOWLClassNode}.
 *
 * @param entities Classes to place in the node.
 * @return The node produced by the factory for {@code entities}.
 */
@Override
protected DefaultNode<OWLClass> getNode(Set<OWLClass> entities) {
  DefaultNode<OWLClass> classNode = NodeFactory.getOWLClassNode(entities);
  return classNode;
}